Cranelift AArch64: Improve code generation for vector constants
In particular, introduce initial support for the MOVI and MVNI instructions, with 8-bit elements. Also, treat vector constants as 32- or 64-bit floating-point numbers, if their value allows it, by relying on the architectural zero extension. Finally, stop generating literal loads for 32-bit constants. Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -18,6 +18,7 @@ use regalloc::{RegUsageCollector, RegUsageMapper};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::string::{String, ToString};
|
||||
|
||||
@@ -826,14 +827,9 @@ pub enum Inst {
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
LoadFpuConst32 {
|
||||
rd: Writable<Reg>,
|
||||
const_data: f32,
|
||||
},
|
||||
|
||||
LoadFpuConst64 {
|
||||
rd: Writable<Reg>,
|
||||
const_data: f64,
|
||||
const_data: u64,
|
||||
},
|
||||
|
||||
LoadFpuConst128 {
|
||||
@@ -922,6 +918,14 @@ pub enum Inst {
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate immediate to vector.
|
||||
VecDupImm {
|
||||
rd: Writable<Reg>,
|
||||
imm: ASIMDMovModImm,
|
||||
invert: bool,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector extend.
|
||||
VecExtend {
|
||||
t: VecExtendOp,
|
||||
@@ -934,8 +938,8 @@ pub enum Inst {
|
||||
VecMovElement {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx1: u8,
|
||||
idx2: u8,
|
||||
dest_idx: u8,
|
||||
src_idx: u8,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
@@ -1297,29 +1301,146 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 32-bit floating-point constant.
|
||||
pub fn load_fp_constant32(rd: Writable<Reg>, value: f32) -> Inst {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
Inst::LoadFpuConst32 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 32-bit floating-point constant.
|
||||
pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
value: u32,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if value == 0 {
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm: ASIMDMovModImm::zero(),
|
||||
invert: false,
|
||||
size: VectorSize::Size8x8
|
||||
}]
|
||||
} else {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
let tmp = alloc_tmp(RegClass::I64, I32);
|
||||
let mut insts = Inst::load_constant(tmp, value as u64);
|
||||
|
||||
insts.push(Inst::MovToFpu {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
|
||||
insts
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 64-bit floating-point constant.
|
||||
pub fn load_fp_constant64(rd: Writable<Reg>, value: f64) -> Inst {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
Inst::LoadFpuConst64 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 64-bit floating-point constant.
|
||||
pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
const_data: u64,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if let Ok(const_data) = u32::try_from(const_data) {
|
||||
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
|
||||
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent bits.
|
||||
// Also, treat it as half of a 128-bit vector and consider replicated patterns. Scalar MOVI
|
||||
// might also be an option.
|
||||
} else if const_data & (u32::MAX as u64) == 0 {
|
||||
let tmp = alloc_tmp(RegClass::I64, I64);
|
||||
let mut insts = Inst::load_constant(tmp, const_data);
|
||||
|
||||
insts.push(Inst::MovToFpu {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
|
||||
insts
|
||||
} else {
|
||||
smallvec![Inst::LoadFpuConst64 { rd, const_data }]
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 128-bit vector constant.
|
||||
pub fn load_fp_constant128(rd: Writable<Reg>, value: u128) -> Inst {
|
||||
Inst::LoadFpuConst128 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 128-bit vector constant.
|
||||
pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
const_data: u128,
|
||||
alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 5]> {
|
||||
if let Ok(const_data) = u64::try_from(const_data) {
|
||||
SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
|
||||
} else if let Some((pattern, size)) =
|
||||
Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
|
||||
{
|
||||
Inst::load_replicated_vector_pattern(
|
||||
rd,
|
||||
pattern,
|
||||
VectorSize::from_lane_size(size, true),
|
||||
alloc_tmp,
|
||||
)
|
||||
} else {
|
||||
smallvec![Inst::LoadFpuConst128 { rd, const_data }]
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether a 128-bit constant represents a vector consisting of elements with
|
||||
/// the same value.
|
||||
pub fn get_replicated_vector_pattern(
|
||||
value: u128,
|
||||
size: ScalarSize,
|
||||
) -> Option<(u64, ScalarSize)> {
|
||||
let (mask, shift, next_size) = match size {
|
||||
ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
|
||||
ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
|
||||
ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
|
||||
ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
|
||||
_ => return None,
|
||||
};
|
||||
let mut r = None;
|
||||
let v = value & mask;
|
||||
|
||||
if (value >> shift) & mask == v {
|
||||
r = Inst::get_replicated_vector_pattern(v, next_size);
|
||||
|
||||
if r.is_none() {
|
||||
r = Some((v as u64, size));
|
||||
}
|
||||
}
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
/// Create instructions that load a 128-bit vector constant consisting of elements with
|
||||
/// the same value.
|
||||
pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
pattern: u64,
|
||||
size: VectorSize,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 5]> {
|
||||
let lane_size = size.lane_size();
|
||||
|
||||
if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm,
|
||||
invert: false,
|
||||
size
|
||||
}]
|
||||
} else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
|
||||
debug_assert_ne!(lane_size, ScalarSize::Size8);
|
||||
debug_assert_ne!(lane_size, ScalarSize::Size64);
|
||||
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm,
|
||||
invert: true,
|
||||
size
|
||||
}]
|
||||
} else {
|
||||
let tmp = alloc_tmp(RegClass::I64, I64);
|
||||
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
|
||||
|
||||
insts.push(Inst::VecDup {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
size,
|
||||
});
|
||||
|
||||
insts
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1704,9 +1825,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_use(rd);
|
||||
memarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::LoadFpuConst32 { rd, .. }
|
||||
| &Inst::LoadFpuConst64 { rd, .. }
|
||||
| &Inst::LoadFpuConst128 { rd, .. } => {
|
||||
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
&Inst::FpuToInt { rd, rn, .. } => {
|
||||
@@ -1746,6 +1865,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecDupImm { rd, .. } => {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
&Inst::VecExtend { rd, rn, .. } => {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
@@ -2344,9 +2466,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, rd);
|
||||
map_mem(mapper, mem);
|
||||
}
|
||||
&mut Inst::LoadFpuConst32 { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
&mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
@@ -2441,6 +2560,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_def(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecDupImm { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
&mut Inst::VecExtend {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
@@ -2631,19 +2753,12 @@ impl MachInst for Inst {
|
||||
to_reg: Writable<Reg>,
|
||||
value: u64,
|
||||
ty: Type,
|
||||
_alloc_tmp: F,
|
||||
alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if ty == F64 {
|
||||
let mut ret = SmallVec::new();
|
||||
ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value)));
|
||||
ret
|
||||
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
|
||||
} else if ty == F32 {
|
||||
let mut ret = SmallVec::new();
|
||||
ret.push(Inst::load_fp_constant32(
|
||||
to_reg,
|
||||
f32::from_bits(value as u32),
|
||||
));
|
||||
ret
|
||||
Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
|
||||
} else {
|
||||
// Must be an integer type.
|
||||
debug_assert!(
|
||||
@@ -3216,13 +3331,9 @@ impl Inst {
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
format!("{}str {}, {}", mem_str, rd, mem)
|
||||
}
|
||||
&Inst::LoadFpuConst32 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
|
||||
format!("ldr {}, pc+8 ; b 8 ; data.f32 {}", rd, const_data)
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, const_data)
|
||||
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
|
||||
}
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
|
||||
@@ -3330,6 +3441,17 @@ impl Inst {
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, size);
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecDupImm { rd, imm, invert, size } => {
|
||||
let imm = imm.show_rru(mb_rru);
|
||||
let op = if invert {
|
||||
"mvni"
|
||||
} else {
|
||||
"movi"
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
|
||||
format!("{} {}, {}", op, rd, imm)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn, high_half } => {
|
||||
let (op, dest, src) = match (t, high_half) {
|
||||
(VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
@@ -3352,12 +3474,12 @@ impl Inst {
|
||||
&Inst::VecMovElement {
|
||||
rd,
|
||||
rn,
|
||||
idx1,
|
||||
idx2,
|
||||
dest_idx,
|
||||
src_idx,
|
||||
size,
|
||||
} => {
|
||||
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx1, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, idx2, size);
|
||||
let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, src_idx, size);
|
||||
format!("mov {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
|
||||
|
||||
Reference in New Issue
Block a user