Improve code generation for floating-point constants

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Freddie Liardet
2021-12-02 15:38:15 +00:00
parent 06a7bfdcbd
commit b5531580e7
10 changed files with 490 additions and 309 deletions

View File

@@ -1983,6 +1983,19 @@ impl MachInstEmit for Inst {
};
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
}
// FMOV (scalar, immediate): materialize a floating-point constant into a
// vector register from the 8-bit encoded immediate carried by the Inst.
&Inst::FpuMoveFPImm { rd, imm, size } => {
// `ftype` field (bits 23:22): 0b00 selects single precision,
// 0b01 selects double precision; other scalar sizes are not supported.
let size_code = match size {
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => unimplemented!(),
};
sink.put4(
// Opcode template for FMOV (scalar, immediate); the imm8 payload is
// placed at bits 20:13 and the destination register at bits 4:0.
0b000_11110_00_1_00_000_000100_00000_00000
| size_code << 22
| ((imm.enc_bits() as u32) << 13)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::MovToVec { rd, rn, idx, size } => {
let (imm5, shift) = match size.lane_size() {
ScalarSize::Size8 => (0b00001, 1),

View File

@@ -2051,6 +2051,25 @@ fn test_aarch64_binemit() {
"8103271E",
"fmov s1, w28",
));
// Binary-emission test: `fmov d31, #1` — the expected machine code is the
// little-endian hex string, followed by the expected disassembly text.
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(31),
imm: ASIMDFPModImm::maybe_from_u64(f64::to_bits(1.0), ScalarSize::Size64).unwrap(),
size: ScalarSize::Size64,
},
"1F106E1E",
"fmov d31, #1",
));
// Binary-emission test: `fmov s1, #31` — 32-bit float immediate path; the
// f32 bit pattern is widened to u64 for the encoder helper.
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(1),
imm: ASIMDFPModImm::maybe_from_u64(f32::to_bits(31.0).into(), ScalarSize::Size32)
.unwrap(),
size: ScalarSize::Size32,
},
"01F0271E",
"fmov s1, #31",
));
insns.push((
Inst::MovToVec {
rd: writable_vreg(0),

View File

@@ -239,29 +239,35 @@ impl Inst {
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
const_data: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if value == 0 {
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2
size: VectorSize::Size32x2,
}]
} else if let Some(imm) =
ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
{
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size32,
}]
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);
let mut insts = Inst::load_constant(tmp, const_data as u64);
insts.push(Inst::MovToFpu {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size64,
size: ScalarSize::Size32,
});
insts
@@ -277,11 +283,23 @@ impl Inst {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if let Ok(const_data) = u32::try_from(const_data) {
// TODO: Treat as half of a 128 bit vector and consider replicated patterns.
// Scalar MOVI might also be an option.
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2,
}]
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size64,
}]
} else if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);
@@ -879,6 +897,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuMoveFPImm { rd, .. } => {
// Destination-only: the source is an immediate encoded in the
// instruction, so the sole register operand is a def of `rd`.
collector.add_def(rd);
}
&Inst::MovToVec { rd, rn, .. } => {
collector.add_mod(rd);
collector.add_use(rn);
@@ -1654,6 +1675,9 @@ pub fn aarch64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
mapper.map_def(rd);
mapper.map_use(rn);
}
&mut Inst::FpuMoveFPImm { ref mut rd, .. } => {
// Only `rd` needs remapping; the immediate carries no register.
mapper.map_def(rd);
}
&mut Inst::MovToVec {
ref mut rd,
ref mut rn,
@@ -2693,6 +2717,12 @@ impl Inst {
let rn = show_ireg_sized(rn, mb_rru, operand_size);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
// Pretty-print as e.g. `fmov d31, #1`, with the register name
// sized according to the scalar size (s/d prefix).
let imm = imm.show_rru(mb_rru);
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
format!("fmov {}, {}", rd, imm)
}
&Inst::MovToVec { rd, rn, idx, size } => {
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());