Improve code generation for floating-point constants
Copyright (c) 2022, Arm Limited.
@@ -454,6 +454,12 @@
       (rn Reg)
       (size ScalarSize))

+    ;; Loads a floating-point immediate.
+    (FpuMoveFPImm
+      (rd WritableReg)
+      (imm ASIMDFPModImm)
+      (size ScalarSize))
+
     ;; Move to a vector element from a GPR.
     (MovToVec
       (rd WritableReg)

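For context, `ASIMDFPModImm` models the 8-bit floating-point modified immediate accepted by FMOV (scalar, immediate): only values of the form +/-(16..=31)/16 * 2^r with r in -3..=4 are encodable. Below is a minimal sketch of such a representability check for a 64-bit pattern; the helper name and structure are illustrative, not the actual implementation behind `ASIMDFPModImm::maybe_from_u64`.

/// Illustrative sketch only; the real check lives behind
/// `ASIMDFPModImm::maybe_from_u64`.  FMOV (scalar, immediate) encodes
/// +/-(16..=31)/16 * 2^r with r in -3..=4 as 8 bits: sign, 3 exponent
/// bits and a 4-bit fraction.
fn fmov_imm8_from_f64(bits: u64) -> Option<u8> {
    if bits & 0x0000_ffff_ffff_ffff != 0 {
        return None; // only the top 4 mantissa bits may be set
    }
    let sign = ((bits >> 63) & 1) as u8;
    let exp = ((bits >> 52) & 0x7ff) as i64 - 1023; // unbiased exponent
    let frac4 = ((bits >> 48) & 0xf) as u8; // top 4 mantissa bits
    if !(-3..=4).contains(&exp) {
        return None; // also rejects 0.0, subnormals, infinities and NaNs
    }
    // Pack the exponent the way VFPExpandImm expects it.
    let (b, cd) = if exp >= 1 {
        (0u8, (exp - 1) as u8)
    } else {
        (1u8, (exp + 3) as u8)
    };
    Some(sign << 7 | b << 6 | cd << 4 | frac4)
}

With this sketch, `fmov_imm8_from_f64(f64::to_bits(1.0))` yields `Some(0x70)`, while `f64::to_bits(0.1)` yields `None`.
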
@@ -1983,6 +1983,19 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
             }
+            &Inst::FpuMoveFPImm { rd, imm, size } => {
+                let size_code = match size {
+                    ScalarSize::Size32 => 0b00,
+                    ScalarSize::Size64 => 0b01,
+                    _ => unimplemented!(),
+                };
+                sink.put4(
+                    0b000_11110_00_1_00_000_000100_00000_00000
+                        | size_code << 22
+                        | ((imm.enc_bits() as u32) << 13)
+                        | machreg_to_vec(rd.to_reg()),
+                );
+            }
             &Inst::MovToVec { rd, rn, idx, size } => {
                 let (imm5, shift) = match size.lane_size() {
                     ScalarSize::Size8 => (0b00001, 1),

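As a quick sanity check of the encoding arm above, the word for the `fmov d31, #1` test case below can be reproduced by hand (a self-contained sketch; 0x70 is the imm8 for 1.0):

// Sketch: rebuild the emitted word for `fmov d31, #1` from the fields used above.
fn main() {
    let base: u32 = 0b000_11110_00_1_00_000_000100_00000_00000; // 0x1E20_1000
    let size_code: u32 = 0b01; // ScalarSize::Size64
    let imm8: u32 = 0b0111_0000; // 1.0 as an FMOV modified immediate
    let rd: u32 = 31; // v31, printed as d31
    let word = base | (size_code << 22) | (imm8 << 13) | rd;
    assert_eq!(word, 0x1E6E_101F); // stored little-endian: "1F106E1E"
}
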
@@ -2051,6 +2051,25 @@ fn test_aarch64_binemit() {
         "8103271E",
         "fmov s1, w28",
     ));
+    insns.push((
+        Inst::FpuMoveFPImm {
+            rd: writable_vreg(31),
+            imm: ASIMDFPModImm::maybe_from_u64(f64::to_bits(1.0), ScalarSize::Size64).unwrap(),
+            size: ScalarSize::Size64,
+        },
+        "1F106E1E",
+        "fmov d31, #1",
+    ));
+    insns.push((
+        Inst::FpuMoveFPImm {
+            rd: writable_vreg(1),
+            imm: ASIMDFPModImm::maybe_from_u64(f32::to_bits(31.0).into(), ScalarSize::Size32)
+                .unwrap(),
+            size: ScalarSize::Size32,
+        },
+        "01F0271E",
+        "fmov s1, #31",
+    ));
     insns.push((
         Inst::MovToVec {
             rd: writable_vreg(0),

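The two constants exercised above, 1.0 and 31.0, sit inside the small set of FMOV-encodable values; 31.0 is the largest magnitude in that set and 0.125 the smallest. The sketch below expands all 256 immediates, mirroring the architectural VFPExpandImm pseudocode rather than any Cranelift helper:

// Sketch: expand every 8-bit FMOV immediate into the f64 it denotes.
fn main() {
    for imm8 in 0u32..256 {
        let sign = if imm8 & 0x80 != 0 { -1.0 } else { 1.0 };
        let frac = 1.0 + (imm8 & 0xf) as f64 / 16.0;
        let cd = ((imm8 >> 4) & 0b11) as i32;
        let exp = if imm8 & 0x40 != 0 { cd - 3 } else { cd + 1 };
        println!("{imm8:#04x} -> {}", sign * frac * 2f64.powi(exp));
    }
}
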
@@ -239,29 +239,35 @@ impl Inst {
     /// Create instructions that load a 32-bit floating-point constant.
     pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
         rd: Writable<Reg>,
-        value: u32,
+        const_data: u32,
         mut alloc_tmp: F,
     ) -> SmallVec<[Inst; 4]> {
         // Note that we must make sure that all bits outside the lowest 32 are set to 0
         // because this function is also used to load wider constants (that have zeros
         // in their most significant bits).
-        if value == 0 {
+        if const_data == 0 {
             smallvec![Inst::VecDupImm {
                 rd,
                 imm: ASIMDMovModImm::zero(ScalarSize::Size32),
                 invert: false,
-                size: VectorSize::Size32x2
+                size: VectorSize::Size32x2,
             }]
+        } else if let Some(imm) =
+            ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
+        {
+            smallvec![Inst::FpuMoveFPImm {
+                rd,
+                imm,
+                size: ScalarSize::Size32,
+            }]
         } else {
-            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
-            // bits.
             let tmp = alloc_tmp(I32);
-            let mut insts = Inst::load_constant(tmp, value as u64);
+            let mut insts = Inst::load_constant(tmp, const_data as u64);

             insts.push(Inst::MovToFpu {
                 rd,
                 rn: tmp.to_reg(),
-                size: ScalarSize::Size64,
+                size: ScalarSize::Size32,
             });

             insts

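The rewritten helper now picks between three strategies: a `movi` of zero, a single `fmov` immediate, or the old integer-register fallback. Below is a standalone sketch of that decision for f32 inputs; the FMOV test here is the simplified bit check from the earlier sketch, not the real `ASIMDFPModImm` logic.

// Sketch: which of the three strategies in load_fp_constant32 a given f32 would take.
fn strategy_for_f32(value: f32) -> &'static str {
    let bits = value.to_bits();
    let exp = ((bits >> 23) & 0xff) as i32 - 127; // unbiased exponent
    let frac_ok = bits & 0x0007_ffff == 0; // only the top 4 mantissa bits may be set
    if bits == 0 {
        "VecDupImm (movi #0)"
    } else if frac_ok && (-3..=4).contains(&exp) {
        "FpuMoveFPImm (fmov immediate)"
    } else {
        "load_constant into a GPR, then MovToFpu (fmov from w-register)"
    }
}

fn main() {
    for v in [0.0f32, 1.0, 31.0, 0.1, -2.5] {
        println!("{v}: {}", strategy_for_f32(v));
    }
}
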
@@ -277,11 +283,23 @@
         // Note that we must make sure that all bits outside the lowest 64 are set to 0
         // because this function is also used to load wider constants (that have zeros
         // in their most significant bits).
-        if let Ok(const_data) = u32::try_from(const_data) {
+        if const_data == 0 {
+            smallvec![Inst::VecDupImm {
+                rd,
+                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
+                invert: false,
+                size: VectorSize::Size32x2,
+            }]
+        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
+            smallvec![Inst::FpuMoveFPImm {
+                rd,
+                imm,
+                size: ScalarSize::Size64,
+            }]
+        } else if let Ok(const_data) = u32::try_from(const_data) {
+            // TODO: Treat as half of a 128 bit vector and consider replicated patterns.
+            // Scalar MOVI might also be an option.
             Inst::load_fp_constant32(rd, const_data, alloc_tmp)
-        // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
-        // bits. Also, treat it as half of a 128-bit vector and consider replicated
-        // patterns. Scalar MOVI might also be an option.
         } else if const_data & (u32::MAX as u64) == 0 {
             let tmp = alloc_tmp(I64);
             let mut insts = Inst::load_constant(tmp, const_data);

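One subtlety worth calling out: these helpers operate on raw bit patterns, which is why the `u32::try_from` delegation remains useful even after the 64-bit FMOV check. A small sketch illustrating the point:

// Sketch: a 64-bit pattern whose upper half is zero may be loadable with a
// single 32-bit `fmov` even though it is not an interesting f64 value
// (writes to an S register zero the upper 32 bits of the vector register).
fn main() {
    let pattern: u64 = f32::to_bits(1.0) as u64; // 0x0000_0000_3F80_0000
    // Interpreted as an f64, the pattern is a subnormal, far outside the
    // FMOV-immediate set checked first...
    println!("as f64: {:e}", f64::from_bits(pattern));
    // ...but its low 32 bits are exactly 1.0f32, so the delegated
    // load_fp_constant32 call can emit `fmov s_, #1.0`.
    assert_eq!(u32::try_from(pattern).unwrap(), f32::to_bits(1.0));
}
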
@@ -879,6 +897,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::FpuMoveFPImm { rd, .. } => {
+            collector.add_def(rd);
+        }
         &Inst::MovToVec { rd, rn, .. } => {
             collector.add_mod(rd);
             collector.add_use(rn);

@@ -1654,6 +1675,9 @@ pub fn aarch64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
             mapper.map_def(rd);
             mapper.map_use(rn);
         }
+        &mut Inst::FpuMoveFPImm { ref mut rd, .. } => {
+            mapper.map_def(rd);
+        }
         &mut Inst::MovToVec {
             ref mut rd,
             ref mut rn,

@@ -2693,6 +2717,12 @@ impl Inst {
                 let rn = show_ireg_sized(rn, mb_rru, operand_size);
                 format!("fmov {}, {}", rd, rn)
             }
+            &Inst::FpuMoveFPImm { rd, imm, size } => {
+                let imm = imm.show_rru(mb_rru);
+                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+
+                format!("fmov {}, {}", rd, imm)
+            }
             &Inst::MovToVec { rd, rn, idx, size } => {
                 let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
                 let rn = show_ireg_sized(rn, mb_rru, size.operand_size());

@@ -1,4 +1,4 @@
 src/clif.isle f176ef3bba99365
 src/prelude.isle 22dd5ff133398960
-src/isa/aarch64/inst.isle 5fa80451697b084f
+src/isa/aarch64/inst.isle f946561093de4ff5
 src/isa/aarch64/lower.isle 2d2e1e076a0c8a23

(One file diff suppressed because it is too large.)

@@ -53,16 +53,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     match op {
         Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx),

-        Opcode::F32const => {
-            let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            lower_constant_f32(ctx, rd, value);
-        }
-        Opcode::F64const => {
-            let value = f64::from_bits(ctx.get_constant(insn).unwrap());
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            lower_constant_f64(ctx, rd, value);
-        }
+        Opcode::F32const | Opcode::F64const => unreachable!(
+            "Should never see constant ops at top level lowering entry
+            point, as constants are rematerialized at use-sites"
+        ),

         Opcode::Iadd => implemented_in_isle(ctx),
         Opcode::Isub => implemented_in_isle(ctx),
         Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {

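The F32const/F64const arms can become unreachable because, with this change, floating-point constants are produced by the ISLE-based lowering at the places they are used (the relevant ISLE changes are in the suppressed diff above). Purely as an illustration of what "rematerialized at use-sites" means, a use site conceptually does something like the following with the helpers visible in the removed arms; this is a hypothetical sketch, not the actual generated code:

// Hypothetical sketch (not real Cranelift code): materialize an f64 constant
// on demand, at the instruction that consumes it.
fn materialize_f64_at_use<C: LowerCtx<I = Inst>>(ctx: &mut C, bits: u64) -> Reg {
    let rd = ctx.alloc_tmp(F64).only_reg().unwrap(); // fresh temporary vreg
    lower_constant_f64(ctx, rd, f64::from_bits(bits)); // same helper as before
    rd.to_reg()
}
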