arm64: Fold some constants into load instructions

This changes the following:
  mov x2, #4
  ldr x0, [x1, x2]

Into:
  ldr x0, [x1, #4]

I noticed this pattern (but with #0) in a benchmark.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Joey Gouly
2020-11-11 18:47:43 +00:00
parent 9ced345aed
commit a5011e8212
2 changed files with 49 additions and 7 deletions

View File

@@ -574,11 +574,11 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide; /// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
/// do a zero-extension. /// do a zero-extension.
/// ///
/// We do not descend further into the inputs of extensions, because supporting /// We do not descend further into the inputs of extensions (unless it is a constant),
/// (e.g.) a 32-bit add that is later extended would require additional masking /// because supporting (e.g.) a 32-bit add that is later extended would require
/// of high-order bits, which is too complex. So, in essence, we descend any /// additional masking of high-order bits, which is too complex. So, in essence, we
/// number of adds from the roots, collecting all 64-bit address addends; then /// descend any number of adds from the roots, collecting all 64-bit address addends;
/// possibly support extensions at these leaves. /// then possibly support extensions at these leaves.
fn collect_address_addends<C: LowerCtx<I = Inst>>( fn collect_address_addends<C: LowerCtx<I = Inst>>(
ctx: &mut C, ctx: &mut C,
roots: &[InsnInput], roots: &[InsnInput],
@@ -609,8 +609,20 @@ fn collect_address_addends<C: LowerCtx<I = Inst>>(
ExtendOp::SXTW ExtendOp::SXTW
}; };
let extendee_input = InsnInput { insn, input: 0 }; let extendee_input = InsnInput { insn, input: 0 };
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None); // If the input is a zero-extension of a constant, add the value to the known
result32.push((reg, extendop)); // offset.
// Only do this for zero-extension, as generating a sign-extended
// constant may be more instructions than using the 'SXTW' addressing mode.
if let (Some(insn), ExtendOp::UXTW) = (
maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
extendop,
) {
let value = ctx.get_constant(insn).unwrap() as i64;
offset += value;
} else {
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
result32.push((reg, extendop));
}
} }
Opcode::Uextend | Opcode::Sextend => { Opcode::Uextend | Opcode::Sextend => {
let reg = put_input_in_reg(ctx, input, NarrowValueMode::None); let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);

View File

@@ -269,3 +269,33 @@ block0(v0: i32, v1: i32):
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
; f16: the address offset is a zero-extended `iconst.i32 0`. The constant
; is folded into the load's known offset, so no `mov` of the constant and
; no extended-register addressing mode is emitted — just a plain [x0] load.
function %f16(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i32 0
v2 = uextend.i64 v1
v3 = load_complex.i32 v0+v2
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; f17: same shape as f16 but with a non-zero constant. The zero-extended
; `iconst.i32 4` is folded into the load as an immediate offset
; (`ldur w0, [x0, #4]`) instead of materializing 4 in a register.
function %f17(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i32 4
v2 = uextend.i64 v1
v3 = load_complex.i32 v0+v2
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldur w0, [x0, #4]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret