arm64: Fold some constants into load instructions
This changes the following: `mov x0, #4; ldr x0, [x1, x0]` into: `ldr x0, [x1, #4]`. I noticed this pattern (but with #0) in a benchmark. Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -574,11 +574,11 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
|
|||||||
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
|
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
|
||||||
/// do a zero-extension.
|
/// do a zero-extension.
|
||||||
///
|
///
|
||||||
/// We do not descend further into the inputs of extensions, because supporting
|
/// We do not descend further into the inputs of extensions (unless it is a constant),
|
||||||
/// (e.g.) a 32-bit add that is later extended would require additional masking
|
/// because supporting (e.g.) a 32-bit add that is later extended would require
|
||||||
/// of high-order bits, which is too complex. So, in essence, we descend any
|
/// additional masking of high-order bits, which is too complex. So, in essence, we
|
||||||
/// number of adds from the roots, collecting all 64-bit address addends; then
|
/// descend any number of adds from the roots, collecting all 64-bit address addends;
|
||||||
/// possibly support extensions at these leaves.
|
/// then possibly support extensions at these leaves.
|
||||||
fn collect_address_addends<C: LowerCtx<I = Inst>>(
|
fn collect_address_addends<C: LowerCtx<I = Inst>>(
|
||||||
ctx: &mut C,
|
ctx: &mut C,
|
||||||
roots: &[InsnInput],
|
roots: &[InsnInput],
|
||||||
@@ -609,8 +609,20 @@ fn collect_address_addends<C: LowerCtx<I = Inst>>(
|
|||||||
ExtendOp::SXTW
|
ExtendOp::SXTW
|
||||||
};
|
};
|
||||||
let extendee_input = InsnInput { insn, input: 0 };
|
let extendee_input = InsnInput { insn, input: 0 };
|
||||||
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
|
// If the input is a zero-extension of a constant, add the value to the known
|
||||||
result32.push((reg, extendop));
|
// offset.
|
||||||
|
// Only do this for zero-extension, as generating a sign-extended
|
||||||
|
// constant may be more instructions than using the 'SXTW' addressing mode.
|
||||||
|
if let (Some(insn), ExtendOp::UXTW) = (
|
||||||
|
maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
|
||||||
|
extendop,
|
||||||
|
) {
|
||||||
|
let value = ctx.get_constant(insn).unwrap() as i64;
|
||||||
|
offset += value;
|
||||||
|
} else {
|
||||||
|
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
|
||||||
|
result32.push((reg, extendop));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Opcode::Uextend | Opcode::Sextend => {
|
Opcode::Uextend | Opcode::Sextend => {
|
||||||
let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
|
let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
|
||||||
|
|||||||
@@ -269,3 +269,33 @@ block0(v0: i32, v1: i32):
|
|||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f16(i64) -> i32 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iconst.i32 0
|
||||||
|
v2 = uextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v0+v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldr w0, [x0]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f17(i64) -> i32 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iconst.i32 4
|
||||||
|
v2 = uextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v0+v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldur w0, [x0, #4]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|||||||
Reference in New Issue
Block a user