diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index bcc5cbeaf0..b79afae4b4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -574,11 +574,11 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
 /// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
 ///   do a zero-extension.
 ///
-/// We do not descend further into the inputs of extensions, because supporting
-/// (e.g.) a 32-bit add that is later extended would require additional masking
-/// of high-order bits, which is too complex. So, in essence, we descend any
-/// number of adds from the roots, collecting all 64-bit address addends; then
-/// possibly support extensions at these leaves.
+/// We do not descend further into the inputs of extensions (unless it is a constant),
+/// because supporting (e.g.) a 32-bit add that is later extended would require
+/// additional masking of high-order bits, which is too complex. So, in essence, we
+/// descend any number of adds from the roots, collecting all 64-bit address addends;
+/// then possibly support extensions at these leaves.
 fn collect_address_addends<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     roots: &[InsnInput],
@@ -609,8 +609,20 @@ fn collect_address_addends<C: LowerCtx<I = Inst>>(
                         ExtendOp::SXTW
                     };
                     let extendee_input = InsnInput { insn, input: 0 };
-                    let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
-                    result32.push((reg, extendop));
+                    // If the input is a zero-extension of a constant, add the value to the known
+                    // offset.
+                    // Only do this for zero-extension, as generating a sign-extended
+                    // constant may be more instructions than using the 'SXTW' addressing mode.
+                    if let (Some(insn), ExtendOp::UXTW) = (
+                        maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
+                        extendop,
+                    ) {
+                        let value = ctx.get_constant(insn).unwrap() as i64;
+                        offset += value;
+                    } else {
+                        let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
+                        result32.push((reg, extendop));
+                    }
                 }
                 Opcode::Uextend | Opcode::Sextend => {
                     let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index b88b8e6590..6cb728c45d 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -269,3 +269,33 @@ block0(v0: i32, v1: i32):
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f16(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i32 0
+  v2 = uextend.i64 v1
+  v3 = load_complex.i32 v0+v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f17(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i32 4
+  v2 = uextend.i64 v1
+  v3 = load_complex.i32 v0+v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldur w0, [x0, #4]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
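
Note: below is a minimal, standalone Rust sketch of the folding rule this patch adds, not Cranelift's own code; the names here (absorb_addend, the register-id stand-in, etc.) are hypothetical. It illustrates the decision the new branch makes: a 32-bit addend that is a zero-extended constant can be absorbed straight into the 64-bit immediate offset, while anything else stays a (register, extend-op) addend so the UXTW/SXTW addressing mode performs the extension.

    // Standalone illustration only; types and names are invented for this sketch.

    /// Which extension a 32-bit addend needs when used in a 64-bit address.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum ExtendOp {
        Uxtw, // zero-extend
        Sxtw, // sign-extend
    }

    /// Fold a zero-extended constant addend into `offset`; otherwise keep a
    /// (register, extend op) pair. `reg` is just a register-id stand-in.
    fn absorb_addend(
        reg: u8,
        known_const: Option<u32>,
        extendop: ExtendOp,
        offset: &mut i64,
        result32: &mut Vec<(u8, ExtendOp)>,
    ) {
        if let (Some(value), ExtendOp::Uxtw) = (known_const, extendop) {
            // Zero-extending a 32-bit constant yields the same numeric value,
            // so it can be added directly to the immediate offset.
            *offset += i64::from(value);
        } else {
            // A sign-extended constant might cost extra instructions to
            // materialize, so leave it (and any non-constant) to the
            // reg + extend addressing mode.
            result32.push((reg, extendop));
        }
    }

    fn main() {
        let (mut offset, mut result32) = (0i64, Vec::new());
        // Like %f17 above: uextend(iconst.i32 4) folds into the offset...
        absorb_addend(0, Some(4), ExtendOp::Uxtw, &mut offset, &mut result32);
        // ...while a sign-extended addend stays as a register + SXTW addend.
        absorb_addend(1, Some(4), ExtendOp::Sxtw, &mut offset, &mut result32);
        assert_eq!(offset, 4);
        assert_eq!(result32, vec![(1, ExtendOp::Sxtw)]);
    }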