diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 2396bf1889..9b17d95b07 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -342,19 +342,18 @@ pub(crate) fn lower_pair_address(ctx: &mut Lower<Inst>, addr: Value, offset: i32
         zero_reg()
     };
 
-    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
-    ctx.emit(Inst::gen_move(addr, base_reg, I64));
-
     // We have the base register, if we have any others, we need to add them
-    lower_add_addends(ctx, addr, addends64, addends32);
+    let addr = lower_add_addends(ctx, base_reg, addends64, addends32);
 
     // Figure out what offset we should emit
-    let imm7 = SImm7Scaled::maybe_from_i64(offset, I64).unwrap_or_else(|| {
-        lower_add_immediate(ctx, addr, addr.to_reg(), offset);
-        SImm7Scaled::maybe_from_i64(0, I64).unwrap()
-    });
+    let (addr, imm7) = if let Some(imm7) = SImm7Scaled::maybe_from_i64(offset, I64) {
+        (addr, imm7)
+    } else {
+        let res = lower_add_immediate(ctx, addr, offset);
+        (res, SImm7Scaled::maybe_from_i64(0, I64).unwrap())
+    };
 
-    PairAMode::SignedOffset(addr.to_reg(), imm7)
+    PairAMode::SignedOffset(addr, imm7)
 }
 
 /// Lower the address of a load or store.
@@ -454,63 +453,48 @@ pub(crate) fn lower_address(
         return memarg;
     }
 
-    // Allocate the temp and shoehorn it into the AMode.
-    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
-    let (reg, memarg) = match memarg {
-        AMode::RegExtended { rn, rm, extendop } => (
-            rn,
-            AMode::RegExtended {
-                rn: addr.to_reg(),
-                rm,
-                extendop,
-            },
-        ),
-        AMode::RegOffset { rn, off, ty } => (
-            rn,
-            AMode::RegOffset {
-                rn: addr.to_reg(),
-                off,
-                ty,
-            },
-        ),
-        AMode::RegReg { rn, rm } => (
-            rm,
-            AMode::RegReg {
-                rn: addr.to_reg(),
-                rm: rn,
-            },
-        ),
-        AMode::UnsignedOffset { rn, uimm12 } => (
-            rn,
-            AMode::UnsignedOffset {
-                rn: addr.to_reg(),
-                uimm12,
-            },
-        ),
+    // Extract the first register from the memarg so that we can add all the
+    // immediate values to it.
+    let addr = match memarg {
+        AMode::RegExtended { rn, .. } => rn,
+        AMode::RegOffset { rn, .. } => rn,
+        AMode::RegReg { rm, .. } => rm,
+        AMode::UnsignedOffset { rn, .. } => rn,
         _ => unreachable!(),
     };
 
     // If there is any offset, load that first into `addr`, and add the `reg`
     // that we kicked out of the `AMode`; otherwise, start with that reg.
-    if offset != 0 {
-        lower_add_immediate(ctx, addr, reg, offset)
+    let addr = if offset != 0 {
+        lower_add_immediate(ctx, addr, offset)
     } else {
-        ctx.emit(Inst::gen_move(addr, reg, I64));
-    }
+        addr
+    };
 
     // Now handle reg64 and reg32-extended components.
-    lower_add_addends(ctx, addr, addends64, addends32);
+    let addr = lower_add_addends(ctx, addr, addends64, addends32);
 
-    memarg
+    // Shoehorn addr into the AMode.
+    match memarg {
+        AMode::RegExtended { rm, extendop, .. } => AMode::RegExtended {
+            rn: addr,
+            rm,
+            extendop,
+        },
+        AMode::RegOffset { off, ty, .. } => AMode::RegOffset { rn: addr, off, ty },
+        AMode::RegReg { rn, .. } => AMode::RegReg { rn: addr, rm: rn },
+        AMode::UnsignedOffset { uimm12, .. } => AMode::UnsignedOffset { rn: addr, uimm12 },
+        _ => unreachable!(),
+    }
 }
 
 fn lower_add_addends(
     ctx: &mut Lower<Inst>,
-    rd: Writable<Reg>,
+    init: Reg,
    addends64: AddressAddend64List,
     addends32: AddressAddend32List,
-) {
-    for reg in addends64 {
+) -> Reg {
+    let init = addends64.into_iter().fold(init, |prev, reg| {
         // If the register is the stack reg, we must move it to another reg
         // before adding it.
         let reg = if reg == stack_reg() {
@@ -520,30 +504,43 @@ fn lower_add_addends(
         } else {
             reg
         };
+
+        let rd = ctx.alloc_tmp(I64).only_reg().unwrap();
+
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd,
-            rn: rd.to_reg(),
+            rn: prev,
             rm: reg,
         });
-    }
-    for (reg, extendop) in addends32 {
+
+        rd.to_reg()
+    });
+
+    addends32.into_iter().fold(init, |prev, (reg, extendop)| {
         assert!(reg != stack_reg());
+
+        let rd = ctx.alloc_tmp(I64).only_reg().unwrap();
+
         ctx.emit(Inst::AluRRRExtend {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd,
-            rn: rd.to_reg(),
+            rn: prev,
             rm: reg,
             extendop,
         });
-    }
+
+        rd.to_reg()
+    })
 }
 
 /// Adds into `rd` a signed imm pattern matching the best instruction for it.
 // TODO: This function is duplicated in ctx.gen_add_imm
-fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm: i64) {
+fn lower_add_immediate(ctx: &mut Lower<Inst>, src: Reg, imm: i64) -> Reg {
+    let dst = ctx.alloc_tmp(I64).only_reg().unwrap();
+
     // If we can fit offset or -offset in an imm12, use an add-imm
     // Otherwise, lower the constant first then add.
     if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
@@ -563,15 +560,18 @@ fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm:
             imm12,
         });
     } else {
-        lower_constant_u64(ctx, dst, imm as u64);
+        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
+        lower_constant_u64(ctx, tmp, imm as u64);
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd: dst,
-            rn: dst.to_reg(),
+            rn: tmp.to_reg(),
             rm: src,
         });
     }
+
+    dst.to_reg()
 }
 
 pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index 43ef1b4d63..7645c98413 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -53,9 +53,9 @@ block0(v0: i64, v1: i32):
 
 ; block0:
 ; add x3, x0, #68
-; add x3, x3, x0
-; add x3, x3, x1, SXTW
-; ldr w0, [x3, w1, SXTW]
+; add x5, x3, x0
+; add x7, x5, x1, SXTW
+; ldr w0, [x7, w1, SXTW]
 ; ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -69,10 +69,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; mov x5, x0
-; add x5, x5, x2
-; add x5, x5, x1
-; ldr w0, [x5, #48]
+; add x4, x0, x2
+; add x6, x4, x1
+; ldr w0, [x6, #48]
 ; ret
 
 function %f10(i64, i64, i64) -> i32 {
@@ -86,10 +85,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movz x4, #4100
-; add x4, x4, x1
-; add x4, x4, x2
-; ldr w0, [x4, x0]
+; movz x5, #4100
+; add x5, x5, x1
+; add x8, x5, x2
+; ldr w0, [x8, x0]
 ; ret
 
 function %f10() -> i32 {
@@ -139,10 +138,10 @@ block0(v0: i64):
 }
 
 ; block0:
-; movz w2, #51712
-; movk w2, w2, #15258, LSL #16
-; add x2, x2, x0
-; ldr w0, [x2]
+; movz w3, #51712
+; movk w3, w3, #15258, LSL #16
+; add x4, x3, x0
+; ldr w0, [x4]
 ; ret
 
 function %f14(i32) -> i32 {
@@ -233,10 +232,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5]
 ; stp x0, x1, [x5]
 ; ret
 
@@ -248,10 +245,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #16]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #16]
 ; stp x0, x1, [x5, #16]
 ; ret
 
@@ -263,10 +258,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #504]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #504]
 ; stp x0, x1, [x5, #504]
 ; ret
 
@@ -278,10 +271,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #-512]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #-512]
 ; stp x0, x1, [x5, #-512]
 ; ret
 
@@ -294,10 +285,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #32]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #32]
 ; stp x0, x1, [x5, #32]
 ; ret
 
@@ -310,11 +299,11 @@ block0(v0: i32):
 }
 
 ; block0:
-; sxtw x4, w0
-; mov x11, x0
-; ldp x0, x1, [x4]
-; sxtw x5, w11
-; stp x0, x1, [x5]
+; sxtw x3, w0
+; mov x8, x0
+; ldp x0, x1, [x3]
+; sxtw x4, w8
+; stp x0, x1, [x4]
 ; ret
 
 function %i128_32bit_sextend(i64, i32) -> i128 {
@@ -328,13 +317,11 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-; mov x9, x0
-; mov x5, x9
-; add x5, x5, x1, SXTW
-; mov x11, x1
-; ldp x0, x1, [x5, #24]
-; mov x7, x9
-; add x7, x7, x11, SXTW
-; stp x0, x1, [x7, #24]
+; add x4, x0, x1, SXTW
+; mov x11, x0
+; mov x9, x1
+; ldp x0, x1, [x4, #24]
+; add x5, x11, x9, SXTW
+; stp x0, x1, [x5, #24]
 ; ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/stack.clif b/cranelift/filetests/filetests/isa/aarch64/stack.clif
index 1bb9a205a2..e69532a143 100644
--- a/cranelift/filetests/filetests/isa/aarch64/stack.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/stack.clif
@@ -442,8 +442,8 @@ block0(v0: i128):
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; mov x4, sp
-; stp x0, x1, [x4]
+; mov x3, sp
+; stp x0, x1, [x3]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -461,8 +461,8 @@ block0(v0: i128):
 ; mov fp, sp
 ; sub sp, sp, #32
 ; block0:
-; add x4, sp, #32
-; stp x0, x1, [x4]
+; add x3, sp, #32
+; stp x0, x1, [x3]
 ; add sp, sp, #32
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -482,8 +482,8 @@ block0(v0: i128):
 ; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
-; mov x4, sp
-; stp x0, x1, [x4]
+; mov x3, sp
+; stp x0, x1, [x3]
 ; movz w16, #34480
 ; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
@@ -502,8 +502,8 @@ block0:
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; mov x3, sp
-; ldp x0, x1, [x3]
+; mov x2, sp
+; ldp x0, x1, [x2]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -521,8 +521,8 @@ block0:
 ; mov fp, sp
 ; sub sp, sp, #32
 ; block0:
-; add x3, sp, #32
-; ldp x0, x1, [x3]
+; add x2, sp, #32
+; ldp x0, x1, [x2]
 ; add sp, sp, #32
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -542,8 +542,8 @@ block0:
 ; movz w16, #34480
 ; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
-; mov x3, sp
-; ldp x0, x1, [x3]
+; mov x2, sp
+; ldp x0, x1, [x2]
 ; movz w16, #34480
 ; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
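The patch above replaces the old "allocate one writable temp and keep adding into it" pattern with a fold that allocates a fresh destination per addend and threads the latest destination through, so every intermediate value is defined exactly once. The following standalone sketch is not part of the patch: `Ctx`, `alloc_tmp`, and `emit_add` are hypothetical stand-ins for `Lower<Inst>`, `ctx.alloc_tmp(I64)`, and the emitted `AluRRR` add, with plain integers standing in for registers. It only illustrates the fold-into-fresh-destination shape that the reworked `lower_add_addends` uses.

// Illustrative sketch only; not Cranelift APIs.
struct Ctx {
    next_temp: u32,
    emitted: Vec<String>,
}

impl Ctx {
    // Stand-in for `ctx.alloc_tmp(I64).only_reg().unwrap()`: returns a fresh name.
    fn alloc_tmp(&mut self) -> u32 {
        let t = self.next_temp;
        self.next_temp += 1;
        t
    }

    // Stand-in for emitting an `add rd, rn, rm` instruction.
    fn emit_add(&mut self, rd: u32, rn: u32, rm: u32) {
        self.emitted.push(format!("add v{rd}, v{rn}, v{rm}"));
    }
}

// Each addend is added into a *new* destination; the fold threads the latest
// destination through as `prev` and returns the final one.
fn add_addends(ctx: &mut Ctx, init: u32, addends: &[u32]) -> u32 {
    addends.iter().fold(init, |prev, &reg| {
        let rd = ctx.alloc_tmp();
        ctx.emit_add(rd, prev, reg);
        rd
    })
}

fn main() {
    let mut ctx = Ctx { next_temp: 100, emitted: Vec::new() };
    let result = add_addends(&mut ctx, 0, &[1, 2, 3]);
    // Prints three adds, each writing a fresh temp (v100, v101, v102).
    println!("result in v{result}");
    for line in &ctx.emitted {
        println!("{line}");
    }
}

Because no destination is reused, the chain of adds is in single-assignment form, which appears to be the point of the change and is what the updated register numbers in the .clif expectations reflect.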