arm64: Use SignedOffset rather than PreIndexed addressing mode for callee-saved registers
This also passes `fixed_frame_storage_size` (previously `total_sp_adjust`) into `gen_clobber_save` so that it can be combined with other stack adjustments. Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -129,6 +129,18 @@ impl Into<AMode> for StackAMode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the size of stack space needed to store the
|
||||||
|
// `int_reg` and `vec_reg`.
|
||||||
|
fn saved_reg_stack_size(
|
||||||
|
int_reg: &[Writable<RealReg>],
|
||||||
|
vec_reg: &[Writable<RealReg>],
|
||||||
|
) -> (usize, usize) {
|
||||||
|
// Round up to multiple of 2, to keep 16-byte stack alignment.
|
||||||
|
let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
|
||||||
|
let vec_save_bytes = vec_reg.len() * 16;
|
||||||
|
(int_save_bytes, vec_save_bytes)
|
||||||
|
}
|
||||||
|
|
||||||
/// AArch64-specific ABI behavior. This struct just serves as an implementation
|
/// AArch64-specific ABI behavior. This struct just serves as an implementation
|
||||||
/// point for the trait; it is never actually instantiated.
|
/// point for the trait; it is never actually instantiated.
|
||||||
pub(crate) struct AArch64MachineDeps;
|
pub(crate) struct AArch64MachineDeps;
|
||||||
@@ -495,11 +507,18 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
_: &settings::Flags,
|
_: &settings::Flags,
|
||||||
clobbers: &Set<Writable<RealReg>>,
|
clobbers: &Set<Writable<RealReg>>,
|
||||||
|
fixed_frame_storage_size: u32,
|
||||||
) -> (u64, SmallVec<[Inst; 16]>) {
|
) -> (u64, SmallVec<[Inst; 16]>) {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
|
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
|
||||||
let mut clobber_size = 0;
|
|
||||||
for reg_pair in clobbered_int.chunks(2) {
|
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
|
||||||
|
let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32;
|
||||||
|
insts.extend(Self::gen_sp_reg_adjust(
|
||||||
|
-(total_save_bytes + fixed_frame_storage_size as i32),
|
||||||
|
));
|
||||||
|
|
||||||
|
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
|
||||||
let (r1, r2) = if reg_pair.len() == 2 {
|
let (r1, r2) = if reg_pair.len() == 2 {
|
||||||
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
|
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
|
||||||
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
|
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
|
||||||
@@ -510,36 +529,30 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
debug_assert!(r1.get_class() == RegClass::I64);
|
debug_assert!(r1.get_class() == RegClass::I64);
|
||||||
debug_assert!(r2.get_class() == RegClass::I64);
|
debug_assert!(r2.get_class() == RegClass::I64);
|
||||||
|
|
||||||
// stp r1, r2, [sp, #-16]!
|
// stp r1, r2, [sp, #(i * #16)]
|
||||||
insts.push(Inst::StoreP64 {
|
insts.push(Inst::StoreP64 {
|
||||||
rt: r1,
|
rt: r1,
|
||||||
rt2: r2,
|
rt2: r2,
|
||||||
mem: PairAMode::PreIndexed(
|
mem: PairAMode::SignedOffset(
|
||||||
writable_stack_reg(),
|
stack_reg(),
|
||||||
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
|
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
clobber_size += 16;
|
|
||||||
}
|
|
||||||
let vec_save_bytes = clobbered_vec.len() * 16;
|
|
||||||
if vec_save_bytes != 0 {
|
|
||||||
insts.push(Inst::AluRRImm12 {
|
|
||||||
alu_op: ALUOp::Sub64,
|
|
||||||
rd: writable_stack_reg(),
|
|
||||||
rn: stack_reg(),
|
|
||||||
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
|
|
||||||
});
|
|
||||||
clobber_size += vec_save_bytes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let vec_offset = int_save_bytes;
|
||||||
for (i, reg) in clobbered_vec.iter().enumerate() {
|
for (i, reg) in clobbered_vec.iter().enumerate() {
|
||||||
insts.push(Inst::FpuStore128 {
|
insts.push(Inst::FpuStore128 {
|
||||||
rd: reg.to_reg().to_reg(),
|
rd: reg.to_reg().to_reg(),
|
||||||
mem: AMode::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
|
mem: AMode::Unscaled(
|
||||||
|
stack_reg(),
|
||||||
|
SImm9::maybe_from_i64((vec_offset + (i * 16)) as i64).unwrap(),
|
||||||
|
),
|
||||||
srcloc: None,
|
srcloc: None,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
(clobber_size as u64, insts)
|
(total_save_bytes as u64, insts)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_clobber_restore(
|
fn gen_clobber_restore(
|
||||||
@@ -550,24 +563,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
|
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
|
||||||
|
|
||||||
for (i, reg) in clobbered_vec.iter().enumerate() {
|
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
|
||||||
insts.push(Inst::FpuLoad128 {
|
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
|
||||||
rd: Writable::from_reg(reg.to_reg().to_reg()),
|
|
||||||
mem: AMode::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
|
|
||||||
srcloc: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
let vec_save_bytes = clobbered_vec.len() * 16;
|
|
||||||
if vec_save_bytes != 0 {
|
|
||||||
insts.push(Inst::AluRRImm12 {
|
|
||||||
alu_op: ALUOp::Add64,
|
|
||||||
rd: writable_stack_reg(),
|
|
||||||
rn: stack_reg(),
|
|
||||||
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
for reg_pair in clobbered_int.chunks(2).rev() {
|
|
||||||
let (r1, r2) = if reg_pair.len() == 2 {
|
let (r1, r2) = if reg_pair.len() == 2 {
|
||||||
(
|
(
|
||||||
reg_pair[0].map(|r| r.to_reg()),
|
reg_pair[0].map(|r| r.to_reg()),
|
||||||
@@ -580,17 +577,36 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
|
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
|
||||||
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
|
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
|
||||||
|
|
||||||
// ldp r1, r2, [sp], #16
|
// ldp r1, r2, [sp, #(i * 16)]
|
||||||
insts.push(Inst::LoadP64 {
|
insts.push(Inst::LoadP64 {
|
||||||
rt: r1,
|
rt: r1,
|
||||||
rt2: r2,
|
rt2: r2,
|
||||||
mem: PairAMode::PostIndexed(
|
mem: PairAMode::SignedOffset(
|
||||||
writable_stack_reg(),
|
stack_reg(),
|
||||||
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
|
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i, reg) in clobbered_vec.iter().enumerate() {
|
||||||
|
insts.push(Inst::FpuLoad128 {
|
||||||
|
rd: Writable::from_reg(reg.to_reg().to_reg()),
|
||||||
|
mem: AMode::Unscaled(
|
||||||
|
stack_reg(),
|
||||||
|
SImm9::maybe_from_i64(((i * 16) + int_save_bytes) as i64).unwrap(),
|
||||||
|
),
|
||||||
|
srcloc: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// For non-baldrdash calling conventions, the frame pointer
|
||||||
|
// will be moved into the stack pointer in the epilogue, so we
|
||||||
|
// can skip restoring the stack pointer value with this `add`.
|
||||||
|
if call_conv.extends_baldrdash() {
|
||||||
|
let total_save_bytes = (int_save_bytes + vec_save_bytes) as i32;
|
||||||
|
insts.extend(Self::gen_sp_reg_adjust(total_save_bytes));
|
||||||
|
}
|
||||||
|
|
||||||
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
|
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
|
||||||
// register. We may have allocated it for something else and clobbered
|
// register. We may have allocated it for something else and clobbered
|
||||||
// it, but the ABI expects us to leave the TLS register unchanged.
|
// it, but the ABI expects us to leave the TLS register unchanged.
|
||||||
|
|||||||
@@ -389,13 +389,15 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
_: &settings::Flags,
|
_: &settings::Flags,
|
||||||
clobbers: &Set<Writable<RealReg>>,
|
clobbers: &Set<Writable<RealReg>>,
|
||||||
|
fixed_frame_storage_size: u32,
|
||||||
) -> (u64, SmallVec<[Self::I; 16]>) {
|
) -> (u64, SmallVec<[Self::I; 16]>) {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
// Find all clobbered registers that are callee-save. These are only I64
|
// Find all clobbered registers that are callee-save. These are only I64
|
||||||
// registers (all XMM registers are caller-save) so we can compute the
|
// registers (all XMM registers are caller-save) so we can compute the
|
||||||
// total size of the needed stack space easily.
|
// total size of the needed stack space easily.
|
||||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||||
let stack_size = 8 * clobbered.len() as u32;
|
let clobbered_size = 8 * clobbered.len() as u32;
|
||||||
|
let stack_size = clobbered_size + fixed_frame_storage_size;
|
||||||
// Align to 16 bytes.
|
// Align to 16 bytes.
|
||||||
let stack_size = (stack_size + 15) & !15;
|
let stack_size = (stack_size + 15) & !15;
|
||||||
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
||||||
@@ -428,7 +430,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(stack_size as u64, insts)
|
(clobbered_size as u64, insts)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_clobber_restore(
|
fn gen_clobber_restore(
|
||||||
|
|||||||
@@ -325,6 +325,7 @@ pub trait ABIMachineSpec {
|
|||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
flags: &settings::Flags,
|
flags: &settings::Flags,
|
||||||
clobbers: &Set<Writable<RealReg>>,
|
clobbers: &Set<Writable<RealReg>>,
|
||||||
|
fixed_frame_storage_size: u32,
|
||||||
) -> (u64, SmallVec<[Self::I; 16]>);
|
) -> (u64, SmallVec<[Self::I; 16]>);
|
||||||
|
|
||||||
/// Generate a clobber-restore sequence. This sequence should perform the
|
/// Generate a clobber-restore sequence. This sequence should perform the
|
||||||
@@ -931,7 +932,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
let mask = 2 * bytes - 1;
|
let mask = 2 * bytes - 1;
|
||||||
let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack.
|
let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack.
|
||||||
|
|
||||||
let mut total_sp_adjust = 0;
|
let mut fixed_frame_storage_size = 0;
|
||||||
|
|
||||||
if !self.call_conv.extends_baldrdash() {
|
if !self.call_conv.extends_baldrdash() {
|
||||||
// Leaf functions with zero stack don't need a stack check if one's
|
// Leaf functions with zero stack don't need a stack check if one's
|
||||||
@@ -943,7 +944,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if total_stacksize > 0 {
|
if total_stacksize > 0 {
|
||||||
total_sp_adjust += total_stacksize as u64;
|
fixed_frame_storage_size += total_stacksize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -957,15 +958,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
// [crate::machinst::abi_impl](this module) for more details on
|
// [crate::machinst::abi_impl](this module) for more details on
|
||||||
// stackframe layout and nominal SP maintenance.
|
// stackframe layout and nominal SP maintenance.
|
||||||
|
|
||||||
if total_sp_adjust > 0 {
|
|
||||||
// sub sp, sp, #total_stacksize
|
|
||||||
let adj = total_sp_adjust as i32;
|
|
||||||
insts.extend(M::gen_sp_reg_adjust(-adj));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save clobbered registers.
|
// Save clobbered registers.
|
||||||
let (clobber_size, clobber_insts) =
|
let (clobber_size, clobber_insts) = M::gen_clobber_save(
|
||||||
M::gen_clobber_save(self.call_conv, &self.flags, &self.clobbered);
|
self.call_conv,
|
||||||
|
&self.flags,
|
||||||
|
&self.clobbered,
|
||||||
|
fixed_frame_storage_size,
|
||||||
|
);
|
||||||
insts.extend(clobber_insts);
|
insts.extend(clobber_insts);
|
||||||
|
|
||||||
if clobber_size > 0 {
|
if clobber_size > 0 {
|
||||||
|
|||||||
@@ -179,7 +179,6 @@ block0:
|
|||||||
; nextln: ldr q8, [sp]
|
; nextln: ldr q8, [sp]
|
||||||
; nextln: ldr q9, [sp, #16]
|
; nextln: ldr q9, [sp, #16]
|
||||||
; nextln: ldr q10, [sp, #32]
|
; nextln: ldr q10, [sp, #32]
|
||||||
; nextln: add sp, sp, #48
|
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -230,7 +229,6 @@ block0:
|
|||||||
; nextln: ldr q8, [sp]
|
; nextln: ldr q8, [sp]
|
||||||
; nextln: ldr q9, [sp, #16]
|
; nextln: ldr q9, [sp, #16]
|
||||||
; nextln: ldr q10, [sp, #32]
|
; nextln: ldr q10, [sp, #32]
|
||||||
; nextln: add sp, sp, #48
|
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -285,7 +283,6 @@ block0:
|
|||||||
; nextln: ldr q8, [sp]
|
; nextln: ldr q8, [sp]
|
||||||
; nextln: ldr q9, [sp, #16]
|
; nextln: ldr q9, [sp, #16]
|
||||||
; nextln: ldr q10, [sp, #32]
|
; nextln: ldr q10, [sp, #32]
|
||||||
; nextln: add sp, sp, #48
|
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|||||||
@@ -77,8 +77,8 @@ block3(v7: r64, v8: r64):
|
|||||||
; check: Block 0:
|
; check: Block 0:
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: sub sp, sp, #32
|
; nextln: sub sp, sp, #48
|
||||||
; nextln: stp x19, x20, [sp, #-16]!
|
; nextln: stp x19, x20, [sp]
|
||||||
; nextln: virtual_sp_offset_adjust 16
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
; nextln: mov x19, x0
|
; nextln: mov x19, x0
|
||||||
; nextln: mov x20, x1
|
; nextln: mov x20, x1
|
||||||
@@ -111,7 +111,7 @@ block3(v7: r64, v8: r64):
|
|||||||
; nextln: ldr x1, [x1]
|
; nextln: ldr x1, [x1]
|
||||||
; nextln: mov x2, x1
|
; nextln: mov x2, x1
|
||||||
; nextln: mov x1, x19
|
; nextln: mov x1, x19
|
||||||
; nextln: ldp x19, x20, [sp], #16
|
; nextln: ldp x19, x20, [sp]
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|||||||
Reference in New Issue
Block a user