Merge pull request #1607 from cfallin/aarch64-stack-frame

Rework aarch64 stack frame implementation to use positive offsets.
Chris Fallin
2020-05-06 10:29:30 -07:00
committed by GitHub
16 changed files with 496 additions and 320 deletions

View File

@@ -1,4 +1,63 @@
//! Implementation of the standard AArch64 ABI.
//!
//! We implement the standard AArch64 ABI, as documented by ARM. This ABI
//! specifies how arguments are passed (in registers or on the stack, as
//! appropriate), which registers are caller- and callee-saved, and how a
//! particular part of the stack frame (the FP/LR pair) must be linked through
//! the active stack frames.
//!
//! Note, however, that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need to be
//! able to generate instructions (or instruction sequences) to access
//! arguments, stack slots, and spill slots before we know how many spill slots
//! or clobber-saves there will be, because of our pass structure. We also
//! prefer positive offsets to negative offsets because of an asymmetry in
//! AArch64 addressing modes (positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary offset). Finally, it
//! is not allowed to access memory below the current SP value.
//!
//! As a result, we keep the FP/LR pair just below stack args so that we can
//! access these args at known offsets from FP, and we access on-stack storage
//! using positive offsets from SP. In order to allow codegen for the latter
//! before knowing how many clobber-saves we have, and also allow it while SP is
//! being adjusted to set up a call, we implement a "nominal SP" tracking
//! feature by which a fixup (distance between actual SP and a "nominal" SP) is
//! known at each instruction. See the documentation for
//! [MemArg::NominalSPOffset] for more on this.
//!
//! The stack looks like:
//!
//! ```plain
//! (high address)
//!
//! +---------------------------+
//! | ... |
//! | stack args |
//! | (accessed via FP) |
//! +---------------------------+
//! SP at function entry -----> | LR (pushed by prologue) |
//! +---------------------------+
//! FP after prologue --------> | FP (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | spill slots |
//! | (accessed via nominal-SP) |
//! | ... |
//! | stack slots |
//! | (accessed via nominal-SP) |
//! nominal SP ---------------> | (alloc'd by prologue) |
//! +---------------------------+
//! | ... |
//! | clobbered callee-saves |
//! SP at end of prologue ----> | (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | args for call |
//! SP before making a call --> | (pushed at callsite) |
//! +---------------------------+
//!
//! (low address)
//! ```
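The nominal-SP scheme can be summarized with a small standalone model (the names below are illustrative assumptions, not Cranelift's actual types): stack-slot and spill-slot references are expressed as positive offsets from nominal SP, and emission folds in a running fixup whenever the real SP moves below that point (clobber-save pushes, outgoing call args).

```rust
// Minimal standalone model of nominal-SP addressing; names here are
// illustrative assumptions, not Cranelift's actual types.
#[derive(Default)]
struct EmitModel {
    // Bytes by which the real SP currently sits below nominal SP.
    virtual_sp_offset: i64,
}

impl EmitModel {
    // Mirrors the effect of a VirtualSPOffsetAdj meta-instruction.
    fn adjust_virtual_sp(&mut self, delta: i64) {
        self.virtual_sp_offset += delta;
    }

    // A nominal-SP-relative offset becomes a real-SP-relative offset by
    // folding in the running fixup.
    fn real_sp_offset(&self, nominal_off: i64) -> i64 {
        nominal_off + self.virtual_sp_offset
    }
}

fn main() {
    let mut state = EmitModel::default();
    // Prologue pushes 32 bytes of clobbered callee-saves below nominal SP.
    state.adjust_virtual_sp(32);
    // A stack slot at nominal-SP+8 is addressed as [sp, #40] afterwards.
    assert_eq!(state.real_sp_offset(8), 40);
    // Setting up a call pushes 16 bytes of args; the same slot is now [sp, #56].
    state.adjust_virtual_sp(16);
    assert_eq!(state.real_sp_offset(8), 56);
}
```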
use crate::ir;
use crate::ir::types;
@@ -13,7 +72,7 @@ use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use log::debug;
use log::{debug, trace};
/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
@@ -188,7 +247,7 @@ pub struct AArch64ABIBody {
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Total frame size.
frame_size: Option<u32>,
total_frame_size: Option<u32>,
/// Calling convention this function expects.
call_conv: isa::CallConv,
/// The settings controlling this function's compilation.
@@ -347,7 +406,7 @@ impl AArch64ABIBody {
stackslots_size: stack_offset,
clobbered: Set::empty(),
spillslots: None,
frame_size: None,
total_frame_size: None,
call_conv,
flags,
is_leaf: f.is_leaf(),
@@ -355,9 +414,9 @@ impl AArch64ABIBody {
}
}
/// Returns the size of a function call frame (including return address and FP) for this
/// function's body.
fn frame_size(&self) -> i64 {
/// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
/// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
fn fp_to_arg_offset(&self) -> i64 {
if self.call_conv.extends_baldrdash() {
let num_words = self.flags.baldrdash_prologue_words() as i64;
debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
@@ -383,8 +442,8 @@ impl AArch64ABIBody {
/// happening so late in the pipeline (e.g. after register allocation). This
/// means that we need to do manual register allocation here and also be
/// careful to not clobber any callee-saved or argument registers. For now
/// this routine makes do with the `writable_spilltmp_reg` as one temporary
/// register, and a second register of `x16` which is caller-saved. This
/// this routine makes do with `spilltmp_reg` as one temporary
/// register and `tmp2` as a second, caller-saved temporary. This
/// should be fine for us since no spills should happen in this sequence of
/// instructions, so our register won't get accidentally clobbered.
///
@@ -413,9 +472,9 @@ impl AArch64ABIBody {
// Note though that `stack_limit`'s register may be the same as
// `scratch`. If our stack size doesn't fit into an immediate this
// means we need a second scratch register for loading the stack size
// into a register. We use `x16` here since it's caller-saved and we're
// in the function prologue and nothing else is allocated to it yet.
// into a register.
let scratch = writable_spilltmp_reg();
let scratch2 = writable_tmp2_reg();
let stack_size = u64::from(stack_size);
if let Some(imm12) = Imm12::maybe_from_u64(stack_size) {
insts.push(Inst::AluRRImm12 {
@@ -425,16 +484,12 @@ impl AArch64ABIBody {
imm12,
});
} else {
let scratch2 = 16;
insts.extend(Inst::load_constant(
Writable::from_reg(xreg(scratch2)),
stack_size.into(),
));
insts.extend(Inst::load_constant(scratch2, stack_size.into()));
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd: scratch,
rn: stack_limit,
rm: xreg(scratch2),
rm: scratch2.to_reg(),
extendop: ExtendOp::UXTX,
});
}
@@ -460,8 +515,7 @@ impl AArch64ABIBody {
}
}
fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
match ty {
types::B1
| types::B8
@@ -486,15 +540,11 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst
mem,
srcloc: None,
},
_ => unimplemented!("load_stack_from_fp({})", ty),
_ => unimplemented!("load_stack({})", ty),
}
}
fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
debug_assert!(match &mem {
MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(),
_ => true,
});
match ty {
types::B1
| types::B8
@@ -523,50 +573,6 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
}
}
fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
store_stack(MemArg::FPOffset(fp_offset), from_reg, ty)
}
fn store_stack_sp<C: LowerCtx<I = Inst>>(
ctx: &mut C,
sp_offset: i64,
from_reg: Reg,
ty: Type,
) -> Vec<Inst> {
if SImm9::maybe_from_i64(sp_offset).is_some() {
vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)]
} else {
// mem_finalize will try to generate an add, but in an addition, x31 is the zero register,
// not sp! So we have to synthesize the full add here.
let tmp1 = ctx.tmp(RegClass::I64, I64);
let tmp2 = ctx.tmp(RegClass::I64, I64);
let mut result = Vec::new();
// tmp1 := sp
result.push(Inst::Mov {
rd: tmp1,
rm: stack_reg(),
});
// tmp2 := offset
for inst in Inst::load_constant(tmp2, sp_offset as u64) {
result.push(inst);
}
// tmp1 := add tmp1, tmp2
result.push(Inst::AluRRR {
alu_op: ALUOp::Add64,
rd: tmp1,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
// Actual store.
result.push(store_stack(
MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()),
from_reg,
ty,
));
result
}
}
fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
@@ -706,7 +712,11 @@ impl ABIBody for AArch64ABIBody {
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.args[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
&ABIArg::Stack(off, ty) => load_stack_from_fp(off + self.frame_size(), into_reg, ty),
&ABIArg::Stack(off, ty) => load_stack(
MemArg::FPOffset(self.fp_to_arg_offset() + off),
into_reg,
ty,
),
}
}
@@ -767,8 +777,8 @@ impl ABIBody for AArch64ABIBody {
}
_ => {}
};
ret.push(store_stack_fp(
off + self.frame_size(),
ret.push(store_stack(
MemArg::FPOffset(self.fp_to_arg_offset() + off),
from_reg.to_reg(),
ty,
))
@@ -793,6 +803,7 @@ impl ABIBody for AArch64ABIBody {
self.clobbered = clobbered;
}
/// Load from a stackslot.
fn load_stackslot(
&self,
slot: StackSlot,
@@ -800,47 +811,54 @@ impl ABIBody for AArch64ABIBody {
ty: Type,
into_reg: Writable<Reg>,
) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
// Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
load_stack_from_fp(fp_off, into_reg, ty)
let sp_off: i64 = stack_off + (offset as i64);
trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off);
load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
}
/// Store to a stackslot.
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
// Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
store_stack_fp(fp_off, from_reg, ty)
let sp_off: i64 = stack_off + (offset as i64);
trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off);
store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
}
/// Produce an instruction that computes a stackslot address.
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
// Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
let sp_off: i64 = stack_off + (offset as i64);
Inst::LoadAddr {
rd: into_reg,
mem: MemArg::FPOffset(fp_off),
mem: MemArg::NominalSPOffset(sp_off),
}
}
// Load from a spillslot.
/// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
// Note that when spills/fills are generated, we don't yet know how many
// spillslots there will be, so we allocate *downward* from the beginning
// of the stackslot area. Hence: FP - stackslot_size - 8*spillslot -
// sizeof(ty).
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
load_stack_from_fp(fp_off, into_reg, ty)
let spill_off = islot * 8;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
}
// Store to a spillslot.
/// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
store_stack_fp(fp_off, from_reg, ty)
let spill_off = islot * 8;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
}
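As a quick worked example of the offsets above (sizes assumed for illustration): spill slots are laid out just above the stack-slot area, so spill slot `i` lives at nominal-SP + `stackslots_size` + 8·`i`.

```rust
// Worked example of the spill-slot offset computation above; the concrete
// sizes are assumptions for illustration.
fn spillslot_nominal_sp_off(stackslots_size: i64, slot_index: i64) -> i64 {
    // Spill slots are addressed upward from the end of the stack-slot area.
    stackslots_size + 8 * slot_index
}

fn main() {
    // With 48 bytes of stack slots, spill slot 2 sits at nominal-SP + 64.
    assert_eq!(spillslot_nominal_sp_off(48, 2), 64);
}
```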
fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -916,9 +934,18 @@ impl ABIBody for AArch64ABIBody {
}
}
// N.B.: "nominal SP", which we use to refer to stackslots
// and spillslots, is *here* (the value of SP at this program point).
// If we push any clobbers below, we emit a virtual-SP adjustment
// meta-instruction so that the nominal-SP references behave as if SP
// were still at this point. See documentation for
// [crate::isa::aarch64::abi](this module) for more details on
// stackframe layout and nominal-SP maintenance.
// Save clobbered registers.
let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec());
let mut clobber_size = 0;
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
@@ -939,6 +966,7 @@ impl ABIBody for AArch64ABIBody {
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
});
clobber_size += 16;
}
let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 {
@@ -948,6 +976,7 @@ impl ABIBody for AArch64ABIBody {
rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
});
clobber_size += vec_save_bytes;
}
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuStore128 {
@@ -957,7 +986,13 @@ impl ABIBody for AArch64ABIBody {
});
}
self.frame_size = Some(total_stacksize);
if clobber_size > 0 {
insts.push(Inst::VirtualSPOffsetAdj {
offset: clobber_size as i64,
});
}
self.total_frame_size = Some(total_stacksize);
insts
}
@@ -1009,6 +1044,12 @@ impl ABIBody for AArch64ABIBody {
});
}
// N.B.: we do *not* emit a nominal-SP adjustment here, because (i) there will be no
// references to nominal-SP offsets before the return below, and (ii) the instruction
// emission tracks the running SP offset linearly (in straight-line order), not according to
// the CFG, so early returns in the middle of function bodies would cause an incorrect
// offset for the rest of the body.
if !self.call_conv.extends_baldrdash() {
// The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD.
@@ -1037,7 +1078,7 @@ impl ABIBody for AArch64ABIBody {
}
fn frame_size(&self) -> u32 {
self.frame_size
self.total_frame_size
.expect("frame size not computed before prologue generation")
}
@@ -1138,20 +1179,32 @@ impl AArch64ABICall {
}
}
fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
if amt > 0 {
fn adjust_stack(amount: u64, is_sub: bool) -> Vec<Inst> {
if amount > 0 {
let sp_adjustment = if is_sub {
amount as i64
} else {
-(amount as i64)
};
let adj_meta_insn = Inst::VirtualSPOffsetAdj {
offset: sp_adjustment,
};
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
if let Some(imm12) = Imm12::maybe_from_u64(amt) {
vec![Inst::AluRRImm12 {
if let Some(imm12) = Imm12::maybe_from_u64(amount) {
vec![
adj_meta_insn,
Inst::AluRRImm12 {
alu_op,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
}]
},
]
} else {
let const_load = Inst::LoadConst64 {
rd: writable_spilltmp_reg(),
const_data: amt,
const_data: amount,
};
let adj = Inst::AluRRRExtend {
alu_op,
@@ -1160,7 +1213,7 @@ fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
rm: spilltmp_reg(),
extendop: ExtendOp::UXTX,
};
vec![const_load, adj]
vec![adj_meta_insn, const_load, adj]
}
} else {
vec![]
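The sign convention above is the subtle part: a downward SP move (sub) increases the distance between real SP and nominal SP, while an upward move (add) shrinks it back. A tiny sketch under assumed names:

```rust
// Tiny sketch of the sign convention used by adjust_stack above; names are
// illustrative assumptions.
fn virtual_sp_delta(amount: u64, is_sub: bool) -> i64 {
    // sub sp, sp, #amount moves real SP further below nominal SP (+amount);
    // add sp, sp, #amount moves it back up (-amount).
    if is_sub { amount as i64 } else { -(amount as i64) }
}

fn main() {
    let mut virtual_sp_offset = 0i64;
    // Callsite: reserve 16 bytes of outgoing args, then release them.
    virtual_sp_offset += virtual_sp_delta(16, true);
    assert_eq!(virtual_sp_offset, 16);
    virtual_sp_offset += virtual_sp_delta(16, false);
    assert_eq!(virtual_sp_offset, 0);
}
```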
@@ -1182,19 +1235,14 @@ impl ABICall for AArch64ABICall {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
}
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
&self,
ctx: &mut C,
idx: usize,
from_reg: Reg,
) -> Vec<Inst> {
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Inst> {
match &self.sig.args[idx] {
&ABIArg::Reg(reg, ty) => vec![Inst::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg,
ty,
)],
&ABIArg::Stack(off, ty) => store_stack_sp(ctx, off, from_reg, ty),
&ABIArg::Stack(off, ty) => vec![store_stack(MemArg::SPOffset(off), from_reg, ty)],
}
}

View File

@@ -112,7 +112,9 @@ pub enum MemLabel {
/// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)]
pub enum MemArg {
Label(MemLabel),
//
// Real ARM64 addressing modes:
//
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
@@ -137,11 +139,31 @@ pub enum MemArg {
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer. Lowered into a real amode at emission.
//
// Virtual addressing modes that are lowered at emission time:
//
/// Reference to a "label": e.g., a symbol.
Label(MemLabel),
/// Offset from the stack pointer.
SPOffset(i64),
/// Offset from the frame pointer. Lowered into a real amode at emission.
/// Offset from the frame pointer.
FPOffset(i64),
/// Offset from the "nominal stack pointer", which is where the real SP is
/// just after stack and spill slots are allocated in the function prologue.
/// At emission time, this is converted to `SPOffset` with a fixup added to
/// the offset constant. The fixup is a running value that is tracked as
/// emission iterates through instructions in linear order, and can be
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
///
/// The standard ABI is in charge of handling this (by emitting the
/// adjustment meta-instructions). It maintains the invariant that "nominal
/// SP" is where the actual SP is after the function prologue and before
/// clobber pushes. See the diagram in the documentation for
/// [crate::isa::aarch64::abi](the ABI module) for more details.
NominalSPOffset(i64),
}
impl MemArg {
@@ -443,7 +465,7 @@ impl ShowWithRRU for MemArg {
simm9.show_rru(mb_rru)
),
// Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => {
panic!("Unexpected stack-offset mem-arg mode!")
}
}

View File

@@ -10,6 +10,7 @@ use regalloc::{Reg, RegClass, Writable};
use alloc::vec::Vec;
use core::convert::TryFrom;
use log::debug;
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
@@ -23,33 +24,44 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg, state: &EmitState) -> (Vec<Inst>, MemArg) {
match mem {
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => {
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => {
let basereg = match mem {
&MemArg::SPOffset(..) => stack_reg(),
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
&MemArg::FPOffset(..) => fp_reg(),
_ => unreachable!(),
};
let adj = match mem {
&MemArg::NominalSPOffset(..) => {
debug!(
"mem_finalize: nominal SP offset {} + adj {} -> {}",
off,
state.virtual_sp_offset,
off + state.virtual_sp_offset
);
state.virtual_sp_offset
}
_ => 0,
};
let off = off + adj;
if let Some(simm9) = SImm9::maybe_from_i64(off) {
let mem = MemArg::Unscaled(basereg, simm9);
(vec![], mem)
} else {
// In an addition, x31 is the zero register, not sp; we have only one temporary
// so we can't do the proper add here.
debug_assert_ne!(
basereg,
stack_reg(),
"should have diverted SP before mem_finalize"
);
let tmp = writable_spilltmp_reg();
let mut const_insts = Inst::load_constant(tmp, off as u64);
let add_inst = Inst::AluRRR {
// N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
// is a valid base (for SPOffset) which we must handle here.
// Also, SP needs to be the first arg, not second.
let add_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd: tmp,
rn: tmp.to_reg(),
rm: basereg,
rn: basereg,
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
const_insts.push(add_inst);
(const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
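A compact sketch of the lowering above (names and textual "instructions" are assumptions, not the real Cranelift helpers): an offset that fits the 9-bit signed unscaled range becomes a single base+simm9 amode; anything larger is synthesized into the spill temporary and added to the base with the extended-register form, which accepts SP as the first source operand.

```rust
// Standalone sketch of the mem_finalize split; all names are illustrative
// assumptions rather than the real Cranelift helpers.
#[derive(Debug)]
enum LoweredAmode {
    // [base, #simm9] with simm9 in -256..=255.
    Unscaled { base: &'static str, simm9: i64 },
    // [x16] after: <load offset into x16> ; add x16, base, x16, UXTX.
    RegAfterSetup { setup: Vec<String>, base: &'static str },
}

fn finalize(base: &'static str, off: i64, virtual_sp_adj: i64, is_nominal_sp: bool) -> LoweredAmode {
    // Nominal-SP references fold in the running real-SP fixup first.
    let off = if is_nominal_sp { off + virtual_sp_adj } else { off };
    if (-256..=255).contains(&off) {
        LoweredAmode::Unscaled { base, simm9: off }
    } else {
        // The add must use the extended-register form: the shifted-register
        // form reads register 31 as XZR, but the base here may be SP, and SP
        // has to be the first source operand.
        let setup = vec![
            format!("<load {} into x16>", off),
            format!("add x16, {}, x16, UXTX", base),
        ];
        LoweredAmode::RegAfterSetup { setup, base: "x16" }
    }
}

fn main() {
    println!("{:?}", finalize("sp", 8, 32, true));     // small: [sp, #40]
    println!("{:?}", finalize("fp", 32768, 0, false)); // large: via x16
}
```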
@@ -322,8 +334,16 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
virtual_sp_offset: i64,
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, flags: &settings::Flags) {
type State = EmitState;
fn emit(&self, sink: &mut O, flags: &settings::Flags, state: &mut EmitState) {
match self {
&Inst::AluRRR { alu_op, rd, rn, rm } => {
let top11 = match alu_op {
@@ -596,10 +616,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
// ldst encoding helpers take Reg, not Writable<Reg>.
@@ -697,9 +717,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
}
// Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Should not see stack-offset here!")
}
&MemArg::SPOffset(..)
| &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
}
}
@@ -739,10 +759,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
let op = match self {
@@ -794,9 +814,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
}
// Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Should not see stack-offset here!")
}
&MemArg::SPOffset(..)
| &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
}
}
@@ -980,11 +1000,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(8),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put4(const_data.to_bits());
}
&Inst::LoadFpuConst64 { rd, const_data } => {
@@ -993,11 +1013,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put8(const_data.to_bits());
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
@@ -1084,7 +1104,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
if top22 != 0 {
sink.put4(enc_extend(top22, rd, rn));
} else {
Inst::mov32(rd, rn).emit(sink, flags);
Inst::mov32(rd, rn).emit(sink, flags, state);
}
}
&Inst::Extend {
@@ -1107,7 +1127,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: zero_reg(),
rm: rd.to_reg(),
};
sub_inst.emit(sink, flags);
sub_inst.emit(sink, flags, state);
}
&Inst::Extend {
rd,
@@ -1248,13 +1268,13 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(rtmp2, ridx, I64);
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Load address of jump table
let inst = Inst::Adr {
rd: rtmp1,
label: MemLabel::PCRel(16),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Load value out of jump table
let inst = Inst::SLoad32 {
rd: rtmp2,
@@ -1266,7 +1286,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Add base of jump table to jump-table-sourced block offset
let inst = Inst::AluRRR {
alu_op: ALUOp::Add64,
@@ -1274,14 +1294,14 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Branch to computed address. (`targets` here is only used for successor queries
// and is not needed for emission.)
let inst = Inst::IndirectBr {
rn: rtmp1.to_reg(),
targets: vec![],
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets).
for target in targets {
let off = target.as_offset_words() * 4;
@@ -1297,11 +1317,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put8(const_data);
}
&Inst::LoadExtName {
@@ -1315,11 +1335,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
if flags.emit_all_ones_funcaddrs() {
sink.put8(u64::max_value());
@@ -1327,52 +1347,81 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put8(0);
}
}
&Inst::LoadAddr { rd, ref mem } => match *mem {
MemArg::FPOffset(fp_off) => {
let alu_op = if fp_off < 0 {
&Inst::LoadAddr { rd, ref mem } => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags, state);
}
let (reg, offset) = match mem {
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
-offset as u64
} else {
offset as u64
};
let alu_op = if offset < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
{
let inst = Inst::AluRRImm12 {
if offset == 0 {
let mov = Inst::mov(rd, reg);
mov.emit(sink, flags, state);
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
alu_op,
rd,
rn: reg,
imm12,
rn: fp_reg(),
};
inst.emit(sink, flags);
add.emit(sink, flags, state);
} else {
let const_insts =
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
for inst in const_insts {
inst.emit(sink, flags);
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
// that no other instructions will be inserted here (we're emitting directly),
// and a live range of `tmp2` should not span this instruction, so this use
// should otherwise be correct.
debug_assert!(rd.to_reg() != tmp2_reg());
debug_assert!(reg != tmp2_reg());
let tmp = writable_tmp2_reg();
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
insn.emit(sink, flags, state);
}
let inst = Inst::AluRRR {
let add = Inst::AluRRR {
alu_op,
rd,
rn: fp_reg(),
rm: rd.to_reg(),
rn: reg,
rm: tmp.to_reg(),
};
inst.emit(sink, flags);
add.emit(sink, flags, state);
}
}
_ => unimplemented!("{:?}", mem),
},
&Inst::GetPinnedReg { rd } => {
let inst = Inst::Mov {
rd,
rm: xreg(PINNED_REG),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
&Inst::SetPinnedReg { rm } => {
let inst = Inst::Mov {
rd: Writable::from_reg(xreg(PINNED_REG)),
rm,
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
&Inst::VirtualSPOffsetAdj { offset } => {
debug!(
"virtual sp offset adjusted by {} -> {}",
offset,
state.virtual_sp_offset + offset
);
state.virtual_sp_offset += offset;
}
}
}
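The offset-materialization choice in the `LoadAddr` lowering above follows a common AArch64 pattern, sketched here with assumed helpers: a zero offset is just a move, a magnitude that fits the 12-bit (optionally shifted-by-12) immediate uses a single add/sub, and anything larger goes through the second temporary (x17 / `tmp2`) before an extended-register add.

```rust
// Illustrative sketch of the LoadAddr offset decision; helper names and the
// textual "instructions" are assumptions for readability.
fn fits_imm12(v: u64) -> bool {
    // 12-bit immediate, optionally shifted left by 12.
    v < 0x1000 || ((v & 0xfff) == 0 && v < 0x100_0000)
}

fn load_addr(rd: &str, base: &str, offset: i64) -> Vec<String> {
    let abs = offset.unsigned_abs();
    let op = if offset < 0 { "sub" } else { "add" };
    if offset == 0 {
        vec![format!("mov {}, {}", rd, base)]
    } else if fits_imm12(abs) {
        vec![format!("{} {}, {}, #{}", op, rd, base, abs)]
    } else {
        // Materialize the constant in the second temp (x17 / tmp2), since the
        // base itself may already be the spill temp after mem_finalize.
        vec![
            format!("<load {} into x17>", abs),
            format!("{} {}, {}, x17, UXTX", op, rd, base),
        ]
    }
}

fn main() {
    assert_eq!(load_addr("x0", "sp", 0), vec!["mov x0, sp"]);
    assert_eq!(load_addr("x0", "sp", 16), vec!["add x0, sp, #16"]);
    assert_eq!(load_addr("x0", "fp", (1 << 20) + 1).len(), 2);
}
```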

View File

@@ -1313,8 +1313,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(32768),
srcloc: None,
},
"0F0090D2EF011D8BE10140F9",
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]",
"100090D2B063308B010240F9",
"movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
@@ -1322,8 +1322,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(-32768),
srcloc: None,
},
"EFFF8F92EF011D8BE10140F9",
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]",
"F0FF8F92B063308B010240F9",
"movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
@@ -1331,8 +1331,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(1048576), // 2^20
srcloc: None,
},
"0F02A0D2EF011D8BE10140F9",
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
"1002A0D2B063308B010240F9",
"movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
@@ -1340,8 +1340,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
srcloc: None,
},
"2F0080D20F02A0F2EF011D8BE10140F9",
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
"300080D21002A0F2B063308B010240F9",
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
@@ -2794,7 +2794,7 @@ fn test_aarch64_binemit() {
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags);
insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size()
};
@@ -2802,7 +2802,7 @@ fn test_aarch64_binemit() {
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags);
insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink);
let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding);

View File

@@ -134,6 +134,11 @@ impl SImm9 {
pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff
}
/// Signed value of immediate.
pub fn value(&self) -> i32 {
self.value as i32
}
}
/// An unsigned, scaled 12-bit offset.
@@ -172,6 +177,11 @@ impl UImm12Scaled {
pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
}
/// Value after scaling.
pub fn value(&self) -> u32 {
self.value as u32 * self.scale_ty.bytes()
}
}
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted

View File

@@ -13,7 +13,6 @@ use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper, Set};
use alloc::vec::Vec;
use core::convert::TryFrom;
use smallvec::{smallvec, SmallVec};
use std::string::{String, ToString};
@@ -741,6 +740,12 @@ pub enum Inst {
SetPinnedReg {
rm: Reg,
},
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
/// controls how MemArg::NominalSPOffset args are lowered.
VirtualSPOffsetAdj {
offset: i64,
},
}
fn count_zero_half_words(mut value: u64) -> usize {
@@ -876,7 +881,7 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
&MemArg::FPOffset(..) => {
collector.add_use(fp_reg());
}
&MemArg::SPOffset(..) => {
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => {
collector.add_use(stack_reg());
}
}
@@ -1135,6 +1140,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::SetPinnedReg { rm } => {
collector.add_use(rm);
}
&Inst::VirtualSPOffsetAdj { .. } => {}
}
}
@@ -1186,7 +1192,9 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
&mut MemArg::Label(..) => {}
&mut MemArg::PreIndexed(ref mut r, ..) => map_mod(m, r),
&mut MemArg::PostIndexed(ref mut r, ..) => map_mod(m, r),
&mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) => {}
&mut MemArg::FPOffset(..)
| &mut MemArg::SPOffset(..)
| &mut MemArg::NominalSPOffset(..) => {}
};
}
@@ -1706,6 +1714,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
&mut Inst::SetPinnedReg { ref mut rm } => {
map_use(mapper, rm);
}
&mut Inst::VirtualSPOffsetAdj { .. } => {}
}
}
@@ -1904,7 +1913,7 @@ impl MachInst for Inst {
// Pretty-printing of instructions.
fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) {
let (mem_insts, mem) = mem_finalize(0, mem);
let (mem_insts, mem) = mem_finalize(0, mem, &mut Default::default());
let mut mem_str = mem_insts
.into_iter()
.map(|inst| inst.show_rru(mb_rru))
@@ -2618,42 +2627,58 @@ impl ShowWithRRU for Inst {
let rd = rd.show_rru(mb_rru);
format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
}
&Inst::LoadAddr { rd, ref mem } => match *mem {
MemArg::FPOffset(fp_off) => {
let alu_op = if fp_off < 0 {
&Inst::LoadAddr { rd, ref mem } => {
// TODO: we really should find a better way to avoid duplication of
// this logic between `emit()` and `show_rru()` -- a separate 1-to-N
// expansion stage (i.e., legalization, but without the slow edit-in-place
// of the existing legalization framework).
let (mem_insts, mem) = mem_finalize(0, mem, &EmitState::default());
let mut ret = String::new();
for inst in mem_insts.into_iter() {
ret.push_str(&inst.show_rru(mb_rru));
}
let (reg, offset) = match mem {
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
-offset as u64
} else {
offset as u64
};
let alu_op = if offset < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
{
let inst = Inst::AluRRImm12 {
if offset == 0 {
let mov = Inst::mov(rd, reg);
ret.push_str(&mov.show_rru(mb_rru));
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
alu_op,
rd,
rn: reg,
imm12,
rn: fp_reg(),
};
inst.show_rru(mb_rru)
ret.push_str(&add.show_rru(mb_rru));
} else {
let mut res = String::new();
let const_insts =
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
for inst in const_insts {
res.push_str(&inst.show_rru(mb_rru));
res.push_str("; ");
let tmp = writable_spilltmp_reg();
for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
ret.push_str(&inst.show_rru(mb_rru));
}
let inst = Inst::AluRRR {
let add = Inst::AluRRR {
alu_op,
rd,
rn: fp_reg(),
rm: rd.to_reg(),
rn: reg,
rm: tmp.to_reg(),
};
res.push_str(&inst.show_rru(mb_rru));
res
ret.push_str(&add.show_rru(mb_rru));
}
ret
}
_ => unimplemented!("{:?}", mem),
},
&Inst::GetPinnedReg { rd } => {
let rd = rd.show_rru(mb_rru);
format!("get_pinned_reg {}", rd)
@@ -2662,6 +2687,7 @@ impl ShowWithRRU for Inst {
let rm = rm.show_rru(mb_rru);
format!("set_pinned_reg {}", rm)
}
&Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
}
}
}

View File

@@ -20,23 +20,21 @@ pub const PINNED_REG: u8 = 21;
const XREG_INDICES: [u8; 31] = [
// X0 - X7
32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14
40, 41, 42, 43, 44, 45, 46,
// X15
59,
// X8 - X15
40, 41, 42, 43, 44, 45, 46, 47,
// X16, X17
47, 48,
58, 59,
// X18
60,
// X19, X20
49, 50,
48, 49,
// X21, put aside because it's the pinned register.
58,
57,
// X22 - X28
51, 52, 53, 54, 55, 56, 57,
// X29
50, 51, 52, 53, 54, 55, 56,
// X29 (FP)
61,
// X30
// X30 (LR)
62,
];
@@ -125,14 +123,17 @@ pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}
/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
///
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
/// to live through call instructions.
pub fn spilltmp_reg() -> Reg {
xreg(15)
xreg(16)
}
/// Get a writable reference to the spilltmp reg.
@@ -140,6 +141,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Get a reference to the second temp register. We need this in some edge cases
/// where we need both the spilltmp and another temporary.
///
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
/// free to use otherwise.
pub fn tmp2_reg() -> Reg {
xreg(17)
}
/// Get a writable reference to the tmp2 reg.
pub fn writable_tmp2_reg() -> Writable<Reg> {
Writable::from_reg(tmp2_reg())
}
/// Create the register universe for AArch64.
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
let mut regs = vec![];
@@ -173,7 +188,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
for i in 0u8..32u8 {
// See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
continue;
}
let reg = Reg::new_real(
@@ -211,7 +226,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
regs.len()
};
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
regs.push((xreg(16).to_real_reg(), "x16".to_string()));
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string()));

View File

@@ -1291,7 +1291,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
for inst in abi.gen_copy_reg_to_arg(i, arg_reg) {
ctx.emit(inst);
}
}

View File

@@ -2183,7 +2183,7 @@ fn test_x64_emit() {
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags);
insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size()
};
@@ -2191,7 +2191,7 @@ fn test_x64_emit() {
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags);
insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink);
let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding);

View File

@@ -950,7 +950,9 @@ impl MachInst for Inst {
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, _flags: &settings::Flags) {
type State = ();
fn emit(&self, sink: &mut O, _flags: &settings::Flags, _: &mut Self::State) {
emit::emit(self, sink);
}
}

View File

@@ -98,7 +98,10 @@ pub trait ABIBody {
fn gen_epilogue(&self) -> Vec<Self::I>;
/// Returns the full frame size for the given function, after prologue emission has run. This
/// comprises the spill space, incoming argument space, alignment padding, etc.
/// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
/// Baldrdash (SpiderMonkey integration).
fn frame_size(&self) -> u32;
/// Get the spill-slot size.
@@ -133,12 +136,7 @@ pub trait ABICall {
fn num_args(&self) -> usize;
/// Copy an argument value from a source register, prior to the call.
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
&self,
ctx: &mut C,
idx: usize,
from_reg: Reg,
) -> Vec<Self::I>;
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Self::I>;
/// Copy a return value into a destination register, after the call returns.
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;

View File

@@ -214,8 +214,10 @@ pub enum MachTerminator<'a> {
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> {
/// Persistent state carried across `emit` invocations.
type State: Default + Clone + Debug;
/// Emit the instruction.
fn emit(&self, code: &mut O, flags: &Flags);
fn emit(&self, code: &mut O, flags: &Flags, state: &mut Self::State);
}
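A minimal sketch (assumed names, not the real trait machinery) of why `State` is threaded mutably through `emit`: meta-instructions such as the virtual-SP adjustment produce no bytes, but they mutate state that later instructions' encodings depend on.

```rust
// Minimal sketch of threading mutable emission state through `emit`; the
// trait and types here are illustrative, not the actual Cranelift ones.
trait InstEmit {
    type State: Default;
    fn emit(&self, sink: &mut Vec<u8>, state: &mut Self::State);
}

#[derive(Default)]
struct EmitState {
    virtual_sp_offset: i64,
}

enum Inst {
    VirtualSpOffsetAdj { offset: i64 }, // meta-instruction: emits no bytes
    Nop,
}

impl InstEmit for Inst {
    type State = EmitState;
    fn emit(&self, sink: &mut Vec<u8>, state: &mut EmitState) {
        match self {
            Inst::VirtualSpOffsetAdj { offset } => state.virtual_sp_offset += offset,
            Inst::Nop => sink.extend_from_slice(&0xd503_201fu32.to_le_bytes()), // AArch64 NOP
        }
    }
}

fn main() {
    let mut sink = Vec::new();
    let mut state = EmitState::default();
    for inst in [Inst::VirtualSpOffsetAdj { offset: 32 }, Inst::Nop] {
        inst.emit(&mut sink, &mut state);
    }
    assert_eq!(state.virtual_sp_offset, 32);
    assert_eq!(sink.len(), 4);
}
```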
/// The result of a `MachBackend::compile_function()` call. Contains machine

View File

@@ -526,12 +526,13 @@ impl<I: VCodeInst> VCode<I> {
// Compute block offsets.
let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()];
let mut state = Default::default();
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[block as usize] = code_section.offset;
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize].emit(&mut code_section, flags);
self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
}
}
@@ -544,13 +545,14 @@ impl<I: VCodeInst> VCode<I> {
// it (so forward references are now possible), and (ii) mutates the
// instructions.
let mut code_section = MachSectionSize::new(0);
let mut state = Default::default();
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
self.insts[iix as usize].emit(&mut code_section, flags);
self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
}
}
}
@@ -563,6 +565,7 @@ impl<I: VCodeInst> VCode<I> {
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx);
let mut state = Default::default();
let flags = self.abi.flags();
let mut cur_srcloc = None;
@@ -571,7 +574,7 @@ impl<I: VCodeInst> VCode<I> {
while new_offset > code_section.cur_offset_from_start() {
// Pad with NOPs up to the aligned block offset.
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
nop.emit(code_section, flags);
nop.emit(code_section, flags, &mut Default::default());
}
assert_eq!(code_section.cur_offset_from_start(), new_offset);
@@ -586,7 +589,7 @@ impl<I: VCodeInst> VCode<I> {
cur_srcloc = Some(srcloc);
}
self.insts[iix as usize].emit(code_section, flags);
self.insts[iix as usize].emit(code_section, flags, &mut state);
}
if cur_srcloc.is_some() {

View File

@@ -11,8 +11,8 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, 8 ; b 12 ; data
; nextln: blr x15
; nextln: ldr x16, 8 ; b 12 ; data
; nextln: blr x16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -45,8 +45,8 @@ block0(v0: i64):
; nextln: subs xzr, sp, x0
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15
; nextln: blr x15
; nextln: ldr x16
; nextln: blr x16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -64,13 +64,13 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: subs xzr, sp, x15
; nextln: ldr x16, [x0]
; nextln: ldr x16, [x16, #4]
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15
; nextln: blr x15
; nextln: ldr x16
; nextln: blr x16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -84,8 +84,8 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x15, x0, #176
; nextln: subs xzr, sp, x15
; nextln: add x16, x0, #176
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #176
@@ -104,14 +104,14 @@ block0(v0: i64):
; nextln: subs xzr, sp, x0
; nextln: b.hs 8
; nextln: udf
; nextln: movz x16, #6784
; nextln: movk x16, #6, LSL #16
; nextln: add x15, x0, x16, UXTX
; nextln: subs xzr, sp, x15
; nextln: movz x17, #6784
; nextln: movk x17, #6, LSL #16
; nextln: add x16, x0, x17, UXTX
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x15, UXTX
; nextln: ldr x16, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x16, UXTX
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -128,10 +128,10 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: add x15, x15, #32
; nextln: subs xzr, sp, x15
; nextln: ldr x16, [x0]
; nextln: ldr x16, [x16, #4]
; nextln: add x16, x16, #32
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #32
@@ -151,19 +151,19 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: subs xzr, sp, x15
; nextln: ldr x16, [x0]
; nextln: ldr x16, [x16, #4]
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: movz x16, #6784
; nextln: movk x16, #6, LSL #16
; nextln: add x15, x15, x16, UXTX
; nextln: subs xzr, sp, x15
; nextln: movz x17, #6784
; nextln: movk x17, #6, LSL #16
; nextln: add x16, x16, x17, UXTX
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x15, UXTX
; nextln: ldr x16, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x16, UXTX
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -179,11 +179,11 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x15, #6784
; nextln: movk x15, #6, LSL #16
; nextln: ldr x15, [x0, x15]
; nextln: add x15, x15, #32
; nextln: subs xzr, sp, x15
; nextln: movz x16, #6784
; nextln: movk x16, #6, LSL #16
; nextln: ldr x16, [x0, x16]
; nextln: add x16, x16, #32
; nextln: subs xzr, sp, x16
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #32

View File

@@ -12,7 +12,7 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #16
; nextln: sub x0, fp, #8
; nextln: mov x0, sp
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -29,9 +29,9 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x15, UXTX
; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0
; nextln: ldr x16, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x16, UXTX
; nextln: mov x0, sp
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -50,7 +50,7 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #16
; nextln: sub x0, fp, #8
; nextln: mov x0, sp
; nextln: ldur x0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
@@ -68,9 +68,9 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x15, UXTX
; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0
; nextln: ldr x16, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x16, UXTX
; nextln: mov x0, sp
; nextln: ldur x0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
@@ -88,7 +88,7 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #16
; nextln: sub x1, fp, #8
; nextln: mov x1, sp
; nextln: stur x0, [x1]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
@@ -106,9 +106,9 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x15, UXTX
; nextln: movz x1, #34472; movk x1, #1, LSL #16; sub x1, fp, x1
; nextln: ldr x16, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x16, UXTX
; nextln: mov x1, sp
; nextln: stur x0, [x1]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16