Merge pull request #1607 from cfallin/aarch64-stack-frame

Rework aarch64 stack frame implementation to use positive offsets.
Author: Chris Fallin
Date: 2020-05-06 10:29:30 -07:00 (committed via GitHub)
16 changed files with 496 additions and 320 deletions


@@ -1,4 +1,63 @@
//! Implementation of the standard AArch64 ABI. //! Implementation of the standard AArch64 ABI.
//!
//! We implement the standard AArch64 ABI, as documented by ARM. This ABI
//! specifies how arguments are passed (in registers or on the stack, as
//! appropriate), which registers are caller- and callee-saved, and how a
//! particular part of the stack frame (the FP/LR pair) must be linked through
//! the active stack frames.
//!
//! Note, however, that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need to be
//! able to generate instructions (or instruction sequences) to access
//! arguments, stack slots, and spill slots before we know how many spill slots
//! or clobber-saves there will be, because of our pass structure. We also
//! prefer positive offsets to negative offsets because of an asymmetry in
//! AArch64 addressing modes (positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary offset). Finally, it
//! is not allowed to access memory below the current SP value.
//!
//! As a result, we keep the FP/LR pair just below stack args so that we can
//! access these args at known offsets from FP, and we access on-stack storage
//! using positive offsets from SP. In order to allow codegen for the latter
//! before knowing how many clobber-saves we have, and also allow it while SP is
//! being adjusted to set up a call, we implement a "nominal SP" tracking
//! feature by which a fixup (distance between actual SP and a "nominal" SP) is
//! known at each instruction. See the documentation for
//! [MemArg::NominalSPOffset] for more on this.
//!
//! The stack looks like:
//!
//! ```plain
//! (high address)
//!
//! +---------------------------+
//! | ... |
//! | stack args |
//! | (accessed via FP) |
//! +---------------------------+
//! SP at function entry -----> | LR (pushed by prologue) |
//! +---------------------------+
//! FP after prologue --------> | FP (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | spill slots |
//! | (accessed via nominal-SP) |
//! | ... |
//! | stack slots |
//! | (accessed via nominal-SP) |
//! nominal SP ---------------> | (alloc'd by prologue) |
//! +---------------------------+
//! | ... |
//! | clobbered callee-saves |
//! SP at end of prologue ----> | (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | args for call |
//! SP before making a call --> | (pushed at callsite) |
//! +---------------------------+
//!
//! (low address)
//! ```
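
To make the nominal-SP bookkeeping above concrete, here is a minimal, self-contained sketch of the fixup in isolation. `Amode`, `EmitState`, and `finalize` are illustrative stand-ins for the real `MemArg::NominalSPOffset`, `EmitState`, and `mem_finalize` that appear later in this diff; only the arithmetic is meant to match.

```rust
/// Minimal model of the "nominal SP" fixup described above (illustrative only).
#[derive(Debug, Clone, Copy, PartialEq)]
enum Amode {
    /// Offset from the real SP.
    SPOffset(i64),
    /// Offset from nominal SP (real SP just after stack/spill slots are allocated).
    NominalSPOffset(i64),
}

#[derive(Default)]
struct EmitState {
    /// Running distance from real SP up to nominal SP, maintained by
    /// `VirtualSPOffsetAdj`-style meta-instructions during emission.
    virtual_sp_offset: i64,
}

/// Fold the running fixup into a nominal-SP reference, yielding a real-SP amode.
fn finalize(mem: Amode, state: &EmitState) -> Amode {
    match mem {
        Amode::NominalSPOffset(off) => Amode::SPOffset(off + state.virtual_sp_offset),
        other => other,
    }
}

fn main() {
    let mut state = EmitState::default();

    // Right after the slots are allocated, nominal SP == real SP: no fixup.
    assert_eq!(finalize(Amode::NominalSPOffset(8), &state), Amode::SPOffset(8));

    // The prologue then pushes 32 bytes of clobbered callee-saves and records
    // a +32 virtual-SP adjustment: real SP is now 32 bytes below nominal SP,
    // so the same slot is found at a larger offset from the real SP.
    state.virtual_sp_offset += 32;
    assert_eq!(finalize(Amode::NominalSPOffset(8), &state), Amode::SPOffset(40));
}
```
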
use crate::ir; use crate::ir;
use crate::ir::types; use crate::ir::types;
@@ -13,7 +72,7 @@ use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use log::debug; use log::{debug, trace};
/// A location for an argument or return value. /// A location for an argument or return value.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
@@ -188,7 +247,7 @@ pub struct AArch64ABIBody {
/// Total number of spillslots, from regalloc. /// Total number of spillslots, from regalloc.
spillslots: Option<usize>, spillslots: Option<usize>,
/// Total frame size. /// Total frame size.
frame_size: Option<u32>, total_frame_size: Option<u32>,
/// Calling convention this function expects. /// Calling convention this function expects.
call_conv: isa::CallConv, call_conv: isa::CallConv,
/// The settings controlling this function's compilation. /// The settings controlling this function's compilation.
@@ -347,7 +406,7 @@ impl AArch64ABIBody {
stackslots_size: stack_offset, stackslots_size: stack_offset,
clobbered: Set::empty(), clobbered: Set::empty(),
spillslots: None, spillslots: None,
frame_size: None, total_frame_size: None,
call_conv, call_conv,
flags, flags,
is_leaf: f.is_leaf(), is_leaf: f.is_leaf(),
@@ -355,9 +414,9 @@ impl AArch64ABIBody {
} }
} }
/// Returns the size of a function call frame (including return address and FP) for this /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
/// function's body. /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
fn frame_size(&self) -> i64 { fn fp_to_arg_offset(&self) -> i64 {
if self.call_conv.extends_baldrdash() { if self.call_conv.extends_baldrdash() {
let num_words = self.flags.baldrdash_prologue_words() as i64; let num_words = self.flags.baldrdash_prologue_words() as i64;
debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
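
As a rough numeric check of the non-Baldrdash case: assuming (per the diagram above) that only the saved FP/LR pair, 16 bytes, separates FP from the argument area, a stack argument at offset `off` is loaded from `[fp, #(16 + off)]`. The helper below is illustrative only:

```rust
/// Illustrative only: FP-relative offset of a stack argument, given the
/// ABI-dependent distance from FP to the argument area and the argument's
/// offset within that area.
fn stack_arg_fp_offset(fp_to_arg_offset: i64, arg_off: i64) -> i64 {
    fp_to_arg_offset + arg_off
}

fn main() {
    // Standard frame (saved FP + LR = 16 bytes, assumed): the second 8-byte
    // stack argument is loaded from [fp, #24].
    assert_eq!(stack_arg_fp_offset(16, 8), 24);
}
```
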
@@ -383,8 +442,8 @@ impl AArch64ABIBody {
/// happening so late in the pipeline (e.g. after register allocation). This /// happening so late in the pipeline (e.g. after register allocation). This
/// means that we need to do manual register allocation here and also be /// means that we need to do manual register allocation here and also be
/// careful to not clobber any callee-saved or argument registers. For now /// careful to not clobber any callee-saved or argument registers. For now
/// this routine makes do with the `writable_spilltmp_reg` as one temporary /// this routine makes do with the `spilltmp_reg` as one temporary
/// register, and a second register of `x16` which is caller-saved. This /// register, and a second register of `tmp2` which is caller-saved. This
/// should be fine for us since no spills should happen in this sequence of /// should be fine for us since no spills should happen in this sequence of
/// instructions, so our register won't get accidentally clobbered. /// instructions, so our register won't get accidentally clobbered.
/// ///
@@ -413,9 +472,9 @@ impl AArch64ABIBody {
// Note though that `stack_limit`'s register may be the same as // Note though that `stack_limit`'s register may be the same as
// `scratch`. If our stack size doesn't fit into an immediate this // `scratch`. If our stack size doesn't fit into an immediate this
// means we need a second scratch register for loading the stack size // means we need a second scratch register for loading the stack size
// into a register. We use `x16` here since it's caller-saved and we're // into a register.
// in the function prologue and nothing else is allocated to it yet.
let scratch = writable_spilltmp_reg(); let scratch = writable_spilltmp_reg();
let scratch2 = writable_tmp2_reg();
let stack_size = u64::from(stack_size); let stack_size = u64::from(stack_size);
if let Some(imm12) = Imm12::maybe_from_u64(stack_size) { if let Some(imm12) = Imm12::maybe_from_u64(stack_size) {
insts.push(Inst::AluRRImm12 { insts.push(Inst::AluRRImm12 {
@@ -425,16 +484,12 @@ impl AArch64ABIBody {
imm12, imm12,
}); });
} else { } else {
let scratch2 = 16; insts.extend(Inst::load_constant(scratch2, stack_size.into()));
insts.extend(Inst::load_constant(
Writable::from_reg(xreg(scratch2)),
stack_size.into(),
));
insts.push(Inst::AluRRRExtend { insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::Add64, alu_op: ALUOp::Add64,
rd: scratch, rd: scratch,
rn: stack_limit, rn: stack_limit,
rm: xreg(scratch2), rm: scratch2.to_reg(),
extendop: ExtendOp::UXTX, extendop: ExtendOp::UXTX,
}); });
} }
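
Whether the immediate form or the two-scratch fallback is taken comes down to the AArch64 add/sub immediate encoding: a 12-bit value, optionally shifted left by 12 bits. A standalone restatement of that check (the function name is illustrative; the patch itself uses `Imm12::maybe_from_u64`):

```rust
/// Illustrative restatement of the AArch64 add/sub immediate rule used to
/// decide between the `Imm12` form and the load-constant-into-scratch form.
fn fits_in_imm12(value: u64) -> bool {
    // Unshifted: 0..=0xfff. Shifted-by-12: low 12 bits zero and the rest
    // still fits in 12 bits.
    value <= 0xfff || (value & 0xfff == 0 && (value >> 12) <= 0xfff)
}

fn main() {
    assert!(fits_in_imm12(4095));     // fits unshifted
    assert!(fits_in_imm12(0x7000));   // 4 KiB-aligned, fits shifted
    assert!(!fits_in_imm12(400_000)); // falls back to the second scratch (x17 after this patch)
}
```
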
@@ -460,8 +515,7 @@ impl AArch64ABIBody {
} }
} }
fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst { fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
match ty { match ty {
types::B1 types::B1
| types::B8 | types::B8
@@ -486,15 +540,11 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst
mem, mem,
srcloc: None, srcloc: None,
}, },
_ => unimplemented!("load_stack_from_fp({})", ty), _ => unimplemented!("load_stack({})", ty),
} }
} }
fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
debug_assert!(match &mem {
MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(),
_ => true,
});
match ty { match ty {
types::B1 types::B1
| types::B8 | types::B8
@@ -523,50 +573,6 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
} }
} }
fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
store_stack(MemArg::FPOffset(fp_offset), from_reg, ty)
}
fn store_stack_sp<C: LowerCtx<I = Inst>>(
ctx: &mut C,
sp_offset: i64,
from_reg: Reg,
ty: Type,
) -> Vec<Inst> {
if SImm9::maybe_from_i64(sp_offset).is_some() {
vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)]
} else {
// mem_finalize will try to generate an add, but in an addition, x31 is the zero register,
// not sp! So we have to synthesize the full add here.
let tmp1 = ctx.tmp(RegClass::I64, I64);
let tmp2 = ctx.tmp(RegClass::I64, I64);
let mut result = Vec::new();
// tmp1 := sp
result.push(Inst::Mov {
rd: tmp1,
rm: stack_reg(),
});
// tmp2 := offset
for inst in Inst::load_constant(tmp2, sp_offset as u64) {
result.push(inst);
}
// tmp1 := add tmp1, tmp2
result.push(Inst::AluRRR {
alu_op: ALUOp::Add64,
rd: tmp1,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
// Actual store.
result.push(store_stack(
MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()),
from_reg,
ty,
));
result
}
}
fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool { fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() { if call_conv.extends_baldrdash() {
match r.get_class() { match r.get_class() {
@@ -706,7 +712,11 @@ impl ABIBody for AArch64ABIBody {
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst { fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.args[idx] { match &self.sig.args[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty), &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
&ABIArg::Stack(off, ty) => load_stack_from_fp(off + self.frame_size(), into_reg, ty), &ABIArg::Stack(off, ty) => load_stack(
MemArg::FPOffset(self.fp_to_arg_offset() + off),
into_reg,
ty,
),
} }
} }
@@ -767,8 +777,8 @@ impl ABIBody for AArch64ABIBody {
} }
_ => {} _ => {}
}; };
ret.push(store_stack_fp( ret.push(store_stack(
off + self.frame_size(), MemArg::FPOffset(self.fp_to_arg_offset() + off),
from_reg.to_reg(), from_reg.to_reg(),
ty, ty,
)) ))
@@ -793,6 +803,7 @@ impl ABIBody for AArch64ABIBody {
self.clobbered = clobbered; self.clobbered = clobbered;
} }
/// Load from a stackslot.
fn load_stackslot( fn load_stackslot(
&self, &self,
slot: StackSlot, slot: StackSlot,
@@ -800,47 +811,54 @@ impl ABIBody for AArch64ABIBody {
ty: Type, ty: Type,
into_reg: Writable<Reg>, into_reg: Writable<Reg>,
) -> Inst { ) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size. // Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); let sp_off: i64 = stack_off + (offset as i64);
load_stack_from_fp(fp_off, into_reg, ty) trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off);
load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
} }
/// Store to a stackslot.
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst { fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size. // Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); let sp_off: i64 = stack_off + (offset as i64);
store_stack_fp(fp_off, from_reg, ty) trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off);
store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
} }
/// Produce an instruction that computes a stackslot address.
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst { fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size. // Offset from beginning of stackslot area, which is at nominal-SP (see
// [MemArg::NominalSPOffset] for more details on nominal-SP tracking).
let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); let sp_off: i64 = stack_off + (offset as i64);
Inst::LoadAddr { Inst::LoadAddr {
rd: into_reg, rd: into_reg,
mem: MemArg::FPOffset(fp_off), mem: MemArg::NominalSPOffset(sp_off),
} }
} }
// Load from a spillslot. /// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst { fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
// Note that when spills/fills are generated, we don't yet know how many // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
// spillslots there will be, so we allocate *downward* from the beginning
// of the stackslot area. Hence: FP - stackslot_size - 8*spillslot -
// sizeof(ty).
let islot = slot.get() as i64; let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8; let spill_off = islot * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64; let sp_off = self.stackslots_size as i64 + spill_off;
load_stack_from_fp(fp_off, into_reg, ty) trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
} }
// Store to a spillslot. /// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
let islot = slot.get() as i64; let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8; let spill_off = islot * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64; let sp_off = self.stackslots_size as i64 + spill_off;
store_stack_fp(fp_off, from_reg, ty) trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
} }
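
The new slot arithmetic is easy to state in one place: stack slots begin at nominal SP, and spill slots sit immediately above them at nominal SP + `stackslots_size`, eight bytes per slot. A tiny standalone check mirroring the arithmetic above (illustrative helpers only):

```rust
/// Nominal-SP offset of byte `offset` within a stack slot whose precomputed
/// offset in the stackslot area is `slot_off`.
fn stackslot_sp_off(slot_off: i64, offset: i64) -> i64 {
    slot_off + offset
}

/// Nominal-SP offset of spill slot `islot`: the spill area starts right above
/// the stackslot area, and each spill slot is 8 bytes.
fn spillslot_sp_off(stackslots_size: i64, islot: i64) -> i64 {
    stackslots_size + islot * 8
}

fn main() {
    // With 32 bytes of stack slots, spill slot 3 is at nominal-SP + 56.
    assert_eq!(spillslot_sp_off(32, 3), 56);
    // Byte 4 of a stack slot starting at offset 16 is at nominal-SP + 20.
    assert_eq!(stackslot_sp_off(16, 4), 20);
}
```
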
fn gen_prologue(&mut self) -> Vec<Inst> { fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -916,9 +934,18 @@ impl ABIBody for AArch64ABIBody {
} }
} }
// N.B.: "nominal SP", which we use to refer to stackslots
// and spillslots, is *here* (the value of SP at this program point).
// If we push any clobbers below, we emit a virtual-SP adjustment
// meta-instruction so that the nominal-SP references behave as if SP
// were still at this point. See documentation for
// [crate::isa::aarch64::abi](this module) for more details on
// stackframe layout and nominal-SP maintenance.
// Save clobbered registers. // Save clobbered registers.
let (clobbered_int, clobbered_vec) = let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec()); get_callee_saves(self.call_conv, self.clobbered.to_vec());
let mut clobber_size = 0;
for reg_pair in clobbered_int.chunks(2) { for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 { let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
@@ -939,6 +966,7 @@ impl ABIBody for AArch64ABIBody {
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
), ),
}); });
clobber_size += 16;
} }
let vec_save_bytes = clobbered_vec.len() * 16; let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 { if vec_save_bytes != 0 {
@@ -948,6 +976,7 @@ impl ABIBody for AArch64ABIBody {
rn: stack_reg(), rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
}); });
clobber_size += vec_save_bytes;
} }
for (i, reg) in clobbered_vec.iter().enumerate() { for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuStore128 { insts.push(Inst::FpuStore128 {
@@ -957,7 +986,13 @@ impl ABIBody for AArch64ABIBody {
}); });
} }
self.frame_size = Some(total_stacksize); if clobber_size > 0 {
insts.push(Inst::VirtualSPOffsetAdj {
offset: clobber_size as i64,
});
}
self.total_frame_size = Some(total_stacksize);
insts insts
} }
@@ -1009,6 +1044,12 @@ impl ABIBody for AArch64ABIBody {
}); });
} }
// N.B.: we do *not* emit a nominal-SP adjustment here, because (i) there will be no
// references to nominal-SP offsets before the return below, and (ii) the instruction
// emission tracks running SP offset linearly (in straight-line order), not according to
// the CFG, so early returns in the middle of function bodies would cause an incorrect
// offset for the rest of the body.
if !self.call_conv.extends_baldrdash() { if !self.call_conv.extends_baldrdash() {
// The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here. // The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD. // MOV to SP is an alias of ADD.
@@ -1037,7 +1078,7 @@ impl ABIBody for AArch64ABIBody {
} }
fn frame_size(&self) -> u32 { fn frame_size(&self) -> u32 {
self.frame_size self.total_frame_size
.expect("frame size not computed before prologue generation") .expect("frame size not computed before prologue generation")
} }
@@ -1138,20 +1179,32 @@ impl AArch64ABICall {
} }
} }
fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> { fn adjust_stack(amount: u64, is_sub: bool) -> Vec<Inst> {
if amt > 0 { if amount > 0 {
let sp_adjustment = if is_sub {
amount as i64
} else {
-(amount as i64)
};
let adj_meta_insn = Inst::VirtualSPOffsetAdj {
offset: sp_adjustment,
};
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
if let Some(imm12) = Imm12::maybe_from_u64(amt) { if let Some(imm12) = Imm12::maybe_from_u64(amount) {
vec![Inst::AluRRImm12 { vec![
alu_op, adj_meta_insn,
rd: writable_stack_reg(), Inst::AluRRImm12 {
rn: stack_reg(), alu_op,
imm12, rd: writable_stack_reg(),
}] rn: stack_reg(),
imm12,
},
]
} else { } else {
let const_load = Inst::LoadConst64 { let const_load = Inst::LoadConst64 {
rd: writable_spilltmp_reg(), rd: writable_spilltmp_reg(),
const_data: amt, const_data: amount,
}; };
let adj = Inst::AluRRRExtend { let adj = Inst::AluRRRExtend {
alu_op, alu_op,
@@ -1160,7 +1213,7 @@ fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
rm: spilltmp_reg(), rm: spilltmp_reg(),
extendop: ExtendOp::UXTX, extendop: ExtendOp::UXTX,
}; };
vec![const_load, adj] vec![adj_meta_insn, const_load, adj]
} }
} else { } else {
vec![] vec![]
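
The sign convention here is worth pausing on: an SP decrease (`is_sub`) moves the real SP further below nominal SP, so the virtual-SP offset grows by the same amount, and the post-call cleanup undoes it. A toy model of the pairing (the names are illustrative, not the real `Inst` variants):

```rust
#[derive(Debug, PartialEq)]
enum Step {
    /// Meta-instruction: bump the emission-time virtual SP offset.
    VirtualSPOffsetAdj(i64),
    /// Real SP change, in bytes subtracted from SP (negative = added back).
    MoveSpDownBy(i64),
}

/// Toy version of `adjust_stack`: every real SP move is preceded by a
/// virtual-SP adjustment of the same magnitude and sign, so nominal-SP
/// references emitted around the callsite keep resolving correctly.
fn adjust_stack(amount: u64, is_sub: bool) -> Vec<Step> {
    if amount == 0 {
        return vec![];
    }
    let delta = if is_sub { amount as i64 } else { -(amount as i64) };
    vec![Step::VirtualSPOffsetAdj(delta), Step::MoveSpDownBy(delta)]
}

fn main() {
    // Allocating 32 bytes of call-argument space...
    assert_eq!(
        adjust_stack(32, true),
        vec![Step::VirtualSPOffsetAdj(32), Step::MoveSpDownBy(32)]
    );
    // ...and releasing it afterwards cancels the fixup.
    assert_eq!(
        adjust_stack(32, false),
        vec![Step::VirtualSPOffsetAdj(-32), Step::MoveSpDownBy(-32)]
    );
}
```
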
@@ -1182,19 +1235,14 @@ impl ABICall for AArch64ABICall {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false) adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
} }
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>( fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Inst> {
&self,
ctx: &mut C,
idx: usize,
from_reg: Reg,
) -> Vec<Inst> {
match &self.sig.args[idx] { match &self.sig.args[idx] {
&ABIArg::Reg(reg, ty) => vec![Inst::gen_move( &ABIArg::Reg(reg, ty) => vec![Inst::gen_move(
Writable::from_reg(reg.to_reg()), Writable::from_reg(reg.to_reg()),
from_reg, from_reg,
ty, ty,
)], )],
&ABIArg::Stack(off, ty) => store_stack_sp(ctx, off, from_reg, ty), &ABIArg::Stack(off, ty) => vec![store_stack(MemArg::SPOffset(off), from_reg, ty)],
} }
} }


@@ -112,7 +112,9 @@ pub enum MemLabel {
/// A memory argument to load/store, encapsulating the possible addressing modes. /// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum MemArg { pub enum MemArg {
Label(MemLabel), //
// Real ARM64 addressing modes:
//
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9), PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
@@ -137,11 +139,31 @@ pub enum MemArg {
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg. /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled), UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer. Lowered into a real amode at emission. //
// virtual addressing modes that are lowered at emission time:
//
/// Reference to a "label": e.g., a symbol.
Label(MemLabel),
/// Offset from the stack pointer.
SPOffset(i64), SPOffset(i64),
/// Offset from the frame pointer. Lowered into a real amode at emission. /// Offset from the frame pointer.
FPOffset(i64), FPOffset(i64),
/// Offset from the "nominal stack pointer", which is where the real SP is
/// just after stack and spill slots are allocated in the function prologue.
/// At emission time, this is converted to `SPOffset` with a fixup added to
/// the offset constant. The fixup is a running value that is tracked as
/// emission iterates through instructions in linear order, and can be
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
///
/// The standard ABI is in charge of handling this (by emitting the
/// adjustment meta-instructions). It maintains the invariant that "nominal
/// SP" is where the actual SP is after the function prologue and before
/// clobber pushes. See the diagram in the documentation for
/// [crate::isa::aarch64::abi](the ABI module) for more details.
NominalSPOffset(i64),
} }
impl MemArg { impl MemArg {
@@ -443,7 +465,7 @@ impl ShowWithRRU for MemArg {
simm9.show_rru(mb_rru) simm9.show_rru(mb_rru)
), ),
// Eliminated by `mem_finalize()`. // Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => {
panic!("Unexpected stack-offset mem-arg mode!") panic!("Unexpected stack-offset mem-arg mode!")
} }
} }


@@ -10,6 +10,7 @@ use regalloc::{Reg, RegClass, Writable};
use alloc::vec::Vec; use alloc::vec::Vec;
use core::convert::TryFrom; use core::convert::TryFrom;
use log::debug;
/// Memory label/reference finalization: convert a MemLabel to a PC-relative /// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary. /// offset, possibly emitting relocation(s) as necessary.
@@ -23,33 +24,44 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
/// generic arbitrary stack offset) into real addressing modes, possibly by /// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use /// emitting some helper instructions that come immediately before the use
/// of this amode. /// of this amode.
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) { pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg, state: &EmitState) -> (Vec<Inst>, MemArg) {
match mem { match mem {
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => { &MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => {
let basereg = match mem { let basereg = match mem {
&MemArg::SPOffset(..) => stack_reg(), &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
&MemArg::FPOffset(..) => fp_reg(), &MemArg::FPOffset(..) => fp_reg(),
_ => unreachable!(), _ => unreachable!(),
}; };
let adj = match mem {
&MemArg::NominalSPOffset(..) => {
debug!(
"mem_finalize: nominal SP offset {} + adj {} -> {}",
off,
state.virtual_sp_offset,
off + state.virtual_sp_offset
);
state.virtual_sp_offset
}
_ => 0,
};
let off = off + adj;
if let Some(simm9) = SImm9::maybe_from_i64(off) { if let Some(simm9) = SImm9::maybe_from_i64(off) {
let mem = MemArg::Unscaled(basereg, simm9); let mem = MemArg::Unscaled(basereg, simm9);
(vec![], mem) (vec![], mem)
} else { } else {
// In an addition, x31 is the zero register, not sp; we have only one temporary
// so we can't do the proper add here.
debug_assert_ne!(
basereg,
stack_reg(),
"should have diverted SP before mem_finalize"
);
let tmp = writable_spilltmp_reg(); let tmp = writable_spilltmp_reg();
let mut const_insts = Inst::load_constant(tmp, off as u64); let mut const_insts = Inst::load_constant(tmp, off as u64);
let add_inst = Inst::AluRRR { // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
// is a valid base (for SPOffset) which we must handle here.
// Also, SP needs to be the first arg, not second.
let add_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Add64, alu_op: ALUOp::Add64,
rd: tmp, rd: tmp,
rn: tmp.to_reg(), rn: basereg,
rm: basereg, rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
}; };
const_insts.push(add_inst); const_insts.push(add_inst);
(const_insts.to_vec(), MemArg::reg(tmp.to_reg())) (const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
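
For a concrete picture of this fallback, the binemit tests later in this patch expect sequences of the shape `movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]`. A toy string-level rendering of that shape (purely illustrative; a single `movz` stands in for the full movz/movk/movn logic of `load_constant`):

```rust
/// Toy rendering of the large-offset path: load the offset into the scratch
/// register, then add it to the base with the extended-register ADD, keeping
/// the base in the first operand slot so that SP remains a legal base.
fn large_offset_load(base: &str, offset: u64, dst: &str) -> Vec<String> {
    let scratch = "x16"; // the spill temporary after this patch
    vec![
        format!("movz {}, #{}", scratch, offset),
        format!("add {}, {}, {}, UXTX", scratch, base, scratch),
        format!("ldr {}, [{}]", dst, scratch),
    ]
}

fn main() {
    assert_eq!(
        large_offset_load("fp", 32768, "x1"),
        vec![
            "movz x16, #32768".to_string(),
            "add x16, fp, x16, UXTX".to_string(),
            "ldr x1, [x16]".to_string(),
        ]
    );
}
```
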
@@ -322,8 +334,16 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
} }
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
virtual_sp_offset: i64,
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst { impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, flags: &settings::Flags) { type State = EmitState;
fn emit(&self, sink: &mut O, flags: &settings::Flags, state: &mut EmitState) {
match self { match self {
&Inst::AluRRR { alu_op, rd, rn, rm } => { &Inst::AluRRR { alu_op, rd, rn, rm } => {
let top11 = match alu_op { let top11 = match alu_op {
@@ -596,10 +616,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem, ref mem,
srcloc, srcloc,
} => { } => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
for inst in mem_insts.into_iter() { for inst in mem_insts.into_iter() {
inst.emit(sink, flags); inst.emit(sink, flags, state);
} }
// ldst encoding helpers take Reg, not Writable<Reg>. // ldst encoding helpers take Reg, not Writable<Reg>.
@@ -697,9 +717,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
} }
// Eliminated by `mem_finalize()` above. // Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { &MemArg::SPOffset(..)
panic!("Should not see stack-offset here!") | &MemArg::FPOffset(..)
} | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
} }
} }
@@ -739,10 +759,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem, ref mem,
srcloc, srcloc,
} => { } => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
for inst in mem_insts.into_iter() { for inst in mem_insts.into_iter() {
inst.emit(sink, flags); inst.emit(sink, flags, state);
} }
let op = match self { let op = match self {
@@ -794,9 +814,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
} }
// Eliminated by `mem_finalize()` above. // Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { &MemArg::SPOffset(..)
panic!("Should not see stack-offset here!") | &MemArg::FPOffset(..)
} | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
} }
} }
@@ -980,11 +1000,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)), mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, srcloc: None,
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
let inst = Inst::Jump { let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(8), dest: BranchTarget::ResolvedOffset(8),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
sink.put4(const_data.to_bits()); sink.put4(const_data.to_bits());
} }
&Inst::LoadFpuConst64 { rd, const_data } => { &Inst::LoadFpuConst64 { rd, const_data } => {
@@ -993,11 +1013,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)), mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, srcloc: None,
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
let inst = Inst::Jump { let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12), dest: BranchTarget::ResolvedOffset(12),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
sink.put8(const_data.to_bits()); sink.put8(const_data.to_bits());
} }
&Inst::FpuCSel32 { rd, rn, rm, cond } => { &Inst::FpuCSel32 { rd, rn, rm, cond } => {
@@ -1084,7 +1104,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
if top22 != 0 { if top22 != 0 {
sink.put4(enc_extend(top22, rd, rn)); sink.put4(enc_extend(top22, rd, rn));
} else { } else {
Inst::mov32(rd, rn).emit(sink, flags); Inst::mov32(rd, rn).emit(sink, flags, state);
} }
} }
&Inst::Extend { &Inst::Extend {
@@ -1107,7 +1127,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: zero_reg(), rn: zero_reg(),
rm: rd.to_reg(), rm: rd.to_reg(),
}; };
sub_inst.emit(sink, flags); sub_inst.emit(sink, flags, state);
} }
&Inst::Extend { &Inst::Extend {
rd, rd,
@@ -1248,13 +1268,13 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// Save index in a tmp (the live range of ridx only goes to start of this // Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it). // sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(rtmp2, ridx, I64); let inst = Inst::gen_move(rtmp2, ridx, I64);
inst.emit(sink, flags); inst.emit(sink, flags, state);
// Load address of jump table // Load address of jump table
let inst = Inst::Adr { let inst = Inst::Adr {
rd: rtmp1, rd: rtmp1,
label: MemLabel::PCRel(16), label: MemLabel::PCRel(16),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
// Load value out of jump table // Load value out of jump table
let inst = Inst::SLoad32 { let inst = Inst::SLoad32 {
rd: rtmp2, rd: rtmp2,
@@ -1266,7 +1286,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
), ),
srcloc: None, // can't cause a user trap. srcloc: None, // can't cause a user trap.
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
// Add base of jump table to jump-table-sourced block offset // Add base of jump table to jump-table-sourced block offset
let inst = Inst::AluRRR { let inst = Inst::AluRRR {
alu_op: ALUOp::Add64, alu_op: ALUOp::Add64,
@@ -1274,14 +1294,14 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: rtmp1.to_reg(), rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(), rm: rtmp2.to_reg(),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
// Branch to computed address. (`targets` here is only used for successor queries // Branch to computed address. (`targets` here is only used for successor queries
// and is not needed for emission.) // and is not needed for emission.)
let inst = Inst::IndirectBr { let inst = Inst::IndirectBr {
rn: rtmp1.to_reg(), rn: rtmp1.to_reg(),
targets: vec![], targets: vec![],
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets). // Emit jump table (table of 32-bit offsets).
for target in targets { for target in targets {
let off = target.as_offset_words() * 4; let off = target.as_offset_words() * 4;
@@ -1297,11 +1317,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)), mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap. srcloc: None, // can't cause a user trap.
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
let inst = Inst::Jump { let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12), dest: BranchTarget::ResolvedOffset(12),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
sink.put8(const_data); sink.put8(const_data);
} }
&Inst::LoadExtName { &Inst::LoadExtName {
@@ -1315,11 +1335,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)), mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap. srcloc: None, // can't cause a user trap.
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
let inst = Inst::Jump { let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12), dest: BranchTarget::ResolvedOffset(12),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
sink.add_reloc(srcloc, Reloc::Abs8, name, offset); sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
if flags.emit_all_ones_funcaddrs() { if flags.emit_all_ones_funcaddrs() {
sink.put8(u64::max_value()); sink.put8(u64::max_value());
@@ -1327,52 +1347,81 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put8(0); sink.put8(0);
} }
} }
&Inst::LoadAddr { rd, ref mem } => match *mem { &Inst::LoadAddr { rd, ref mem } => {
MemArg::FPOffset(fp_off) => { let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
let alu_op = if fp_off < 0 { for inst in mem_insts.into_iter() {
ALUOp::Sub64 inst.emit(sink, flags, state);
} else {
ALUOp::Add64
};
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
{
let inst = Inst::AluRRImm12 {
alu_op,
rd,
imm12,
rn: fp_reg(),
};
inst.emit(sink, flags);
} else {
let const_insts =
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
for inst in const_insts {
inst.emit(sink, flags);
}
let inst = Inst::AluRRR {
alu_op,
rd,
rn: fp_reg(),
rm: rd.to_reg(),
};
inst.emit(sink, flags);
}
} }
_ => unimplemented!("{:?}", mem),
}, let (reg, offset) = match mem {
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
-offset as u64
} else {
offset as u64
};
let alu_op = if offset < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if offset == 0 {
let mov = Inst::mov(rd, reg);
mov.emit(sink, flags, state);
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
alu_op,
rd,
rn: reg,
imm12,
};
add.emit(sink, flags, state);
} else {
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
// that no other instructions will be inserted here (we're emitting directly),
// and a live range of `tmp2` should not span this instruction, so this use
// should otherwise be correct.
debug_assert!(rd.to_reg() != tmp2_reg());
debug_assert!(reg != tmp2_reg());
let tmp = writable_tmp2_reg();
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
insn.emit(sink, flags, state);
}
let add = Inst::AluRRR {
alu_op,
rd,
rn: reg,
rm: tmp.to_reg(),
};
add.emit(sink, flags, state);
}
}
&Inst::GetPinnedReg { rd } => { &Inst::GetPinnedReg { rd } => {
let inst = Inst::Mov { let inst = Inst::Mov {
rd, rd,
rm: xreg(PINNED_REG), rm: xreg(PINNED_REG),
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
} }
&Inst::SetPinnedReg { rm } => { &Inst::SetPinnedReg { rm } => {
let inst = Inst::Mov { let inst = Inst::Mov {
rd: Writable::from_reg(xreg(PINNED_REG)), rd: Writable::from_reg(xreg(PINNED_REG)),
rm, rm,
}; };
inst.emit(sink, flags); inst.emit(sink, flags, state);
}
&Inst::VirtualSPOffsetAdj { offset } => {
debug!(
"virtual sp offset adjusted by {} -> {}",
offset,
state.virtual_sp_offset + offset
);
state.virtual_sp_offset += offset;
} }
} }
} }


@@ -1313,8 +1313,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(32768), mem: MemArg::FPOffset(32768),
srcloc: None, srcloc: None,
}, },
"0F0090D2EF011D8BE10140F9", "100090D2B063308B010240F9",
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]", "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
)); ));
insns.push(( insns.push((
Inst::ULoad64 { Inst::ULoad64 {
@@ -1322,8 +1322,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(-32768), mem: MemArg::FPOffset(-32768),
srcloc: None, srcloc: None,
}, },
"EFFF8F92EF011D8BE10140F9", "F0FF8F92B063308B010240F9",
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]", "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
)); ));
insns.push(( insns.push((
Inst::ULoad64 { Inst::ULoad64 {
@@ -1331,8 +1331,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(1048576), // 2^20 mem: MemArg::FPOffset(1048576), // 2^20
srcloc: None, srcloc: None,
}, },
"0F02A0D2EF011D8BE10140F9", "1002A0D2B063308B010240F9",
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
)); ));
insns.push(( insns.push((
Inst::ULoad64 { Inst::ULoad64 {
@@ -1340,8 +1340,8 @@ fn test_aarch64_binemit() {
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
srcloc: None, srcloc: None,
}, },
"2F0080D20F02A0F2EF011D8BE10140F9", "300080D21002A0F2B063308B010240F9",
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
)); ));
insns.push(( insns.push((
@@ -2794,7 +2794,7 @@ fn test_aarch64_binemit() {
// Check the encoding is as expected. // Check the encoding is as expected.
let text_size = { let text_size = {
let mut code_sec = MachSectionSize::new(0); let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags); insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size() code_sec.size()
}; };
@@ -2802,7 +2802,7 @@ fn test_aarch64_binemit() {
let mut sections = MachSections::new(); let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size); let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx); let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags); insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink); sections.emit(&mut sink);
let actual_encoding = &sink.stringify(); let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding); assert_eq!(expected_encoding, actual_encoding);


@@ -134,6 +134,11 @@ impl SImm9 {
pub fn bits(&self) -> u32 { pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff (self.value as u32) & 0x1ff
} }
/// Signed value of immediate.
pub fn value(&self) -> i32 {
self.value as i32
}
} }
/// An unsigned, scaled 12-bit offset. /// An unsigned, scaled 12-bit offset.
@@ -172,6 +177,11 @@ impl UImm12Scaled {
pub fn bits(&self) -> u32 { pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff (self.value as u32 / self.scale_ty.bytes()) & 0xfff
} }
/// Value after scaling.
pub fn value(&self) -> u32 {
self.value as u32 * self.scale_ty.bytes()
}
} }
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted /// A shifted immediate value in 'imm12' format: supports 12 bits, shifted


@@ -13,7 +13,6 @@ use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper, Set}; use regalloc::{RegUsageCollector, RegUsageMapper, Set};
use alloc::vec::Vec; use alloc::vec::Vec;
use core::convert::TryFrom;
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use std::string::{String, ToString}; use std::string::{String, ToString};
@@ -741,6 +740,12 @@ pub enum Inst {
SetPinnedReg { SetPinnedReg {
rm: Reg, rm: Reg,
}, },
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
/// controls how MemArg::NominalSPOffset args are lowered.
VirtualSPOffsetAdj {
offset: i64,
},
} }
fn count_zero_half_words(mut value: u64) -> usize { fn count_zero_half_words(mut value: u64) -> usize {
@@ -876,7 +881,7 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
&MemArg::FPOffset(..) => { &MemArg::FPOffset(..) => {
collector.add_use(fp_reg()); collector.add_use(fp_reg());
} }
&MemArg::SPOffset(..) => { &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => {
collector.add_use(stack_reg()); collector.add_use(stack_reg());
} }
} }
@@ -1135,6 +1140,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::SetPinnedReg { rm } => { &Inst::SetPinnedReg { rm } => {
collector.add_use(rm); collector.add_use(rm);
} }
&Inst::VirtualSPOffsetAdj { .. } => {}
} }
} }
@@ -1186,7 +1192,9 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
&mut MemArg::Label(..) => {} &mut MemArg::Label(..) => {}
&mut MemArg::PreIndexed(ref mut r, ..) => map_mod(m, r), &mut MemArg::PreIndexed(ref mut r, ..) => map_mod(m, r),
&mut MemArg::PostIndexed(ref mut r, ..) => map_mod(m, r), &mut MemArg::PostIndexed(ref mut r, ..) => map_mod(m, r),
&mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) => {} &mut MemArg::FPOffset(..)
| &mut MemArg::SPOffset(..)
| &mut MemArg::NominalSPOffset(..) => {}
}; };
} }
@@ -1706,6 +1714,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
&mut Inst::SetPinnedReg { ref mut rm } => { &mut Inst::SetPinnedReg { ref mut rm } => {
map_use(mapper, rm); map_use(mapper, rm);
} }
&mut Inst::VirtualSPOffsetAdj { .. } => {}
} }
} }
@@ -1904,7 +1913,7 @@ impl MachInst for Inst {
// Pretty-printing of instructions. // Pretty-printing of instructions.
fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) {
let (mem_insts, mem) = mem_finalize(0, mem); let (mem_insts, mem) = mem_finalize(0, mem, &mut Default::default());
let mut mem_str = mem_insts let mut mem_str = mem_insts
.into_iter() .into_iter()
.map(|inst| inst.show_rru(mb_rru)) .map(|inst| inst.show_rru(mb_rru))
@@ -2618,42 +2627,58 @@ impl ShowWithRRU for Inst {
let rd = rd.show_rru(mb_rru); let rd = rd.show_rru(mb_rru);
format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
} }
&Inst::LoadAddr { rd, ref mem } => match *mem { &Inst::LoadAddr { rd, ref mem } => {
MemArg::FPOffset(fp_off) => { // TODO: we really should find a better way to avoid duplication of
let alu_op = if fp_off < 0 { // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
ALUOp::Sub64 // expansion stage (i.e., legalization, but without the slow edit-in-place
} else { // of the existing legalization framework).
ALUOp::Add64 let (mem_insts, mem) = mem_finalize(0, mem, &EmitState::default());
}; let mut ret = String::new();
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) for inst in mem_insts.into_iter() {
{ ret.push_str(&inst.show_rru(mb_rru));
let inst = Inst::AluRRImm12 {
alu_op,
rd,
imm12,
rn: fp_reg(),
};
inst.show_rru(mb_rru)
} else {
let mut res = String::new();
let const_insts =
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
for inst in const_insts {
res.push_str(&inst.show_rru(mb_rru));
res.push_str("; ");
}
let inst = Inst::AluRRR {
alu_op,
rd,
rn: fp_reg(),
rm: rd.to_reg(),
};
res.push_str(&inst.show_rru(mb_rru));
res
}
} }
_ => unimplemented!("{:?}", mem), let (reg, offset) = match mem {
}, MemArg::Unscaled(r, simm9) => (r, simm9.value()),
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
-offset as u64
} else {
offset as u64
};
let alu_op = if offset < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if offset == 0 {
let mov = Inst::mov(rd, reg);
ret.push_str(&mov.show_rru(mb_rru));
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
alu_op,
rd,
rn: reg,
imm12,
};
ret.push_str(&add.show_rru(mb_rru));
} else {
let tmp = writable_spilltmp_reg();
for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
ret.push_str(&inst.show_rru(mb_rru));
}
let add = Inst::AluRRR {
alu_op,
rd,
rn: reg,
rm: tmp.to_reg(),
};
ret.push_str(&add.show_rru(mb_rru));
}
ret
}
&Inst::GetPinnedReg { rd } => { &Inst::GetPinnedReg { rd } => {
let rd = rd.show_rru(mb_rru); let rd = rd.show_rru(mb_rru);
format!("get_pinned_reg {}", rd) format!("get_pinned_reg {}", rd)
@@ -2662,6 +2687,7 @@ impl ShowWithRRU for Inst {
let rm = rm.show_rru(mb_rru); let rm = rm.show_rru(mb_rru);
format!("set_pinned_reg {}", rm) format!("set_pinned_reg {}", rm)
} }
&Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
} }
} }
} }


@@ -20,23 +20,21 @@ pub const PINNED_REG: u8 = 21;
const XREG_INDICES: [u8; 31] = [ const XREG_INDICES: [u8; 31] = [
// X0 - X7 // X0 - X7
32, 33, 34, 35, 36, 37, 38, 39, 32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14 // X8 - X15
40, 41, 42, 43, 44, 45, 46, 40, 41, 42, 43, 44, 45, 46, 47,
// X15
59,
// X16, X17 // X16, X17
47, 48, 58, 59,
// X18 // X18
60, 60,
// X19, X20 // X19, X20
49, 50, 48, 49,
// X21, put aside because it's the pinned register. // X21, put aside because it's the pinned register.
58, 57,
// X22 - X28 // X22 - X28
51, 52, 53, 54, 55, 56, 57, 50, 51, 52, 53, 54, 55, 56,
// X29 // X29 (FP)
61, 61,
// X30 // X30 (LR)
62, 62,
]; ];
@@ -125,14 +123,17 @@ pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg()) Writable::from_reg(fp_reg())
} }
/// Get a reference to the "spill temp" register. This register is used to /// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
/// compute the address of a spill slot when a direct offset addressing mode from /// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc /// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
/// and reserve it for this purpose for simplicity; otherwise we need a /// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
/// multi-stage analysis where we first determine how many spill slots we have, /// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
/// then perhaps remove the reg from the pool and recompute regalloc. ///
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
/// to live through call instructions.
pub fn spilltmp_reg() -> Reg { pub fn spilltmp_reg() -> Reg {
xreg(15) xreg(16)
} }
/// Get a writable reference to the spilltmp reg. /// Get a writable reference to the spilltmp reg.
@@ -140,6 +141,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg()) Writable::from_reg(spilltmp_reg())
} }
/// Get a reference to the second temp register. We need this in some edge cases
/// where we need both the spilltmp and another temporary.
///
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
/// free to use otherwise.
pub fn tmp2_reg() -> Reg {
xreg(17)
}
/// Get a writable reference to the tmp2 reg.
pub fn writable_tmp2_reg() -> Writable<Reg> {
Writable::from_reg(tmp2_reg())
}
/// Create the register universe for AArch64. /// Create the register universe for AArch64.
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse { pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
let mut regs = vec![]; let mut regs = vec![];
@@ -173,7 +188,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
for i in 0u8..32u8 { for i in 0u8..32u8 {
// See above for excluded registers. // See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG { if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
continue; continue;
} }
let reg = Reg::new_real( let reg = Reg::new_real(
@@ -211,7 +226,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
regs.len() regs.len()
}; };
regs.push((xreg(15).to_real_reg(), "x15".to_string())); regs.push((xreg(16).to_real_reg(), "x16".to_string()));
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string())); regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string())); regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string())); regs.push((link_reg().to_real_reg(), "lr".to_string()));


@@ -1291,7 +1291,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
assert!(inputs.len() == abi.num_args()); assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() { for (i, input) in inputs.iter().enumerate() {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None); let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) { for inst in abi.gen_copy_reg_to_arg(i, arg_reg) {
ctx.emit(inst); ctx.emit(inst);
} }
} }


@@ -2183,7 +2183,7 @@ fn test_x64_emit() {
// Check the encoding is as expected. // Check the encoding is as expected.
let text_size = { let text_size = {
let mut code_sec = MachSectionSize::new(0); let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags); insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size() code_sec.size()
}; };
@@ -2191,7 +2191,7 @@ fn test_x64_emit() {
let mut sections = MachSections::new(); let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size); let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx); let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags); insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink); sections.emit(&mut sink);
let actual_encoding = &sink.stringify(); let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding); assert_eq!(expected_encoding, actual_encoding);


@@ -950,7 +950,9 @@ impl MachInst for Inst {
} }
impl<O: MachSectionOutput> MachInstEmit<O> for Inst { impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, _flags: &settings::Flags) { type State = ();
fn emit(&self, sink: &mut O, _flags: &settings::Flags, _: &mut Self::State) {
emit::emit(self, sink); emit::emit(self, sink);
} }
} }


@@ -98,7 +98,10 @@ pub trait ABIBody {
fn gen_epilogue(&self) -> Vec<Self::I>; fn gen_epilogue(&self) -> Vec<Self::I>;
/// Returns the full frame size for the given function, after prologue emission has run. This /// Returns the full frame size for the given function, after prologue emission has run. This
/// comprises the spill space, incoming argument space, alignment padding, etc. /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
/// Baldrdash (SpiderMonkey integration).
fn frame_size(&self) -> u32; fn frame_size(&self) -> u32;
/// Get the spill-slot size. /// Get the spill-slot size.
@@ -133,12 +136,7 @@ pub trait ABICall {
fn num_args(&self) -> usize; fn num_args(&self) -> usize;
/// Copy an argument value from a source register, prior to the call. /// Copy an argument value from a source register, prior to the call.
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>( fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Self::I>;
&self,
ctx: &mut C,
idx: usize,
from_reg: Reg,
) -> Vec<Self::I>;
/// Copy a return value into a destination register, after the call returns. /// Copy a return value into a destination register, after the call returns.
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I; fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;


@@ -214,8 +214,10 @@ pub enum MachTerminator<'a> {
/// A trait describing the ability to encode a MachInst into binary machine code. /// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> { pub trait MachInstEmit<O: MachSectionOutput> {
/// Persistent state carried across `emit` invocations.
type State: Default + Clone + Debug;
/// Emit the instruction. /// Emit the instruction.
fn emit(&self, code: &mut O, flags: &Flags); fn emit(&self, code: &mut O, flags: &Flags, state: &mut Self::State);
} }
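
The trait change is the familiar pattern of threading mutable state through emission: each backend picks its own `State` type (aarch64's `EmitState` carries `virtual_sp_offset`; the x64 backend simply uses `()`). A self-contained sketch of the pattern with toy types, not the real `MachInstEmit`:

```rust
use std::fmt::Debug;

/// Toy analogue of the revised trait: emission threads per-function state
/// through instructions in linear (straight-line) emission order.
trait EmitWithState {
    type State: Default + Clone + Debug;
    fn emit(&self, out: &mut Vec<String>, state: &mut Self::State);
}

/// A backend that needs state: track a running SP fixup, aarch64-style.
enum ToyInst {
    VirtualSPOffsetAdj(i64),
    LoadNominalSP { off: i64 },
}

#[derive(Default, Clone, Debug)]
struct ToyState {
    virtual_sp_offset: i64,
}

impl EmitWithState for ToyInst {
    type State = ToyState;
    fn emit(&self, out: &mut Vec<String>, state: &mut ToyState) {
        match self {
            // The meta-instruction emits nothing; it only updates the state.
            ToyInst::VirtualSPOffsetAdj(delta) => state.virtual_sp_offset += delta,
            // A nominal-SP reference folds the running fixup into its offset.
            ToyInst::LoadNominalSP { off } => {
                out.push(format!("ldr x0, [sp, #{}]", off + state.virtual_sp_offset))
            }
        }
    }
}

fn main() {
    let insts = [
        ToyInst::LoadNominalSP { off: 8 },
        ToyInst::VirtualSPOffsetAdj(16),
        ToyInst::LoadNominalSP { off: 8 },
    ];
    let (mut out, mut state) = (Vec::new(), ToyState::default());
    for inst in &insts {
        inst.emit(&mut out, &mut state);
    }
    assert_eq!(out, ["ldr x0, [sp, #8]", "ldr x0, [sp, #24]"]);
    // A stateless backend (like x64 here) would just set `type State = ();`.
}
```
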
/// The result of a `MachBackend::compile_function()` call. Contains machine /// The result of a `MachBackend::compile_function()` call. Contains machine


@@ -526,12 +526,13 @@ impl<I: VCodeInst> VCode<I> {
// Compute block offsets. // Compute block offsets.
let mut code_section = MachSectionSize::new(0); let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()]; let mut block_offsets = vec![0; self.num_blocks()];
let mut state = Default::default();
for &block in &self.final_block_order { for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset); code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[block as usize] = code_section.offset; block_offsets[block as usize] = code_section.offset;
let (start, end) = self.block_ranges[block as usize]; let (start, end) = self.block_ranges[block as usize];
for iix in start..end { for iix in start..end {
self.insts[iix as usize].emit(&mut code_section, flags); self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
} }
} }
@@ -544,13 +545,14 @@ impl<I: VCodeInst> VCode<I> {
// it (so forward references are now possible), and (ii) mutates the // it (so forward references are now possible), and (ii) mutates the
// instructions. // instructions.
let mut code_section = MachSectionSize::new(0); let mut code_section = MachSectionSize::new(0);
let mut state = Default::default();
for &block in &self.final_block_order { for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset); code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[block as usize]; let (start, end) = self.block_ranges[block as usize];
for iix in start..end { for iix in start..end {
self.insts[iix as usize] self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]); .with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
self.insts[iix as usize].emit(&mut code_section, flags); self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
} }
} }
} }
@@ -563,6 +565,7 @@ impl<I: VCodeInst> VCode<I> {
let mut sections = MachSections::new(); let mut sections = MachSections::new();
let code_idx = sections.add_section(0, self.code_size); let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx); let code_section = sections.get_section(code_idx);
let mut state = Default::default();
let flags = self.abi.flags(); let flags = self.abi.flags();
let mut cur_srcloc = None; let mut cur_srcloc = None;
@@ -571,7 +574,7 @@ impl<I: VCodeInst> VCode<I> {
                 while new_offset > code_section.cur_offset_from_start() {
                     // Pad with NOPs up to the aligned block offset.
                     let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
-                    nop.emit(code_section, flags);
+                    nop.emit(code_section, flags, &mut Default::default());
                 }
                 assert_eq!(code_section.cur_offset_from_start(), new_offset);
@@ -586,7 +589,7 @@ impl<I: VCodeInst> VCode<I> {
                     cur_srcloc = Some(srcloc);
                 }
-                self.insts[iix as usize].emit(code_section, flags);
+                self.insts[iix as usize].emit(code_section, flags, &mut state);
             }
             if cur_srcloc.is_some() {
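Each of the walks over the instruction stream above (sizing, branch-offset resolution, final emission) starts from a fresh `Default::default()` state, so every pass observes the same sequence of state transitions and therefore agrees on any state-dependent encoding. A minimal standalone sketch of that pattern follows, with invented names (`Step`, `EmitState`, `encoded_size`) rather than the real `VCode` machinery.

```rust
#[derive(Default, Clone, Debug)]
struct EmitState {
    /// Bytes the real SP currently sits below the nominal SP.
    sp_adjust: i64,
}

enum Step {
    AdjustSpDown(i64),
    LoadNominalSp(i64),
}

/// Size of one step's encoding; it can depend on state accumulated so far
/// (a large real offset might need a longer instruction sequence).
fn encoded_size(step: &Step, state: &mut EmitState) -> u32 {
    match step {
        Step::AdjustSpDown(bytes) => {
            state.sp_adjust += bytes;
            4
        }
        Step::LoadNominalSp(offset) => {
            if offset + state.sp_adjust <= 0xfff {
                4
            } else {
                12 // e.g. materialize the large offset first
            }
        }
    }
}

/// Mirrors a sizing pass: fresh state, then walk every instruction.
fn total_size(steps: &[Step]) -> u32 {
    let mut state = EmitState::default();
    steps.iter().map(|s| encoded_size(s, &mut state)).sum()
}
```

Running `total_size` twice over the same `steps` gives the same answer precisely because the state is reset per pass; the emission passes above rely on the same property to keep measured and emitted sizes in agreement.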

View File

@@ -11,8 +11,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, 8 ; b 12 ; data
-; nextln: blr x15
+; nextln: ldr x16, 8 ; b 12 ; data
+; nextln: blr x16
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret

View File

@@ -45,8 +45,8 @@ block0(v0: i64):
 ; nextln: subs xzr, sp, x0
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: ldr x15
-; nextln: blr x15
+; nextln: ldr x16
+; nextln: blr x16
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -64,13 +64,13 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, [x0]
-; nextln: ldr x15, [x15, #4]
-; nextln: subs xzr, sp, x15
+; nextln: ldr x16, [x0]
+; nextln: ldr x16, [x16, #4]
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: ldr x15
-; nextln: blr x15
+; nextln: ldr x16
+; nextln: blr x16
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -84,8 +84,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: add x15, x0, #176
-; nextln: subs xzr, sp, x15
+; nextln: add x16, x0, #176
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
 ; nextln: sub sp, sp, #176
@@ -104,14 +104,14 @@ block0(v0: i64):
 ; nextln: subs xzr, sp, x0
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: movz x16, #6784
-; nextln: movk x16, #6, LSL #16
-; nextln: add x15, x0, x16, UXTX
-; nextln: subs xzr, sp, x15
+; nextln: movz x17, #6784
+; nextln: movk x17, #6, LSL #16
+; nextln: add x16, x0, x17, UXTX
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: ldr x15, 8 ; b 12 ; data 400000
-; nextln: sub sp, sp, x15, UXTX
+; nextln: ldr x16, 8 ; b 12 ; data 400000
+; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -128,10 +128,10 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, [x0]
-; nextln: ldr x15, [x15, #4]
-; nextln: add x15, x15, #32
-; nextln: subs xzr, sp, x15
+; nextln: ldr x16, [x0]
+; nextln: ldr x16, [x16, #4]
+; nextln: add x16, x16, #32
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
 ; nextln: sub sp, sp, #32
@@ -151,19 +151,19 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, [x0]
-; nextln: ldr x15, [x15, #4]
-; nextln: subs xzr, sp, x15
+; nextln: ldr x16, [x0]
+; nextln: ldr x16, [x16, #4]
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: movz x16, #6784
-; nextln: movk x16, #6, LSL #16
-; nextln: add x15, x15, x16, UXTX
-; nextln: subs xzr, sp, x15
+; nextln: movz x17, #6784
+; nextln: movk x17, #6, LSL #16
+; nextln: add x16, x16, x17, UXTX
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
-; nextln: ldr x15, 8 ; b 12 ; data 400000
-; nextln: sub sp, sp, x15, UXTX
+; nextln: ldr x16, 8 ; b 12 ; data 400000
+; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -179,11 +179,11 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x15, #6784
-; nextln: movk x15, #6, LSL #16
-; nextln: ldr x15, [x0, x15]
-; nextln: add x15, x15, #32
-; nextln: subs xzr, sp, x15
+; nextln: movz x16, #6784
+; nextln: movk x16, #6, LSL #16
+; nextln: ldr x16, [x0, x16]
+; nextln: add x16, x16, #32
+; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
 ; nextln: sub sp, sp, #32
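The updated expectations above all revolve around one comparison pattern: `subs xzr, sp, xN; b.hs 8; udf`, which falls through only if SP is at or above the bound in `xN` and otherwise traps. Depending on the test, the bound is the stack limit itself, the limit plus the frame size (the `add x16, ..., #176` and extended-register `add` forms), or both in sequence. A rough model in plain Rust, purely illustrative (the names and the `Result` shape are not anything the backend emits):

```rust
/// Models `subs xzr, sp, xN; b.hs 8; udf`: continue only if sp >= bound.
fn check_or_trap(sp: u64, bound: u64) -> Result<(), &'static str> {
    if sp >= bound {
        Ok(())
    } else {
        Err("stack overflow trap (udf)")
    }
}

/// Models a prologue that first checks the raw limit, then the limit plus
/// the frame it is about to allocate (some cases above emit only one of the
/// two comparisons).
fn prologue_checks(sp: u64, stack_limit: u64, frame_size: u64) -> Result<(), &'static str> {
    check_or_trap(sp, stack_limit)?;
    check_or_trap(sp, stack_limit.wrapping_add(frame_size))
}
```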

View File

@@ -12,7 +12,7 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sub sp, sp, #16
-; nextln: sub x0, fp, #8
+; nextln: mov x0, sp
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -29,9 +29,9 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, 8 ; b 12 ; data 100016
-; nextln: sub sp, sp, x15, UXTX
-; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0
+; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: sub sp, sp, x16, UXTX
+; nextln: mov x0, sp
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -50,7 +50,7 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sub sp, sp, #16
-; nextln: sub x0, fp, #8
+; nextln: mov x0, sp
 ; nextln: ldur x0, [x0]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
@@ -68,9 +68,9 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, 8 ; b 12 ; data 100016
-; nextln: sub sp, sp, x15, UXTX
-; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0
+; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: sub sp, sp, x16, UXTX
+; nextln: mov x0, sp
 ; nextln: ldur x0, [x0]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
@@ -88,7 +88,7 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sub sp, sp, #16
-; nextln: sub x1, fp, #8
+; nextln: mov x1, sp
 ; nextln: stur x0, [x1]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
@@ -106,9 +106,9 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x15, 8 ; b 12 ; data 100016
-; nextln: sub sp, sp, x15, UXTX
-; nextln: movz x1, #34472; movk x1, #1, LSL #16; sub x1, fp, x1
+; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: sub sp, sp, x16, UXTX
+; nextln: mov x1, sp
 ; nextln: stur x0, [x1]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
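These `stack_addr` expectations change because stack slots are now reached at non-negative offsets from the nominal SP rather than negative offsets from FP: the first slot is simply `sp + 0` (`mov x0, sp`), even for the ~100 KB frame that previously needed a `movz`/`movk`/`sub` sequence to form `fp - offset`. A sketch of the arithmetic, with illustrative names only:

```rust
/// New scheme: slots grow upward from the nominal SP, so the address is a
/// small non-negative offset that fits simple addressing forms.
fn slot_addr_from_nominal_sp(nominal_sp: u64, slot_offset: u64) -> u64 {
    nominal_sp + slot_offset
}

/// Old scheme (for contrast): a negative offset from FP, which for a large
/// frame meant materializing a multi-instruction constant first.
fn slot_addr_from_fp(fp: u64, offset_below_fp: u64) -> u64 {
    fp - offset_below_fp
}
```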