Merge pull request #2142 from cfallin/machinst-abi-x64

x64 new backend: port ABI implementation to shared infrastructure with AArch64.
This commit is contained in:
Chris Fallin
2020-09-08 18:35:02 -07:00
committed by GitHub
17 changed files with 747 additions and 1192 deletions

View File

@@ -13,16 +13,15 @@ use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec; use smallvec::SmallVec;
use std::convert::TryFrom;
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar. // these ABIs are very similar.
/// Support for the AArch64 ABI from the callee side (within a function body). /// Support for the AArch64 ABI from the callee side (within a function body).
pub type AArch64ABIBody = ABIBodyImpl<AArch64MachineImpl>; pub(crate) type AArch64ABICallee = ABICalleeImpl<AArch64MachineDeps>;
/// Support for the AArch64 ABI from the caller side (at a callsite). /// Support for the AArch64 ABI from the caller side (at a callsite).
pub type AArch64ABICall = ABICallImpl<AArch64MachineImpl>; pub(crate) type AArch64ABICaller = ABICallerImpl<AArch64MachineDeps>;
// Spidermonkey specific ABI convention. // Spidermonkey specific ABI convention.
@@ -105,9 +104,9 @@ impl Into<AMode> for StackAMode {
/// AArch64-specific ABI behavior. This struct just serves as an implementation /// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated. /// point for the trait; it is never actually instantiated.
pub struct AArch64MachineImpl; pub(crate) struct AArch64MachineDeps;
impl ABIMachineImpl for AArch64MachineImpl { impl ABIMachineSpec for AArch64MachineDeps {
type I = Inst; type I = Inst;
fn compute_arg_locs( fn compute_arg_locs(
@@ -285,7 +284,8 @@ impl ABIMachineImpl for AArch64MachineImpl {
Inst::Ret Inst::Ret
} }
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u64) -> SmallVec<[Inst; 4]> { fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
let imm = imm as u64;
let mut insts = SmallVec::new(); let mut insts = SmallVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm) { if let Some(imm12) = Imm12::maybe_from_u64(imm) {
insts.push(Inst::AluRRImm12 { insts.push(Inst::AluRRImm12 {
@@ -296,6 +296,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
}); });
} else { } else {
let scratch2 = writable_tmp2_reg(); let scratch2 = writable_tmp2_reg();
assert_ne!(scratch2.to_reg(), from_reg);
insts.extend(Inst::load_constant(scratch2, imm.into())); insts.extend(Inst::load_constant(scratch2, imm.into()));
insts.push(Inst::AluRRRExtend { insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::Add64, alu_op: ALUOp::Add64,
@@ -334,29 +335,29 @@ impl ABIMachineImpl for AArch64MachineImpl {
Inst::LoadAddr { rd: into_reg, mem } Inst::LoadAddr { rd: into_reg, mem }
} }
fn get_fixed_tmp_reg() -> Reg { fn get_stacklimit_reg() -> Reg {
spilltmp_reg() spilltmp_reg()
} }
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i64, ty: Type) -> Inst { fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset, ty); let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_load(into_reg, mem, ty) Inst::gen_load(into_reg, mem, ty)
} }
fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Inst { fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset, ty); let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_store(mem, from_reg, ty) Inst::gen_store(mem, from_reg, ty)
} }
fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Inst; 2]> { fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
if amount == 0 { if amount == 0 {
return SmallVec::new(); return SmallVec::new();
} }
let (amount, is_sub) = if amount > 0 { let (amount, is_sub) = if amount > 0 {
(u64::try_from(amount).unwrap(), false) (amount as u64, false)
} else { } else {
(u64::try_from(-amount).unwrap(), true) (-amount as u64, true)
}; };
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
@@ -389,8 +390,10 @@ impl ABIMachineImpl for AArch64MachineImpl {
ret ret
} }
fn gen_nominal_sp_adj(offset: i64) -> Inst { fn gen_nominal_sp_adj(offset: i32) -> Inst {
Inst::VirtualSPOffsetAdj { offset } Inst::VirtualSPOffsetAdj {
offset: offset as i64,
}
} }
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> { fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
@@ -553,11 +556,12 @@ impl ABIMachineImpl for AArch64MachineImpl {
defs: Vec<Writable<Reg>>, defs: Vec<Writable<Reg>>,
loc: SourceLoc, loc: SourceLoc,
opcode: ir::Opcode, opcode: ir::Opcode,
) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> { tmp: Writable<Reg>,
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
let mut insts = SmallVec::new(); let mut insts = SmallVec::new();
match &dest { match &dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
true, InstIsSafepoint::Yes,
Inst::Call { Inst::Call {
info: Box::new(CallInfo { info: Box::new(CallInfo {
dest: name.clone(), dest: name.clone(),
@@ -570,19 +574,19 @@ impl ABIMachineImpl for AArch64MachineImpl {
)), )),
&CallDest::ExtName(ref name, RelocDistance::Far) => { &CallDest::ExtName(ref name, RelocDistance::Far) => {
insts.push(( insts.push((
false, InstIsSafepoint::No,
Inst::LoadExtName { Inst::LoadExtName {
rd: writable_spilltmp_reg(), rd: tmp,
name: Box::new(name.clone()), name: Box::new(name.clone()),
offset: 0, offset: 0,
srcloc: loc, srcloc: loc,
}, },
)); ));
insts.push(( insts.push((
true, InstIsSafepoint::Yes,
Inst::CallInd { Inst::CallInd {
info: Box::new(CallIndInfo { info: Box::new(CallIndInfo {
rn: spilltmp_reg(), rn: tmp.to_reg(),
uses, uses,
defs, defs,
loc, loc,
@@ -592,7 +596,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
)); ));
} }
&CallDest::Reg(reg) => insts.push(( &CallDest::Reg(reg) => insts.push((
true, InstIsSafepoint::Yes,
Inst::CallInd { Inst::CallInd {
info: Box::new(CallIndInfo { info: Box::new(CallIndInfo {
rn: *reg, rn: *reg,
@@ -608,7 +612,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
insts insts
} }
fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 { fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots. // We allocate in terms of 8-byte slots.
match (rc, ty) { match (rc, ty) {
(RegClass::I64, _) => 1, (RegClass::I64, _) => 1,
@@ -698,9 +702,10 @@ fn get_callee_saves(
} }
} }
} }
// Sort registers for deterministic code output. // Sort registers for deterministic code output. We can do an unstable sort because the
int_saves.sort_by_key(|r| r.to_reg().get_index()); // registers will be unique (there are no dups).
vec_saves.sort_by_key(|r| r.to_reg().get_index()); int_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
(int_saves, vec_saves) (int_saves, vec_saves)
} }

View File

@@ -440,7 +440,7 @@ pub struct EmitState {
} }
impl MachInstEmitState<Inst> for EmitState { impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &dyn ABIBody<I = Inst>) -> Self { fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
EmitState { EmitState {
virtual_sp_offset: 0, virtual_sp_offset: 0,
nominal_sp_to_fp: abi.frame_size() as i64, nominal_sp_to_fp: abi.frame_size() as i64,

View File

@@ -1837,7 +1837,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(inputs.len() == sig.params.len()); assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len()); assert!(outputs.len() == sig.returns.len());
( (
AArch64ABICall::from_func(sig, &extname, dist, loc)?, AArch64ABICaller::from_func(sig, &extname, dist, loc)?,
&inputs[..], &inputs[..],
) )
} }
@@ -1846,7 +1846,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap(); let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len()); assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len()); assert!(outputs.len() == sig.returns.len());
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) (AArch64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
} }
_ => unreachable!(), _ => unreachable!(),
}; };

View File

@@ -47,7 +47,7 @@ impl AArch64Backend {
func: &Function, func: &Function,
flags: settings::Flags, flags: settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> { ) -> CodegenResult<VCode<inst::Inst>> {
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?); let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);
compile::compile::<AArch64Backend>(func, self, abi) compile::compile::<AArch64Backend>(func, self, abi)
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -2498,7 +2498,7 @@ impl MachInstEmit for Inst {
} }
impl MachInstEmitState<Inst> for EmitState { impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &dyn ABIBody<I = Inst>) -> Self { fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
EmitState { EmitState {
virtual_sp_offset: 0, virtual_sp_offset: 0,
nominal_sp_to_fp: abi.frame_size() as i64, nominal_sp_to_fp: abi.frame_size() as i64,

View File

@@ -396,7 +396,7 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
let sig = make_libcall_sig(ctx, insn, call_conv, types::I64); let sig = make_libcall_sig(ctx, insn, call_conv, types::I64);
let loc = ctx.srcloc(insn); let loc = ctx.srcloc(insn);
let mut abi = X64ABICall::from_func(&sig, &extname, dist, loc)?; let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc)?;
abi.emit_stack_pre_adjust(ctx); abi.emit_stack_pre_adjust(ctx);
@@ -1277,7 +1277,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len()); assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len()); assert_eq!(outputs.len(), sig.returns.len());
( (
X64ABICall::from_func(sig, &extname, dist, loc)?, X64ABICaller::from_func(sig, &extname, dist, loc)?,
&inputs[..], &inputs[..],
) )
} }
@@ -1287,7 +1287,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap(); let sig = ctx.call_sig(insn).unwrap();
assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len()); assert_eq!(outputs.len(), sig.returns.len());
(X64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) (X64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
} }
_ => unreachable!(), _ => unreachable!(),

View File

@@ -41,7 +41,7 @@ impl X64Backend {
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> { fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
// This performs lowering to VCode, register-allocates the code, computes // This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission. // block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::X64ABIBody::new(&func, flags)?); let abi = Box::new(abi::X64ABICallee::new(&func, flags)?);
compile::compile::<Self>(&func, self, abi) compile::compile::<Self>(&func, self, abi)
} }
} }

View File

@@ -9,7 +9,7 @@ use regalloc::{Reg, Set, SpillSlot, Writable};
/// Trait implemented by an object that tracks ABI-related state (e.g., stack /// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function. /// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody { pub trait ABICallee {
/// The instruction type for the ISA associated with this ABI. /// The instruction type for the ISA associated with this ABI.
type I: VCodeInst; type I: VCodeInst;
@@ -17,7 +17,7 @@ pub trait ABIBody {
/// as the `maybe_tmp` arg if so. /// as the `maybe_tmp` arg if so.
fn temp_needed(&self) -> bool; fn temp_needed(&self) -> bool;
/// Initialize. This is called after the ABIBody is constructed because it /// Initialize. This is called after the ABICallee is constructed because it
/// may be provided with a temp vreg, which can only be allocated once the /// may be provided with a temp vreg, which can only be allocated once the
/// lowering context exists. /// lowering context exists.
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>); fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
@@ -155,14 +155,14 @@ pub trait ABIBody {
/// callsite. It will usually be computed from the called function's /// callsite. It will usually be computed from the called function's
/// signature. /// signature.
/// ///
/// Unlike `ABIBody` above, methods on this trait are not invoked directly /// Unlike `ABICallee` above, methods on this trait are not invoked directly
/// by the machine-independent code. Rather, the machine-specific lowering /// by the machine-independent code. Rather, the machine-specific lowering
/// code will typically create an `ABICall` when creating machine instructions /// code will typically create an `ABICaller` when creating machine instructions
/// for an IR call instruction inside `lower()`, directly emit the arg and /// for an IR call instruction inside `lower()`, directly emit the arg and
/// and retval copies, and attach the register use/def info to the call. /// and retval copies, and attach the register use/def info to the call.
/// ///
/// This trait is thus provided for convenience to the backends. /// This trait is thus provided for convenience to the backends.
pub trait ABICall { pub trait ABICaller {
/// The instruction type for the ISA associated with this ABI. /// The instruction type for the ISA associated with this ABI.
type I: VCodeInst; type I: VCodeInst;
@@ -203,6 +203,6 @@ pub trait ABICall {
/// sense.) /// sense.)
/// ///
/// This function should only be called once, as it is allowed to re-use /// This function should only be called once, as it is allowed to re-use
/// parts of the ABICall object in emitting instructions. /// parts of the ABICaller object in emitting instructions.
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C); fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
} }

View File

@@ -119,6 +119,7 @@ use crate::{ir, isa};
use alloc::vec::Vec; use alloc::vec::Vec;
use log::{debug, trace}; use log::{debug, trace};
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use std::convert::TryFrom;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::mem; use std::mem;
@@ -142,6 +143,16 @@ pub enum ArgsOrRets {
Rets, Rets,
} }
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}
/// Abstract location for a machine-specific ABI impl to translate into the /// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode. /// appropriate addressing mode.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
@@ -160,7 +171,7 @@ pub enum StackAMode {
/// Trait implemented by machine-specific backend to provide information about /// Trait implemented by machine-specific backend to provide information about
/// register assignments and to allow generating the specific instructions for /// register assignments and to allow generating the specific instructions for
/// stack loads/saves, prologues/epilogues, etc. /// stack loads/saves, prologues/epilogues, etc.
pub trait ABIMachineImpl { pub trait ABIMachineSpec {
/// The instruction type. /// The instruction type.
type I: VCodeInst; type I: VCodeInst;
@@ -207,13 +218,15 @@ pub trait ABIMachineImpl {
fn gen_epilogue_placeholder() -> Self::I; fn gen_epilogue_placeholder() -> Self::I;
/// Generate an add-with-immediate. Note that even if this uses a scratch /// Generate an add-with-immediate. Note that even if this uses a scratch
/// register, the sequence must still be correct if the given source or dest /// register, it must satisfy two requirements:
/// is the register returned by `get_fixed_tmp_reg()`; hence, for machines ///
/// that may need a scratch register to synthesize an arbitrary constant, /// - The add-imm sequence must only clobber caller-save registers, because
/// the machine backend should reserve *another* fixed temp register for /// it will be placed in the prologue before the clobbered callee-save
/// this purpose. (E.g., on AArch64, x16 is the ordinary fixed tmp, and x17 /// registers are saved.
/// is the secondary fixed tmp used to implement this.) ///
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]>; /// - The add-imm sequence must work correctly when `from_reg` and/or
/// `into_reg` are the register returned by `get_stacklimit_reg()`.
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]>;
/// Generate a sequence that traps with a `TrapCode::StackOverflow` code if /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
/// the stack pointer is less than the given limit register (assuming the /// the stack pointer is less than the given limit register (assuming the
@@ -224,21 +237,30 @@ pub trait ABIMachineImpl {
/// SP-based offset). /// SP-based offset).
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I; fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
/// Get a fixed (not used by regalloc) temp. This is needed for certain /// Get a fixed register to use to compute a stack limit. This is needed for
/// sequences generated after the register allocator has already run. /// certain sequences generated after the register allocator has already
fn get_fixed_tmp_reg() -> Reg; /// run. This must satisfy two requirements:
///
/// - It must be a caller-save register, because it will be clobbered in the
/// prologue before the clobbered callee-save registers are saved.
///
/// - It must be safe to pass as an argument and/or destination to
/// `gen_add_imm()`. This is relevant when an addition with a large
/// immediate needs its own temporary; it cannot use the same fixed
/// temporary as this one.
fn get_stacklimit_reg() -> Reg;
/// Generate a load from the given [base+offset] address. /// Generate a load from the given [base+offset] address.
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i64, ty: Type) -> Self::I; fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;
/// Generate a store to the given [base+offset] address. /// Generate a store to the given [base+offset] address.
fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I; fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;
/// Adjust the stack pointer up or down. /// Adjust the stack pointer up or down.
fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]>; fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]>;
/// Generate a meta-instruction that adjusts the nominal SP offset. /// Generate a meta-instruction that adjusts the nominal SP offset.
fn gen_nominal_sp_adj(amount: i64) -> Self::I; fn gen_nominal_sp_adj(amount: i32) -> Self::I;
/// Generate the usual frame-setup sequence for this architecture: e.g., /// Generate the usual frame-setup sequence for this architecture: e.g.,
/// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
@@ -272,18 +294,20 @@ pub trait ABIMachineImpl {
clobbers: &Set<Writable<RealReg>>, clobbers: &Set<Writable<RealReg>>,
) -> SmallVec<[Self::I; 16]>; ) -> SmallVec<[Self::I; 16]>;
/// Generate a call instruction/sequence. /// Generate a call instruction/sequence. This method is provided one
/// temporary register to use to synthesize the called address, if needed.
fn gen_call( fn gen_call(
dest: &CallDest, dest: &CallDest,
uses: Vec<Reg>, uses: Vec<Reg>,
defs: Vec<Writable<Reg>>, defs: Vec<Writable<Reg>>,
loc: SourceLoc, loc: SourceLoc,
opcode: ir::Opcode, opcode: ir::Opcode,
) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>; tmp: Writable<Reg>,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
/// Get the number of spillslots required for the given register-class and /// Get the number of spillslots required for the given register-class and
/// type. /// type.
fn get_spillslot_size(rc: RegClass, ty: Type) -> u32; fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
/// Get the current virtual-SP offset from an instruction-emission state. /// Get the current virtual-SP offset from an instruction-emission state.
fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64; fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
@@ -314,7 +338,7 @@ struct ABISig {
} }
impl ABISig { impl ABISig {
fn from_func_sig<M: ABIMachineImpl>(sig: &ir::Signature) -> CodegenResult<ABISig> { fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
// Compute args and retvals from signature. Handle retvals first, // Compute args and retvals from signature. Handle retvals first,
// because we may need to add a return-area arg to the args. // because we may need to add a return-area arg to the args.
let (rets, stack_ret_space, _) = M::compute_arg_locs( let (rets, stack_ret_space, _) = M::compute_arg_locs(
@@ -353,7 +377,7 @@ impl ABISig {
} }
/// ABI object for a function body. /// ABI object for a function body.
pub struct ABIBodyImpl<M: ABIMachineImpl> { pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Signature: arg and retval regs. /// Signature: arg and retval regs.
sig: ABISig, sig: ABISig,
/// Offsets to each stackslot. /// Offsets to each stackslot.
@@ -405,7 +429,7 @@ fn get_special_purpose_param_register(
} }
} }
impl<M: ABIMachineImpl> ABIBodyImpl<M> { impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance. /// Create a new body ABI instance.
pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> { pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
debug!("ABI: func signature {:?}", f.signature); debug!("ABI: func signature {:?}", f.signature);
@@ -506,8 +530,7 @@ impl<M: ABIMachineImpl> ABIBodyImpl<M> {
// `scratch`. If our stack size doesn't fit into an immediate this // `scratch`. If our stack size doesn't fit into an immediate this
// means we need a second scratch register for loading the stack size // means we need a second scratch register for loading the stack size
// into a register. // into a register.
let scratch = Writable::from_reg(M::get_fixed_tmp_reg()); let scratch = Writable::from_reg(M::get_stacklimit_reg());
let stack_size = u64::from(stack_size);
insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter()); insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter());
insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg())); insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
} }
@@ -532,7 +555,7 @@ impl<M: ABIMachineImpl> ABIBodyImpl<M> {
/// temporary register to store values in if necessary. Currently after we write /// temporary register to store values in if necessary. Currently after we write
/// to this register there's guaranteed to be no spilled values between where /// to this register there's guaranteed to be no spilled values between where
/// it's used, because we're not participating in register allocation anyway! /// it's used, because we're not participating in register allocation anyway!
fn gen_stack_limit<M: ABIMachineImpl>( fn gen_stack_limit<M: ABIMachineSpec>(
f: &ir::Function, f: &ir::Function,
abi: &ABISig, abi: &ABISig,
gv: ir::GlobalValue, gv: ir::GlobalValue,
@@ -542,7 +565,7 @@ fn gen_stack_limit<M: ABIMachineImpl>(
return (reg, insts); return (reg, insts);
} }
fn generate_gv<M: ABIMachineImpl>( fn generate_gv<M: ABIMachineSpec>(
f: &ir::Function, f: &ir::Function,
abi: &ABISig, abi: &ABISig,
gv: ir::GlobalValue, gv: ir::GlobalValue,
@@ -563,7 +586,7 @@ fn generate_gv<M: ABIMachineImpl>(
readonly: _, readonly: _,
} => { } => {
let base = generate_gv::<M>(f, abi, base, insts); let base = generate_gv::<M>(f, abi, base, insts);
let into_reg = Writable::from_reg(M::get_fixed_tmp_reg()); let into_reg = Writable::from_reg(M::get_stacklimit_reg());
insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64)); insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64));
return into_reg.to_reg(); return into_reg.to_reg();
} }
@@ -591,7 +614,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
} }
} }
impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> { impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
type I = M::I; type I = M::I;
fn temp_needed(&self) -> bool { fn temp_needed(&self) -> bool {
@@ -676,6 +699,11 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
} }
&ABIArg::Stack(off, mut ty, ext) => { &ABIArg::Stack(off, mut ty, ext) => {
let from_bits = ty_bits(ty) as u8; let from_bits = ty_bits(ty) as u8;
// A machine ABI implementation should ensure that stack frames
// have "reasonable" size. All current ABIs for machinst
// backends (aarch64 and x64) enforce a 128MB limit.
let off = i32::try_from(off)
.expect("Argument stack offset greater than 2GB; should hit impl limit first");
// Trash the from_reg; it should be its last use. // Trash the from_reg; it should be its last use.
match (ext, from_bits) { match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => {
@@ -864,7 +892,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
if total_sp_adjust > 0 { if total_sp_adjust > 0 {
// sub sp, sp, #total_stacksize // sub sp, sp, #total_stacksize
let adj = total_sp_adjust as i64; let adj = total_sp_adjust as i32;
insts.extend(M::gen_sp_reg_adjust(-adj)); insts.extend(M::gen_sp_reg_adjust(-adj));
} }
@@ -873,7 +901,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
insts.extend(clobber_insts); insts.extend(clobber_insts);
if clobber_size > 0 { if clobber_size > 0 {
insts.push(M::gen_nominal_sp_adj(clobber_size as i64)); insts.push(M::gen_nominal_sp_adj(clobber_size as i32));
} }
self.total_frame_size = Some(total_stacksize); self.total_frame_size = Some(total_stacksize);
@@ -911,7 +939,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
} }
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
M::get_spillslot_size(rc, ty) M::get_number_of_spillslots_for_value(rc, ty)
} }
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I { fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
@@ -930,7 +958,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
} }
} }
fn abisig_to_uses_and_defs<M: ABIMachineImpl>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) { fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs. // Compute uses: all arg regs.
let mut uses = Vec::new(); let mut uses = Vec::new();
for arg in &sig.args { for arg in &sig.args {
@@ -953,7 +981,7 @@ fn abisig_to_uses_and_defs<M: ABIMachineImpl>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
} }
/// ABI object for a callsite. /// ABI object for a callsite.
pub struct ABICallImpl<M: ABIMachineImpl> { pub struct ABICallerImpl<M: ABIMachineSpec> {
/// The called function's signature. /// The called function's signature.
sig: ABISig, sig: ABISig,
/// All uses for the callsite, i.e., function args. /// All uses for the callsite, i.e., function args.
@@ -979,17 +1007,17 @@ pub enum CallDest {
Reg(Reg), Reg(Reg),
} }
impl<M: ABIMachineImpl> ABICallImpl<M> { impl<M: ABIMachineSpec> ABICallerImpl<M> {
/// Create a callsite ABI object for a call directly to the specified function. /// Create a callsite ABI object for a call directly to the specified function.
pub fn from_func( pub fn from_func(
sig: &ir::Signature, sig: &ir::Signature,
extname: &ir::ExternalName, extname: &ir::ExternalName,
dist: RelocDistance, dist: RelocDistance,
loc: ir::SourceLoc, loc: ir::SourceLoc,
) -> CodegenResult<ABICallImpl<M>> { ) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?; let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig); let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallImpl { Ok(ABICallerImpl {
sig, sig,
uses, uses,
defs, defs,
@@ -1007,10 +1035,10 @@ impl<M: ABIMachineImpl> ABICallImpl<M> {
ptr: Reg, ptr: Reg,
loc: ir::SourceLoc, loc: ir::SourceLoc,
opcode: ir::Opcode, opcode: ir::Opcode,
) -> CodegenResult<ABICallImpl<M>> { ) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?; let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig); let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallImpl { Ok(ABICallerImpl {
sig, sig,
uses, uses,
defs, defs,
@@ -1022,15 +1050,14 @@ impl<M: ABIMachineImpl> ABICallImpl<M> {
} }
} }
fn adjust_stack_and_nominal_sp<M: ABIMachineImpl, C: LowerCtx<I = M::I>>( fn adjust_stack_and_nominal_sp<M: ABIMachineSpec, C: LowerCtx<I = M::I>>(
ctx: &mut C, ctx: &mut C,
off: u64, off: i32,
is_sub: bool, is_sub: bool,
) { ) {
if off == 0 { if off == 0 {
return; return;
} }
let off = off as i64;
let amt = if is_sub { -off } else { off }; let amt = if is_sub { -off } else { off };
for inst in M::gen_sp_reg_adjust(amt) { for inst in M::gen_sp_reg_adjust(amt) {
ctx.emit(inst); ctx.emit(inst);
@@ -1038,7 +1065,7 @@ fn adjust_stack_and_nominal_sp<M: ABIMachineImpl, C: LowerCtx<I = M::I>>(
ctx.emit(M::gen_nominal_sp_adj(-amt)); ctx.emit(M::gen_nominal_sp_adj(-amt));
} }
impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> { impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
type I = M::I; type I = M::I;
fn num_args(&self) -> usize { fn num_args(&self) -> usize {
@@ -1051,12 +1078,12 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) { fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
let off = self.sig.stack_arg_space + self.sig.stack_ret_space; let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ true) adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)
} }
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) { fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
let off = self.sig.stack_arg_space + self.sig.stack_ret_space; let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ false) adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ false)
} }
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>( fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
@@ -1152,13 +1179,13 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
)); ));
self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
} }
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
for (is_safepoint, inst) in for (is_safepoint, inst) in
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter() M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
{ {
if is_safepoint { match is_safepoint {
ctx.emit_safepoint(inst); InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
} else { InstIsSafepoint::No => ctx.emit(inst),
ctx.emit(inst);
} }
} }
} }

View File

@@ -13,7 +13,7 @@ use regalloc::{allocate_registers_with_opts, Algorithm, Options};
pub fn compile<B: LowerBackend + MachBackend>( pub fn compile<B: LowerBackend + MachBackend>(
f: &Function, f: &Function,
b: &B, b: &B,
abi: Box<dyn ABIBody<I = B::MInst>>, abi: Box<dyn ABICallee<I = B::MInst>>,
) -> CodegenResult<VCode<B::MInst>> ) -> CodegenResult<VCode<B::MInst>>
where where
B::MInst: ShowWithRRU, B::MInst: ShowWithRRU,

View File

@@ -13,7 +13,7 @@ use crate::ir::{
ValueDef, ValueDef,
}; };
use crate::machinst::{ use crate::machinst::{
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder, ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
VCodeInst, VCodeInst,
}; };
use crate::CodegenResult; use crate::CodegenResult;
@@ -61,8 +61,8 @@ pub trait LowerCtx {
// Function-level queries: // Function-level queries:
/// Get the `ABIBody`. /// Get the `ABICallee`.
fn abi(&mut self) -> &dyn ABIBody<I = Self::I>; fn abi(&mut self) -> &dyn ABICallee<I = Self::I>;
/// Get the (virtual) register that receives the return value. A return /// Get the (virtual) register that receives the return value. A return
/// instruction should lower into a sequence that fills this register. (Why /// instruction should lower into a sequence that fills this register. (Why
/// not allow the backend to specify its own result register for the return? /// not allow the backend to specify its own result register for the return?
@@ -312,7 +312,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
/// Prepare a new lowering context for the given IR function. /// Prepare a new lowering context for the given IR function.
pub fn new( pub fn new(
f: &'func Function, f: &'func Function,
abi: Box<dyn ABIBody<I = I>>, abi: Box<dyn ABICallee<I = I>>,
block_order: BlockLoweringOrder, block_order: BlockLoweringOrder,
) -> CodegenResult<Lower<'func, I>> { ) -> CodegenResult<Lower<'func, I>> {
let mut vcode = VCodeBuilder::new(abi, block_order); let mut vcode = VCodeBuilder::new(abi, block_order);
@@ -844,7 +844,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
type I = I; type I = I;
fn abi(&mut self) -> &dyn ABIBody<I = I> { fn abi(&mut self) -> &dyn ABICallee<I = I> {
self.vcode.abi() self.vcode.abi()
} }

View File

@@ -282,7 +282,7 @@ pub trait MachInstEmit: MachInst {
/// emitting a function body. /// emitting a function body.
pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug { pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
/// Create a new emission state given the ABI object. /// Create a new emission state given the ABI object.
fn new(abi: &dyn ABIBody<I = I>) -> Self; fn new(abi: &dyn ABICallee<I = I>) -> Self;
/// Update the emission state before emitting an instruction that is a /// Update the emission state before emitting an instruction that is a
/// safepoint. /// safepoint.
fn pre_safepoint(&mut self, _stack_map: StackMap) {} fn pre_safepoint(&mut self, _stack_map: StackMap) {}

View File

@@ -86,7 +86,7 @@ pub struct VCode<I: VCodeInst> {
block_order: BlockLoweringOrder, block_order: BlockLoweringOrder,
/// ABI object. /// ABI object.
abi: Box<dyn ABIBody<I = I>>, abi: Box<dyn ABICallee<I = I>>,
/// Safepoint instruction indices. Filled in post-regalloc. (Prior to /// Safepoint instruction indices. Filled in post-regalloc. (Prior to
/// regalloc, the safepoint instructions are listed in the separate /// regalloc, the safepoint instructions are listed in the separate
@@ -132,7 +132,7 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> { impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder. /// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> { pub fn new(abi: Box<dyn ABICallee<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
let reftype_class = I::ref_type_regclass(abi.flags()); let reftype_class = I::ref_type_regclass(abi.flags());
let vcode = VCode::new(abi, block_order); let vcode = VCode::new(abi, block_order);
let stack_map_info = StackmapRequestInfo { let stack_map_info = StackmapRequestInfo {
@@ -151,7 +151,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
} }
/// Access the ABI object. /// Access the ABI object.
pub fn abi(&mut self) -> &mut dyn ABIBody<I = I> { pub fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
&mut *self.vcode.abi &mut *self.vcode.abi
} }
@@ -263,7 +263,7 @@ fn is_reftype(ty: Type) -> bool {
impl<I: VCodeInst> VCode<I> { impl<I: VCodeInst> VCode<I> {
/// New empty VCode. /// New empty VCode.
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> { fn new(abi: Box<dyn ABICallee<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
VCode { VCode {
liveins: abi.liveins(), liveins: abi.liveins(),
liveouts: abi.liveouts(), liveouts: abi.liveouts(),

View File

@@ -11,8 +11,8 @@ block0(v0: i64):
; check: stp fp, lr, [sp, #-16]! ; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp ; nextln: mov fp, sp
; nextln: ldr x16, 8 ; b 12 ; data ; nextln: ldr x1, 8 ; b 12 ; data
; nextln: blr x16 ; nextln: blr x1
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
@@ -28,8 +28,8 @@ block0(v0: i32):
; check: stp fp, lr, [sp, #-16]! ; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp ; nextln: mov fp, sp
; nextln: mov w0, w0 ; nextln: mov w0, w0
; nextln: ldr x16, 8 ; b 12 ; data ; nextln: ldr x1, 8 ; b 12 ; data
; nextln: blr x16 ; nextln: blr x1
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
@@ -57,8 +57,8 @@ block0(v0: i32):
; check: stp fp, lr, [sp, #-16]! ; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp ; nextln: mov fp, sp
; nextln: sxtw x0, w0 ; nextln: sxtw x0, w0
; nextln: ldr x16, 8 ; b 12 ; data ; nextln: ldr x1, 8 ; b 12 ; data
; nextln: blr x16 ; nextln: blr x1
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
@@ -99,8 +99,8 @@ block0(v0: i8):
; nextln: movz x7, #42 ; nextln: movz x7, #42
; nextln: sxtb x8, w8 ; nextln: sxtb x8, w8
; nextln: stur x8, [sp] ; nextln: stur x8, [sp]
; nextln: ldr x16, 8 ; b 12 ; data ; nextln: ldr x8, 8 ; b 12 ; data
; nextln: blr x16 ; nextln: blr x8
; nextln: add sp, sp, #16 ; nextln: add sp, sp, #16
; nextln: virtual_sp_offset_adjust -16 ; nextln: virtual_sp_offset_adjust -16
; nextln: mov sp, fp ; nextln: mov sp, fp

View File

@@ -83,12 +83,12 @@ block3(v7: r64, v8: r64):
; nextln: mov x19, x0 ; nextln: mov x19, x0
; nextln: mov x20, x1 ; nextln: mov x20, x1
; nextln: mov x0, x19 ; nextln: mov x0, x19
; nextln: ldr x16, 8 ; b 12 ; data ; nextln: ldr x1, 8 ; b 12 ; data
; nextln: stur x0, [sp, #24] ; nextln: stur x0, [sp, #24]
; nextln: stur x19, [sp, #32] ; nextln: stur x19, [sp, #32]
; nextln: stur x20, [sp, #40] ; nextln: stur x20, [sp, #40]
; nextln: (safepoint: slots [S0, S1, S2] ; nextln: (safepoint: slots [S0, S1, S2]
; nextln: blr x16 ; nextln: blr x1
; nextln: ldur x19, [sp, #32] ; nextln: ldur x19, [sp, #32]
; nextln: ldur x20, [sp, #40] ; nextln: ldur x20, [sp, #40]
; nextln: add x1, sp, #16 ; nextln: add x1, sp, #16

View File

@@ -44,8 +44,8 @@ block0(v0: i64):
; nextln: mov fp, sp ; nextln: mov fp, sp
; nextln: subs xzr, sp, x0 ; nextln: subs xzr, sp, x0
; nextln: b.hs 8 ; udf ; nextln: b.hs 8 ; udf
; nextln: ldr x16 ; nextln: ldr x0
; nextln: blr x16 ; nextln: blr x0
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
@@ -67,8 +67,8 @@ block0(v0: i64):
; nextln: ldur x16, [x16, #4] ; nextln: ldur x16, [x16, #4]
; nextln: subs xzr, sp, x16 ; nextln: subs xzr, sp, x16
; nextln: b.hs 8 ; udf ; nextln: b.hs 8 ; udf
; nextln: ldr x16 ; nextln: ldr x0
; nextln: blr x16 ; nextln: blr x0
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret