Cranelift: Simplify leaf functions that do not use the stack (#2960)

* Cranelift AArch64: Simplify leaf functions that do not use the stack

Leaf functions that do not use the stack (e.g. do not clobber any
callee-saved registers) do not need a frame record.

Copyright (c) 2021, Arm Limited.
This commit is contained in:
Anton Kirilov
2021-08-27 11:12:37 +01:00
committed by GitHub
parent 12515e6646
commit 7b98be1bee
34 changed files with 650 additions and 1385 deletions

View File

@@ -693,20 +693,30 @@ impl ABIMachineSpec for AArch64MachineDeps {
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let mut clobbered_int = vec![];
let mut clobbered_vec = vec![];
for &reg in clobbered_callee_saves.iter() {
match reg.to_reg().get_class() {
RegClass::I64 => clobbered_int.push(reg),
RegClass::V128 => clobbered_vec.push(reg),
class => panic!("Unexpected RegClass: {:?}", class),
}
}
let (int_save_bytes, vec_save_bytes) =
saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
let mut insts = SmallVec::new();
if flags.unwind_info() {
if flags.unwind_info() && setup_frame {
// The *unwind* frame (but not the actual frame) starts at the
// clobbers, just below the saved FP/LR pair.
insts.push(Inst::Unwind {
@@ -916,7 +926,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (clobbered_int, clobbered_vec) = get_regs_restored_in_epilogue(call_conv, clobbers);
// Free the fixed frame if necessary.
if fixed_frame_storage_size > 0 {
@@ -1180,6 +1190,36 @@ impl ABIMachineSpec for AArch64MachineDeps {
ir::ArgumentExtension::None
}
}
/// Collect the clobbered registers that the ABI obliges this function to
/// preserve, in a deterministic (index-sorted) order.
fn get_clobbered_callee_saves(
    call_conv: isa::CallConv,
    regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
    // Keep only those clobbers that are callee-saved under `call_conv`.
    let mut saved = Vec::new();
    for &reg in regs.iter() {
        if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
            saved.push(reg);
        }
    }
    // Sort registers for deterministic code output. An unstable sort is
    // fine because the registers are unique (there are no dups).
    saved.sort_unstable_by_key(|r| r.to_reg().get_index());
    saved
}
/// Decide whether the usual frame-setup sequence (FP/LR save and FP
/// update) is required for this function.
fn is_frame_setup_needed(
    is_leaf: bool,
    stack_args_size: u32,
    num_clobbered_callee_saves: usize,
    fixed_frame_storage_size: u32,
) -> bool {
    // Non-leaf functions always get a frame record.
    if !is_leaf {
        return true;
    }
    // The function arguments that are passed on the stack are addressed
    // relative to the Frame Pointer, so they force a frame; so does any
    // clobber-save area or fixed frame storage.
    stack_args_size > 0
        || num_clobbered_callee_saves > 0
        || fixed_frame_storage_size > 0
}
}
/// Is this type supposed to be seen on this machine? E.g. references of the
@@ -1224,7 +1264,7 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
/// Return the set of all integer and vector registers that must be saved in the
/// prologue and restored in the epilogue, given the set of all registers
/// written by the function's body.
fn get_regs_saved_in_prologue(
fn get_regs_restored_in_epilogue(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {

View File

@@ -66,3 +66,107 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
Some(8)
}
}
// Tests for SystemV unwind-info generation on AArch64, checking that frame
// setup (and its unwind directives) is emitted only when needed.
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
StackSlotKind,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
// A function with an explicit 64-byte stack slot uses the stack, so a frame
// record is set up; the expected FDE records the CFA offset, the FP/LR
// (registers 29/30) saves, and the CFA moving to the frame pointer.
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
}
// Builds a trivial function (one block, bare return) with the given call
// convention and, optionally, one explicit stack slot.
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
// A leaf function with multiple returns and no stack use needs no frame
// record, so its FDE carries an empty instruction list.
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }");
}
// Builds a leaf function with two return points: block0 branches on its
// i32 parameter either to block2 or (via a jump) to block1, and both
// target blocks return.
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
}
}

View File

@@ -218,15 +218,11 @@ mod test {
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden[..]);
@@ -277,8 +273,6 @@ mod test {
.unwrap();
let code = &result.buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234 // #4660
// add w0, w0, w1
// mov w1, w0
@@ -291,13 +285,11 @@ mod test {
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 253, 123, 193, 168, 192, 3,
95, 214,
129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0,
1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70,
130, 210, 0, 0, 1, 75, 192, 3, 95, 214,
];
assert_eq!(code, &golden[..]);

View File

@@ -316,8 +316,9 @@ impl ABIMachineSpec for Arm32MachineDeps {
/// nominal SP offset; caller will do that.
fn gen_clobber_save(
_call_conv: isa::CallConv,
_setup_frame: bool,
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
@@ -325,8 +326,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)).into_iter());
}
let clobbered_vec = get_callee_saves(clobbers);
let mut clobbered_vec: Vec<_> = clobbered_vec
let mut clobbered_vec: Vec<_> = clobbered_callee_saves
.into_iter()
.map(|r| r.to_reg().to_reg())
.collect();
@@ -345,14 +345,14 @@ impl ABIMachineSpec for Arm32MachineDeps {
}
fn gen_clobber_restore(
_call_conv: isa::CallConv,
call_conv: isa::CallConv,
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_vec = get_callee_saves(clobbers);
let clobbered_vec = Self::get_clobbered_callee_saves(call_conv, clobbers);
let mut clobbered_vec: Vec<_> = clobbered_vec
.into_iter()
.map(|r| Writable::from_reg(r.to_reg().to_reg()))
@@ -468,14 +468,11 @@ impl ABIMachineSpec for Arm32MachineDeps {
) -> ir::ArgumentExtension {
specified
}
}
fn is_callee_save(r: RealReg) -> bool {
let enc = r.get_hw_encoding();
4 <= enc && enc <= 10
}
fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
fn get_clobbered_callee_saves(
_call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut ret = Vec::new();
for &reg in regs.iter() {
if is_callee_save(reg.to_reg()) {
@@ -486,6 +483,21 @@ fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
// Sort registers for deterministic code output.
ret.sort_by_key(|r| r.to_reg().get_index());
ret
}
// The ARM32 backend never omits the frame record: frame setup is
// unconditionally required, regardless of leaf-ness or stack usage.
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
true
}
}
/// Whether `r` must be preserved by the callee: registers with hardware
/// encodings 4 through 10 inclusive are callee-saved here.
fn is_callee_save(r: RealReg) -> bool {
    let enc = r.get_hw_encoding();
    (4..=10).contains(&enc)
}
fn is_reg_clobbered_by_call(r: RealReg) -> bool {

View File

@@ -459,16 +459,25 @@ impl ABIMachineSpec for S390xMachineDeps {
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
_setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let mut clobbered_fpr = vec![];
let mut clobbered_gpr = vec![];
for &reg in clobbered_callee_saves.iter() {
match reg.to_reg().get_class() {
RegClass::I64 => clobbered_gpr.push(reg),
RegClass::F64 => clobbered_fpr.push(reg),
class => panic!("Unexpected RegClass: {:?}", class),
}
}
// Collect clobbered registers.
let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
let mut first_clobbered_gpr = 16;
for reg in clobbered_gpr {
let enc = reg.to_reg().get_hw_encoding();
@@ -718,6 +727,32 @@ impl ABIMachineSpec for S390xMachineDeps {
) -> ir::ArgumentExtension {
specified
}
/// Return the clobbered registers that are callee-saved under `call_conv`,
/// sorted by register index so that code output is deterministic.
fn get_clobbered_callee_saves(
    call_conv: isa::CallConv,
    regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
    // Start from all clobbers, then drop the ones the ABI lets us
    // clobber freely (keeping only the callee-saved ones).
    let mut saved: Vec<Writable<RealReg>> = regs.iter().cloned().collect();
    saved.retain(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()));
    // Sort registers for deterministic code output. We can do an unstable
    // sort because the registers will be unique (there are no dups).
    saved.sort_unstable_by_key(|r| r.to_reg().get_index());
    saved
}
// s390x never uses the generic frame-setup path: it always answers `false`
// here because the whole call-frame construction is folded into
// gen_clobber_save() instead.
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
// The call frame set-up is handled by gen_clobber_save().
false
}
}
fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {

View File

@@ -496,18 +496,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
let mut insts = SmallVec::new();
// Find all clobbered registers that are callee-save.
let clobbered = get_callee_saves(&call_conv, clobbers);
let clobbered_size = compute_clobber_size(&clobbered);
let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
if flags.unwind_info() {
if flags.unwind_info() && setup_frame {
// Emit unwind info: start the frame. The frame (from unwind
// consumers' point of view) starts at clobbers, just below
// the FP and return address. Spill slots and stack slots are
@@ -534,7 +533,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
// Store each clobbered register in order at offsets from RSP,
// placing them above the fixed frame slots.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.get_class() {
@@ -579,14 +578,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
let clobbered = get_callee_saves(&call_conv, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered);
let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);
// Restore regs by loading from offsets of RSP. RSP will be
// returned to nominal-RSP at this point, so we can use the
// same offsets that we used when saving clobbers above.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
match rreg.get_class() {
RegClass::I64 => {
@@ -797,6 +796,47 @@ impl ABIMachineSpec for X64ABIMachineSpec {
ir::ArgumentExtension::None
}
}
fn get_clobbered_callee_saves(
call_conv: CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
.iter()
.cloned()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
// x64 unconditionally generates the usual frame-setup sequence; no input
// can make it skip the frame.
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
true
}
}
impl From<StackAMode> for SyntheticAmode {
@@ -984,35 +1024,6 @@ fn is_callee_save_fastcall(r: RealReg) -> bool {
}
}
// Returns the clobbered registers that are callee-saved under `call_conv`,
// sorted by register index for deterministic code output. (This is the
// pre-refactor free function; the diff above moves the same logic into the
// trait method `get_clobbered_callee_saves`.)
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
.iter()
.cloned()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
// Not implemented for this calling convention yet.
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
// These conventions are not handled by the x64 backend.
CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {

View File

@@ -428,20 +428,35 @@ pub trait ABIMachineSpec {
/// Generate a probestack call.
fn gen_probestack(_frame_size: u32) -> SmallInstVec<Self::I>;
/// Generate a clobber-save sequence. This takes the list of *all* registers
/// written/modified by the function body. The implementation here is
/// responsible for determining which of these are callee-saved according to
/// the ABI. It should return a sequence of instructions that "push" or
/// otherwise save these values to the stack. The sequence of instructions
/// should adjust the stack pointer downward, and should align as necessary
/// according to ABI requirements.
/// Get all clobbered registers that are callee-saved according to the ABI; the result
/// contains the registers in a sorted order.
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>>;
/// Determine whether it is necessary to generate the usual frame-setup
/// sequence (refer to gen_prologue_frame_setup()).
fn is_frame_setup_needed(
is_leaf: bool,
stack_args_size: u32,
num_clobbered_callee_saves: usize,
fixed_frame_storage_size: u32,
) -> bool;
/// Generate a clobber-save sequence. The implementation here should return
/// a sequence of instructions that "push" or otherwise save to the stack all
/// registers written/modified by the function body that are callee-saved.
/// The sequence of instructions should adjust the stack pointer downward,
/// and should align as necessary according to ABI requirements.
///
/// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
@@ -615,6 +630,8 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Are we to invoke the probestack function in the prologue? If so,
/// what is the minimum size at which we must invoke it?
probestack_min_frame: Option<u32>,
/// Whether it is necessary to generate the usual frame-setup sequence.
setup_frame: bool,
_mach: PhantomData<M>,
}
@@ -706,6 +723,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
is_leaf: f.is_leaf(),
stack_limit,
probestack_min_frame,
setup_frame: true,
_mach: PhantomData,
})
}
@@ -1248,12 +1266,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_prologue(&mut self) -> SmallInstVec<Self::I> {
let mut insts = smallvec![];
if !self.call_conv.extends_baldrdash() {
// set up frame
insts.extend(M::gen_prologue_frame_setup(&self.flags).into_iter());
}
let bytes = M::word_bytes();
let mut total_stacksize = self.stackslots_size + bytes * self.spillslots.unwrap() as u32;
if self.call_conv.extends_baldrdash() {
@@ -1265,8 +1277,23 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
let mask = M::stack_align(self.call_conv) - 1;
let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack.
let clobbered_callee_saves = M::get_clobbered_callee_saves(self.call_conv, &self.clobbered);
let mut insts = smallvec![];
if !self.call_conv.extends_baldrdash() {
self.fixed_frame_storage_size += total_stacksize;
self.setup_frame = M::is_frame_setup_needed(
self.is_leaf,
self.stack_args_size(),
clobbered_callee_saves.len(),
self.fixed_frame_storage_size,
);
if self.setup_frame {
// set up frame
insts.extend(M::gen_prologue_frame_setup(&self.flags).into_iter());
}
// Leaf functions with zero stack don't need a stack check if one's
// specified, otherwise always insert the stack check.
if total_stacksize > 0 || !self.is_leaf {
@@ -1280,16 +1307,14 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
}
}
if total_stacksize > 0 {
self.fixed_frame_storage_size += total_stacksize;
}
}
// Save clobbered registers.
let (clobber_size, clobber_insts) = M::gen_clobber_save(
self.call_conv,
self.setup_frame,
&self.flags,
&self.clobbered,
&clobbered_callee_saves,
self.fixed_frame_storage_size,
self.outgoing_args_size,
);
@@ -1329,7 +1354,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// offset for the rest of the body.
if !self.call_conv.extends_baldrdash() {
if self.setup_frame {
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
}
insts.push(M::gen_ret());
}

View File

@@ -9,10 +9,7 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, UXTW]
; nextln: ret
function %f2(i64, i32) -> i32 {
@@ -22,10 +19,7 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, UXTW]
; nextln: ret
function %f3(i64, i32) -> i32 {
@@ -35,10 +29,7 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, SXTW]
; nextln: ret
function %f4(i64, i32) -> i32 {
@@ -48,10 +39,7 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, SXTW]
; nextln: ret
function %f5(i64, i32) -> i32 {
@@ -62,10 +50,7 @@ block0(v0: i64, v1: i32):
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, SXTW]
; nextln: ret
function %f6(i64, i32) -> i32 {
@@ -76,10 +61,7 @@ block0(v0: i64, v1: i32):
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0, w1, SXTW]
; nextln: ret
function %f7(i32, i32) -> i32 {
@@ -91,11 +73,8 @@ block0(v0: i32, v1: i32):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; check: mov w0, w0
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f8(i64, i32) -> i32 {
@@ -112,13 +91,10 @@ block0(v0: i64, v1: i32):
; v6+4 = 2*v5 = 2*v4 + 2*v0 + 4 = 2*v2 + 2*v3 + 2*v0 + 4
; = 2*sextend($x1) + 2*$x0 + 68
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x2, x0, #68
; check: add x2, x0, #68
; nextln: add x0, x2, x0
; nextln: add x0, x0, x1, SXTW
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f9(i64, i64, i64) -> i32 {
@@ -133,12 +109,9 @@ block0(v0: i64, v1: i64, v2: i64):
; v6 = $x0 + $x1 + $x2 + 48
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x2
; check: add x0, x0, x2
; nextln: add x0, x0, x1
; nextln: ldur w0, [x0, #48]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10(i64, i64, i64) -> i32 {
@@ -153,13 +126,10 @@ block0(v0: i64, v1: i64, v2: i64):
; v6 = $x0 + $x1 + $x2 + 4100
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #4100
; check: movz x3, #4100
; nextln: add x1, x3, x1
; nextln: add x1, x1, x2
; nextln: ldr w0, [x1, x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10() -> i32 {
@@ -171,11 +141,8 @@ block0:
; v6 = $x0 + $x1 + $x2 + 48
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1234
; check: movz x0, #1234
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f11(i64) -> i32 {
@@ -186,11 +153,8 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, #8388608
; check: add x0, x0, #8388608
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f12(i64) -> i32 {
@@ -201,11 +165,8 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub x0, x0, #4
; check: sub x0, x0, #4
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f13(i64) -> i32 {
@@ -216,13 +177,10 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz w1, #51712
; check: movz w1, #51712
; nextln: movk w1, #15258, LSL #16
; nextln: add x0, x1, x0
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f14(i32) -> i32 {
@@ -232,11 +190,8 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; check: sxtw x0, w0
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f15(i32, i32) -> i32 {
@@ -248,11 +203,8 @@ block0(v0: i32, v1: i32):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; check: sxtw x0, w0
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f16(i64) -> i32 {
@@ -263,10 +215,7 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; check: ldr w0, [x0]
; nextln: ret
function %f17(i64) -> i32 {
@@ -277,10 +226,7 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldur w0, [x0, #4]
; nextln: ldp fp, lr, [sp], #16
; check: ldur w0, [x0, #4]
; nextln: ret
function %f18(i64, i32) -> i16x8 {
@@ -290,11 +236,8 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr d0, [x0, w1, UXTW]
; check: ldr d0, [x0, w1, UXTW]
; nextln: sxtl v0.8h, v0.8b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f19(i64, i64) -> i32x4 {
@@ -303,12 +246,9 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x1
; check: add x0, x0, x1
; nextln: ldr d0, [x0, #8]
; nextln: uxtl v0.4s, v0.4h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f20(i64, i32) -> i64x2 {
@@ -318,11 +258,8 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr d0, [x0, w1, SXTW]
; check: ldr d0, [x0, w1, SXTW]
; nextln: uxtl v0.2d, v0.2s
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f18(i64, i64, i64) -> i32 {
@@ -333,11 +270,8 @@ block0(v0: i64, v1: i64, v2: i64):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn w0, #4097
; check: movn w0, #4097
; nextln: ldrsh x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f19(i64, i64, i64) -> i32 {
@@ -348,11 +282,8 @@ block0(v0: i64, v1: i64, v2: i64):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #4098
; check: movz x0, #4098
; nextln: ldrsh x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f20(i64, i64, i64) -> i32 {
@@ -363,12 +294,9 @@ block0(v0: i64, v1: i64, v2: i64):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn w0, #4097
; check: movn w0, #4097
; nextln: sxtw x0, w0
; nextln: ldrsh x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f21(i64, i64, i64) -> i32 {
@@ -379,12 +307,9 @@ block0(v0: i64, v1: i64, v2: i64):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #4098
; check: movz x0, #4098
; nextln: sxtw x0, w0
; nextln: ldrsh x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -395,13 +320,10 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: ldp x2, x1, [x1]
; nextln: stp x2, x1, [x0]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -412,13 +334,10 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: ldp x2, x1, [x1, #16]
; nextln: stp x2, x1, [x0, #16]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %i128_imm_offset_large(i64) -> i128 {
@@ -428,13 +347,10 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: ldp x2, x1, [x1, #504]
; nextln: stp x2, x1, [x0, #504]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %i128_imm_offset_negative_large(i64) -> i128 {
@@ -444,13 +360,10 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: ldp x2, x1, [x1, #-512]
; nextln: stp x2, x1, [x0, #-512]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -462,13 +375,10 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: ldp x2, x1, [x1, #32]
; nextln: stp x2, x1, [x0, #32]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -481,14 +391,11 @@ block0(v0: i32):
}
; TODO: We should be able to deduplicate the sxtw instruction
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x1, w0
; check: sxtw x1, w0
; nextln: ldp x2, x1, [x1]
; nextln: sxtw x0, w0
; nextln: stp x2, x1, [x0]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -502,14 +409,11 @@ block0(v0: i64, v1: i32):
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x2, x0
; check: mov x2, x0
; nextln: add x2, x2, x1, SXTW
; nextln: ldp x3, x2, [x2, #24]
; nextln: add x0, x0, x1, SXTW
; nextln: stp x3, x2, [x0, #24]
; nextln: mov x0, x3
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -8,10 +8,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: add x0, x0, x1
; nextln: ret
@@ -21,10 +18,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: sub x0, x0, x1
; nextln: ret
function %f3(i64, i64) -> i64 {
@@ -33,10 +27,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: madd x0, x0, x1, xzr
; nextln: ldp fp, lr, [sp], #16
; check: madd x0, x0, x1, xzr
; nextln: ret
function %f4(i64, i64) -> i64 {
@@ -45,10 +36,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: umulh x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: umulh x0, x0, x1
; nextln: ret
function %f5(i64, i64) -> i64 {
@@ -57,10 +45,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: smulh x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: smulh x0, x0, x1
; nextln: ret
function %f6(i64, i64) -> i64 {
@@ -69,15 +54,12 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sdiv x2, x0, x1
; check: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: adds xzr, x1, #1
; nextln: ccmp x0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f7(i64) -> i64 {
@@ -87,16 +69,13 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x2, #2
; check: movz x2, #2
; nextln: sdiv x1, x0, x2
; nextln: cbnz x2, 8 ; udf
; nextln: adds xzr, x2, #1
; nextln: ccmp x0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f8(i64, i64) -> i64 {
@@ -105,11 +84,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: udiv x0, x0, x1
; check: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f9(i64) -> i64 {
@@ -119,12 +95,9 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #2
; check: movz x1, #2
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10(i64, i64) -> i64 {
@@ -133,12 +106,9 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sdiv x2, x0, x1
; check: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: msub x0, x2, x1, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f11(i64, i64) -> i64 {
@@ -147,12 +117,9 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: udiv x2, x0, x1
; check: udiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: msub x0, x2, x1, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -162,16 +129,13 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x3, w0
; check: sxtw x3, w0
; nextln: sxtw x2, w1
; nextln: sdiv x0, x3, x2
; nextln: cbnz x2, 8 ; udf
; nextln: adds wzr, w2, #1
; nextln: ccmp w3, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f13(i32) -> i32 {
@@ -181,9 +145,7 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; check: sxtw x0, w0
; nextln: movz x1, #2
; nextln: sxtw x2, w1
; nextln: sdiv x1, x0, x2
@@ -192,7 +154,6 @@ block0(v0: i32):
; nextln: ccmp w0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f14(i32, i32) -> i32 {
@@ -201,13 +162,10 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; check: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -218,13 +176,10 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; check: mov w0, w0
; nextln: movz x1, #2
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f16(i32, i32) -> i32 {
@@ -233,14 +188,11 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; check: sxtw x0, w0
; nextln: sxtw x1, w1
; nextln: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: msub x0, x2, x1, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f17(i32, i32) -> i32 {
@@ -249,14 +201,11 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; check: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: msub x0, x2, x1, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f18(i64, i64) -> i64 {
@@ -265,10 +214,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: and x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: and x0, x0, x1
; nextln: ret
function %f19(i64, i64) -> i64 {
@@ -277,10 +223,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: orr x0, x0, x1
; nextln: ret
function %f20(i64, i64) -> i64 {
@@ -289,10 +232,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: eor x0, x0, x1
; nextln: ret
function %f21(i64, i64) -> i64 {
@@ -301,10 +241,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: bic x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: bic x0, x0, x1
; nextln: ret
function %f22(i64, i64) -> i64 {
@@ -313,10 +250,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: orn x0, x0, x1
; nextln: ret
function %f23(i64, i64) -> i64 {
@@ -325,10 +259,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eon x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: eon x0, x0, x1
; nextln: ret
function %f24(i64, i64) -> i64 {
@@ -337,10 +268,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, xzr, x0
; nextln: ldp fp, lr, [sp], #16
; check: orn x0, xzr, x0
; nextln: ret
function %f25(i32, i32) -> i32 {
@@ -351,10 +279,7 @@ block0(v0: i32, v1: i32):
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub w0, w1, w0, LSL 21
; nextln: ldp fp, lr, [sp], #16
; check: sub w0, w1, w0, LSL 21
; nextln: ret
function %f26(i32) -> i32 {
@@ -364,10 +289,7 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub w0, w0, #1
; nextln: ldp fp, lr, [sp], #16
; check: sub w0, w0, #1
; nextln: ret
function %f27(i32) -> i32 {
@@ -377,10 +299,7 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, #1
; nextln: ldp fp, lr, [sp], #16
; check: add w0, w0, #1
; nextln: ret
function %f28(i64) -> i64 {
@@ -390,10 +309,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, #1
; nextln: ldp fp, lr, [sp], #16
; check: add x0, x0, #1
; nextln: ret
function %f29(i64) -> i64 {
@@ -403,11 +319,8 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: sub x0, xzr, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f30(i8x16) -> i8x16 {
@@ -417,13 +330,10 @@ block0(v0: i8x16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: sub w0, wzr, w0
; nextln: dup v1.16b, w0
; nextln: ushl v0.16b, v0.16b, v1.16b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -433,11 +343,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds x0, x0, x2
; check: adds x0, x0, x2
; nextln: adc x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %sub_i128(i128, i128) -> i128 {
@@ -446,11 +353,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs x0, x0, x2
; check: subs x0, x0, x2
; nextln: sbc x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %mul_i128(i128, i128) -> i128 {
@@ -459,12 +363,9 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: umulh x4, x0, x2
; check: umulh x4, x0, x2
; nextln: madd x3, x0, x3, x4
; nextln: madd x1, x1, x2, x3
; nextln: madd x0, x0, x2, xzr
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -8,7 +8,6 @@ block0(v0: i64):
}
; check: ldar x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i32(i64) -> i32 {
@@ -18,7 +17,6 @@ block0(v0: i64):
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i16(i64) -> i16 {
@@ -28,7 +26,6 @@ block0(v0: i64):
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i8(i64) -> i8 {
@@ -38,7 +35,6 @@ block0(v0: i64):
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i32_i64(i64) -> i64 {
@@ -49,7 +45,6 @@ block0(v0: i64):
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i16_i64(i64) -> i64 {
@@ -60,7 +55,6 @@ block0(v0: i64):
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i8_i64(i64) -> i64 {
@@ -71,7 +65,6 @@ block0(v0: i64):
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i16_i32(i64) -> i32 {
@@ -82,7 +75,6 @@ block0(v0: i64):
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_load_i8_i32(i64) -> i32 {
@@ -93,5 +85,4 @@ block0(v0: i64):
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -8,7 +8,6 @@ block0(v0: i64, v1: i64):
}
; check: stlr x0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i32(i32, i64) {
@@ -18,7 +17,6 @@ block0(v0: i32, v1: i64):
}
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i16(i16, i64) {
@@ -28,7 +26,6 @@ block0(v0: i16, v1: i64):
}
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i8(i8, i64) {
@@ -38,7 +35,6 @@ block0(v0: i8, v1: i64):
}
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i64_i32(i64, i64) {
@@ -50,7 +46,6 @@ block0(v0: i64, v1: i64):
; check-not: uxt
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i64_i16(i64, i64) {
@@ -62,7 +57,6 @@ block0(v0: i64, v1: i64):
; check-not: uxt
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i64_i8(i64, i64) {
@@ -74,7 +68,6 @@ block0(v0: i64, v1: i64):
; check-not: uxt
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i32_i16(i32, i64) {
@@ -86,7 +79,6 @@ block0(v0: i32, v1: i64):
; check-not: uxt
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %atomic_store_i32_i8(i32, i64) {
@@ -98,5 +90,4 @@ block0(v0: i32, v1: i64):
; check-not: uxt
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -4,11 +4,8 @@ target aarch64
function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v2 = iadd v0, v1
; check: add w0, w0, w1
return v2
; check: ldp fp, lr, [sp], #16
; check: ret
}

View File

@@ -8,11 +8,8 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; check: rbit w0, w0
; nextln: lsr w0, w0, #24
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %a(i16) -> i16 {
@@ -21,11 +18,8 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; check: rbit w0, w0
; nextln: lsr w0, w0, #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %a(i32) -> i32 {
@@ -34,10 +28,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: rbit w0, w0
; nextln: ret
function %a(i64) -> i64 {
@@ -46,10 +37,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x0, x0
; nextln: ldp fp, lr, [sp], #16
; check: rbit x0, x0
; nextln: ret
function %a(i128) -> i128 {
@@ -58,12 +46,9 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x2, x0
; check: rbit x2, x0
; nextln: rbit x0, x1
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %b(i8) -> i8 {
@@ -72,11 +57,8 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %b(i16) -> i16 {
@@ -85,11 +67,8 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %b(i32) -> i32 {
@@ -98,10 +77,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: clz w0, w0
; nextln: ret
function %b(i64) -> i64 {
@@ -110,10 +86,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: clz x0, x0
; nextln: ldp fp, lr, [sp], #16
; check: clz x0, x0
; nextln: ret
function %b(i128) -> i128 {
@@ -122,14 +95,11 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: clz x1, x1
; check: clz x1, x1
; nextln: clz x0, x0
; nextln: lsr x2, x1, #6
; nextln: madd x0, x0, x2, x1
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %c(i8) -> i8 {
@@ -138,11 +108,8 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: cls w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %c(i16) -> i16 {
@@ -151,11 +118,8 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: cls w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %c(i32) -> i32 {
@@ -164,10 +128,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: cls w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: cls w0, w0
; nextln: ret
function %c(i64) -> i64 {
@@ -176,10 +137,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: cls x0, x0
; nextln: ldp fp, lr, [sp], #16
; check: cls x0, x0
; nextln: ret
function %c(i128) -> i128 {
@@ -188,9 +146,7 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: cls x2, x0
; check: cls x2, x0
; nextln: cls x3, x1
; nextln: eon x0, x1, x0
; nextln: lsr x0, x0, #63
@@ -199,7 +155,6 @@ block0(v0: i128):
; nextln: csel x0, x0, xzr, eq
; nextln: add x0, x0, x3
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i8) -> i8 {
@@ -208,12 +163,9 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; check: rbit w0, w0
; nextln: lsr w0, w0, #24
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i16) -> i16 {
@@ -222,12 +174,9 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; check: rbit w0, w0
; nextln: lsr w0, w0, #16
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i32) -> i32 {
@@ -236,11 +185,8 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; check: rbit w0, w0
; nextln: clz w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i64) -> i64 {
@@ -249,11 +195,8 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x0, x0
; check: rbit x0, x0
; nextln: clz x0, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i128) -> i128 {
@@ -262,16 +205,13 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x0, x0
; check: rbit x0, x0
; nextln: rbit x1, x1
; nextln: clz x0, x0
; nextln: clz x1, x1
; nextln: lsr x2, x0, #6
; nextln: madd x0, x1, x2, x0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i128) -> i128 {
@@ -280,15 +220,12 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov d0, x0
; check: fmov d0, x0
; nextln: mov v0.d[1], x1
; nextln: cnt v0.16b, v0.16b
; nextln: addv b0, v0.16b
; nextln: umov w0, v0.b[0]
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -298,13 +235,10 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov d0, x0
; check: fmov d0, x0
; nextln: cnt v0.8b, v0.8b
; nextln: addv b0, v0.8b
; nextln: umov w0, v0.b[0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i32) -> i32 {
@@ -313,13 +247,10 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov s0, w0
; check: fmov s0, w0
; nextln: cnt v0.8b, v0.8b
; nextln: addv b0, v0.8b
; nextln: umov w0, v0.b[0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i16) -> i16 {
@@ -328,13 +259,10 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov s0, w0
; check: fmov s0, w0
; nextln: cnt v0.8b, v0.8b
; nextln: addp v0.8b, v0.8b, v0.8b
; nextln: umov w0, v0.b[0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %d(i8) -> i8 {
@@ -343,12 +271,9 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov s0, w0
; check: fmov s0, w0
; nextln: cnt v0.8b, v0.8b
; nextln: umov w0, v0.b[0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bextend_b8() -> b32 {
@@ -358,11 +283,8 @@ block0:
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #255
; check: movz x0, #255
; nextln: sxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bextend_b1() -> b32 {
@@ -372,11 +294,8 @@ block0:
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: sbfx w0, w0, #0, #1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bnot_i128(i128) -> i128 {
@@ -385,11 +304,8 @@ block0(v0: i128):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, xzr, x0
; check: orn x0, xzr, x0
; nextln: orn x1, xzr, x1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %band_i128(i128, i128) -> i128 {
@@ -398,11 +314,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: and x0, x0, x2
; check: and x0, x0, x2
; nextln: and x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bor_i128(i128, i128) -> i128 {
@@ -411,11 +324,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, x0, x2
; check: orr x0, x0, x2
; nextln: orr x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bxor_i128(i128, i128) -> i128 {
@@ -424,11 +334,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x2
; check: eor x0, x0, x2
; nextln: eor x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %band_not_i128(i128, i128) -> i128 {
@@ -437,11 +344,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: bic x0, x0, x2
; check: bic x0, x0, x2
; nextln: bic x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bor_not_i128(i128, i128) -> i128 {
@@ -450,11 +354,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, x0, x2
; check: orn x0, x0, x2
; nextln: orn x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %bxor_not_i128(i128, i128) -> i128 {
@@ -463,11 +364,8 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eon x0, x0, x2
; check: eon x0, x0, x2
; nextln: eon x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -477,9 +375,7 @@ block0(v0: i128, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsr x4, x0, #1
; nextln: lsl x1, x1, x2
; nextln: lsr x3, x4, x3
@@ -488,7 +384,6 @@ block0(v0: i128, v1: i8):
; nextln: orr x1, x1, x3
; nextln: csel x1, x0, x1, ne
; nextln: csel x0, xzr, x0, ne
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -498,9 +393,7 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsr x4, x0, #1
; nextln: lsl x1, x1, x2
; nextln: lsr x3, x4, x3
@@ -509,7 +402,6 @@ block0(v0: i128, v1: i128):
; nextln: orr x1, x1, x3
; nextln: csel x1, x0, x1, ne
; nextln: csel x0, xzr, x0, ne
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -519,9 +411,7 @@ block0(v0: i128, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsl x4, x1, #1
; nextln: lsr x0, x0, x2
; nextln: lsl x3, x4, x3
@@ -531,7 +421,6 @@ block0(v0: i128, v1: i8):
; nextln: csel x2, xzr, x1, ne
; nextln: csel x0, x1, x0, ne
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -541,9 +430,7 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsl x4, x1, #1
; nextln: lsr x0, x0, x2
; nextln: lsl x3, x4, x3
@@ -553,7 +440,6 @@ block0(v0: i128, v1: i128):
; nextln: csel x2, xzr, x1, ne
; nextln: csel x0, x1, x0, ne
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -563,9 +449,7 @@ block0(v0: i128, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsl x4, x1, #1
; nextln: lsr x0, x0, x2
; nextln: lsl x4, x4, x3
@@ -575,7 +459,6 @@ block0(v0: i128, v1: i8):
; nextln: orr x0, x0, x4
; nextln: csel x1, x1, x3, ne
; nextln: csel x0, x3, x0, ne
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -585,9 +468,7 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn w3, wzr, w2
; check: orn w3, wzr, w2
; nextln: lsl x4, x1, #1
; nextln: lsr x0, x0, x2
; nextln: lsl x4, x4, x3
@@ -597,5 +478,4 @@ block0(v0: i128, v1: i128):
; nextln: orr x0, x0, x4
; nextln: csel x1, x1, x3, ne
; nextln: csel x0, x3, x0, ne
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -100,9 +100,7 @@ block0(v0: i8):
return v1, v1, v1, v1, v1, v1, v1, v1, v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x9, x0
; check: mov x9, x0
; nextln: mov x8, x1
; nextln: movz x0, #42
; nextln: movz x1, #42
@@ -113,7 +111,6 @@ block0(v0: i8):
; nextln: movz x6, #42
; nextln: movz x7, #42
; nextln: sturb w9, [x8]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f8() {
@@ -259,10 +256,7 @@ block0(v0: i128, v1: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: mov x0, x1
; nextln: ret
@@ -295,14 +289,10 @@ block0(v0: i64, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; check: mov x0, x2
; nextln: ret
function %f12_call(i64) -> i64 {
fn0 = %f12(i64, i128) -> i64
@@ -333,10 +323,7 @@ block0(v0: i64, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: mov x0, x1
; nextln: ret
@@ -470,12 +457,9 @@ block0:
return v0, v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; check: mov x1, x0
; nextln: movz x0, #0
; nextln: movz x2, #1
; nextln: stur w2, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -8,11 +8,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; check: subs xzr, x0, x1
; nextln: cset x0, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_eq_i128(i128, i128) -> b1 {
@@ -21,13 +18,10 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x2
; check: eor x0, x0, x2
; nextln: eor x1, x1, x3
; nextln: adds xzr, x0, x1
; nextln: cset x0, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -37,13 +31,10 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x2
; check: eor x0, x0, x2
; nextln: eor x1, x1, x3
; nextln: adds xzr, x0, x1
; nextln: cset x0, ne
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -53,14 +44,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, lo
; nextln: subs xzr, x1, x3
; nextln: cset x1, lt
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -70,14 +58,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, lo
; nextln: subs xzr, x1, x3
; nextln: cset x1, lo
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_sle_i128(i128, i128) -> b1 {
@@ -86,14 +71,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, ls
; nextln: subs xzr, x1, x3
; nextln: cset x1, le
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_ule_i128(i128, i128) -> b1 {
@@ -102,14 +84,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, ls
; nextln: subs xzr, x1, x3
; nextln: cset x1, ls
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_sgt_i128(i128, i128) -> b1 {
@@ -118,14 +97,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hi
; nextln: subs xzr, x1, x3
; nextln: cset x1, gt
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_ugt_i128(i128, i128) -> b1 {
@@ -134,14 +110,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hi
; nextln: subs xzr, x1, x3
; nextln: cset x1, hi
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -151,14 +124,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hs
; nextln: subs xzr, x1, x3
; nextln: cset x1, ge
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_uge_i128(i128, i128) -> b1 {
@@ -167,14 +137,11 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hs
; nextln: subs xzr, x1, x3
; nextln: cset x1, hs
; nextln: csel x0, x0, x1, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_of_i128(i128, i128) -> b1 {
@@ -183,12 +150,9 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds xzr, x0, x2
; check: adds xzr, x0, x2
; nextln: adcs xzr, x1, x3
; nextln: cset x0, vs
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %icmp_nof_i128(i128, i128) -> b1 {
@@ -197,12 +161,9 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds xzr, x0, x2
; check: adds xzr, x0, x2
; nextln: adcs xzr, x1, x3
; nextln: cset x0, vc
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -222,17 +183,13 @@ block2:
}
; check: Block 0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; check: subs xzr, x0, x1
; nextln: b.eq label1 ; b label2
; check: Block 1:
; check: movz x0, #1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: Block 2:
; check: movz x0, #2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
@@ -246,12 +203,9 @@ block1:
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; check: subs xzr, x0, x1
; check: Block 1:
; check: movz x0, #1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -265,17 +219,14 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, x0, x1
; check: orr x0, x0, x1
; nextln: cbz x0, label1 ; b label2
; check: Block 1:
; check: b label3
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_brnz(i128){
@@ -288,17 +239,14 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, x0, x1
; check: orr x0, x0, x1
; nextln: cbnz x0, label1 ; b label2
; check: Block 1:
; check: b label3
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
@@ -311,9 +259,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x2
; check: eor x0, x0, x2
; nextln: eor x1, x1, x3
; nextln: adds xzr, x0, x1
; nextln: b.eq label1 ; b label2
@@ -322,8 +268,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_ne(i128, i128) {
@@ -335,9 +280,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x2
; check: eor x0, x0, x2
; nextln: eor x1, x1, x3
; nextln: adds xzr, x0, x1
; nextln: b.ne label1 ; b label2
@@ -346,8 +289,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_slt(i128, i128) {
@@ -359,9 +301,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, lo
; nextln: subs xzr, x1, x3
; nextln: cset x1, lt
@@ -373,8 +313,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_ult(i128, i128) {
@@ -386,9 +325,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, lo
; nextln: subs xzr, x1, x3
; nextln: cset x1, lo
@@ -400,8 +337,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_sle(i128, i128) {
block0(v0: i128, v1: i128):
@@ -412,9 +348,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, ls
; nextln: subs xzr, x1, x3
; nextln: cset x1, le
@@ -427,8 +361,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_ule(i128, i128) {
block0(v0: i128, v1: i128):
@@ -439,9 +372,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, ls
; nextln: subs xzr, x1, x3
; nextln: cset x1, ls
@@ -454,8 +385,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_sgt(i128, i128) {
block0(v0: i128, v1: i128):
@@ -466,9 +396,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hi
; nextln: subs xzr, x1, x3
; nextln: cset x1, gt
@@ -480,8 +408,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_ugt(i128, i128) {
block0(v0: i128, v1: i128):
@@ -492,9 +419,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hi
; nextln: subs xzr, x1, x3
; nextln: cset x1, hi
@@ -506,8 +431,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_sge(i128, i128) {
@@ -519,9 +443,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hs
; nextln: subs xzr, x1, x3
; nextln: cset x1, ge
@@ -534,8 +456,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_uge(i128, i128) {
block0(v0: i128, v1: i128):
@@ -546,9 +467,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x2
; check: subs xzr, x0, x2
; nextln: cset x0, hs
; nextln: subs xzr, x1, x3
; nextln: cset x1, hs
@@ -561,8 +480,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_of(i128, i128) {
block0(v0: i128, v1: i128):
@@ -573,9 +491,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds xzr, x0, x2
; check: adds xzr, x0, x2
; nextln: adcs xzr, x1, x3
; nextln: b.vs label1 ; b label2
; check: Block 1:
@@ -583,8 +499,7 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %i128_bricmp_nof(i128, i128) {
block0(v0: i128, v1: i128):
@@ -595,9 +510,7 @@ block1:
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds xzr, x0, x2
; check: adds xzr, x0, x2
; nextln: adcs xzr, x1, x3
; nextln: b.vc label1 ; b label2
; check: Block 1:
@@ -605,5 +518,4 @@ block1:
; check: Block 2:
; check: b label3
; check: Block 3:
; check: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret

View File

@@ -8,10 +8,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #255
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #255
; nextln: ret
function %f() -> b16 {
@@ -20,10 +17,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #0
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #0
; nextln: ret
function %f() -> i64 {
@@ -32,10 +26,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #0
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #0
; nextln: ret
function %f() -> i64 {
@@ -44,10 +35,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #65535
; nextln: ret
function %f() -> i64 {
@@ -56,10 +44,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #16
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #65535, LSL #16
; nextln: ret
function %f() -> i64 {
@@ -68,10 +53,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #32
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #65535, LSL #32
; nextln: ret
function %f() -> i64 {
@@ -80,10 +62,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #48
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #65535, LSL #48
; nextln: ret
function %f() -> i64 {
@@ -92,10 +71,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #0
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #0
; nextln: ret
function %f() -> i64 {
@@ -104,10 +80,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #65535
; nextln: ret
function %f() -> i64 {
@@ -116,10 +89,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #16
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #65535, LSL #16
; nextln: ret
function %f() -> i64 {
@@ -128,10 +98,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #32
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #65535, LSL #32
; nextln: ret
function %f() -> i64 {
@@ -140,10 +107,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #48
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #65535, LSL #48
; nextln: ret
function %f() -> i64 {
@@ -152,13 +116,10 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #58
; check: movz x0, #58
; nextln: movk x0, #4626, LSL #16
; nextln: movk x0, #61603, LSL #32
; nextln: movk x0, #62283, LSL #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f() -> i64 {
@@ -167,11 +128,8 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #7924, LSL #16
; check: movz x0, #7924, LSL #16
; nextln: movk x0, #4841, LSL #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f() -> i64 {
@@ -180,11 +138,8 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #57611, LSL #16
; check: movn x0, #57611, LSL #16
; nextln: movk x0, #4841, LSL #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f() -> i32 {
@@ -193,10 +148,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, xzr, #4294967295
; nextln: ldp fp, lr, [sp], #16
; check: orr x0, xzr, #4294967295
; nextln: ret
function %f() -> i32 {
@@ -205,10 +157,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn w0, #8
; nextln: ldp fp, lr, [sp], #16
; check: movn w0, #8
; nextln: ret
function %f() -> i64 {
@@ -217,10 +166,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn w0, #8
; nextln: ldp fp, lr, [sp], #16
; check: movn w0, #8
; nextln: ret
function %f() -> i64 {
@@ -229,8 +175,5 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #8
; nextln: ldp fp, lr, [sp], #16
; check: movn x0, #8
; nextln: ret

View File

@@ -10,11 +10,8 @@ block0(v0: i8):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #42
; check: movz x1, #42
; nextln: add x0, x1, x0, SXTB
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -24,10 +21,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; check: movz x1, #0
; nextln: ret
function %i128_sextend_i64(i64) -> i128 {
@@ -36,10 +30,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; check: asr x1, x0, #63
; nextln: ret
@@ -49,11 +40,8 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; check: mov w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %i128_sextend_i32(i32) -> i128 {
@@ -62,11 +50,8 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; check: sxtw x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -76,11 +61,8 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %i128_sextend_i16(i16) -> i128 {
@@ -89,11 +71,8 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth x0, w0
; check: sxth x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -103,11 +82,8 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %i128_sextend_i8(i8) -> i128 {
@@ -116,9 +92,6 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb x0, w0
; check: sxtb x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -4,56 +4,42 @@ target aarch64
function u0:0(i8) -> f32 {
block0(v0: i8):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_from_uint.f32 v0
; check: uxtb w0, w0
; check: ucvtf s0, w0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(i8) -> f64 {
block0(v0: i8):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_from_uint.f64 v0
; check: uxtb w0, w0
; check: ucvtf d0, w0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(i16) -> f32 {
block0(v0: i16):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_from_uint.f32 v0
; check: uxth w0, w0
; check: ucvtf s0, w0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(i16) -> f64 {
block0(v0: i16):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_from_uint.f64 v0
; check: uxth w0, w0
; check: ucvtf d0, w0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(f32) -> i8 {
block0(v0: f32):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_to_uint.i8 v0
; check: fcmp s0, s0
; check: b.vc 8 ; udf
@@ -67,14 +53,11 @@ block0(v0: f32):
; check: b.mi 8 ; udf
; check: fcvtzu w0, s0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(f64) -> i8 {
block0(v0: f64):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_to_uint.i8 v0
; check: fcmp d0, d0
; check: b.vc 8 ; udf
@@ -88,14 +71,11 @@ block0(v0: f64):
; check: b.mi 8 ; udf
; check: fcvtzu w0, d0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(f32) -> i16 {
block0(v0: f32):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_to_uint.i16 v0
; check: fcmp s0, s0
; check: b.vc 8 ; udf
@@ -109,14 +89,11 @@ block0(v0: f32):
; check: b.mi 8 ; udf
; check: fcvtzu w0, s0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}
function u0:0(f64) -> i16 {
block0(v0: f64):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
v1 = fcvt_to_uint.i16 v0
; check: fcmp d0, d0
; check: b.vc 8 ; udf
@@ -130,6 +107,5 @@ block0(v0: f64):
; check: b.mi 8 ; udf
; check: fcvtzu w0, d0
return v1
; check: ldp fp, lr, [sp], #16
; check: ret
}

View File

@@ -8,10 +8,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fadd s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fadd s0, s0, s1
; nextln: ret
function %f2(f64, f64) -> f64 {
@@ -20,10 +17,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fadd d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fadd d0, d0, d1
; nextln: ret
function %f3(f32, f32) -> f32 {
@@ -32,10 +26,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fsub s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fsub s0, s0, s1
; nextln: ret
function %f4(f64, f64) -> f64 {
@@ -44,10 +35,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fsub d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fsub d0, d0, d1
; nextln: ret
function %f5(f32, f32) -> f32 {
@@ -56,10 +44,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmul s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fmul s0, s0, s1
; nextln: ret
function %f6(f64, f64) -> f64 {
@@ -68,10 +53,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmul d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fmul d0, d0, d1
; nextln: ret
function %f7(f32, f32) -> f32 {
@@ -80,10 +62,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fdiv s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fdiv s0, s0, s1
; nextln: ret
function %f8(f64, f64) -> f64 {
@@ -92,10 +71,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fdiv d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fdiv d0, d0, d1
; nextln: ret
function %f9(f32, f32) -> f32 {
@@ -104,10 +80,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmin s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fmin s0, s0, s1
; nextln: ret
function %f10(f64, f64) -> f64 {
@@ -116,10 +89,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmin d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fmin d0, d0, d1
; nextln: ret
function %f11(f32, f32) -> f32 {
@@ -128,10 +98,7 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmax s0, s0, s1
; nextln: ldp fp, lr, [sp], #16
; check: fmax s0, s0, s1
; nextln: ret
function %f12(f64, f64) -> f64 {
@@ -140,10 +107,7 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmax d0, d0, d1
; nextln: ldp fp, lr, [sp], #16
; check: fmax d0, d0, d1
; nextln: ret
function %f13(f32) -> f32 {
@@ -152,10 +116,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fsqrt s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: fsqrt s0, s0
; nextln: ret
function %f15(f64) -> f64 {
@@ -164,10 +125,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fsqrt d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: fsqrt d0, d0
; nextln: ret
function %f16(f32) -> f32 {
@@ -176,10 +134,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fabs s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: fabs s0, s0
; nextln: ret
function %f17(f64) -> f64 {
@@ -188,10 +143,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fabs d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: fabs d0, d0
; nextln: ret
function %f18(f32) -> f32 {
@@ -200,10 +152,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fneg s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: fneg s0, s0
; nextln: ret
function %f19(f64) -> f64 {
@@ -212,10 +161,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fneg d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: fneg d0, d0
; nextln: ret
function %f20(f32) -> f64 {
@@ -224,10 +170,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcvt d0, s0
; nextln: ldp fp, lr, [sp], #16
; check: fcvt d0, s0
; nextln: ret
function %f21(f64) -> f32 {
@@ -236,10 +179,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcvt s0, d0
; nextln: ldp fp, lr, [sp], #16
; check: fcvt s0, d0
; nextln: ret
function %f22(f32) -> f32 {
@@ -248,10 +188,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintp s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: frintp s0, s0
; nextln: ret
function %f22(f64) -> f64 {
@@ -260,10 +197,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintp d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: frintp d0, d0
; nextln: ret
function %f23(f32) -> f32 {
@@ -272,10 +206,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintm s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: frintm s0, s0
; nextln: ret
function %f24(f64) -> f64 {
@@ -284,10 +215,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintm d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: frintm d0, d0
; nextln: ret
function %f25(f32) -> f32 {
@@ -296,10 +224,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintz s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: frintz s0, s0
; nextln: ret
function %f26(f64) -> f64 {
@@ -308,10 +233,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintz d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: frintz d0, d0
; nextln: ret
function %f27(f32) -> f32 {
@@ -320,10 +242,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintn s0, s0
; nextln: ldp fp, lr, [sp], #16
; check: frintn s0, s0
; nextln: ret
function %f28(f64) -> f64 {
@@ -332,10 +251,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: frintn d0, d0
; nextln: ldp fp, lr, [sp], #16
; check: frintn d0, d0
; nextln: ret
function %f29(f32, f32, f32) -> f32 {
@@ -344,10 +260,7 @@ block0(v0: f32, v1: f32, v2: f32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmadd s0, s0, s1, s2
; nextln: ldp fp, lr, [sp], #16
; check: fmadd s0, s0, s1, s2
; nextln: ret
function %f30(f64, f64, f64) -> f64 {
@@ -356,9 +269,8 @@ block0(v0: f64, v1: f64, v2: f64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmadd d0, d0, d1, d2
; check: fmadd d0, d0, d1, d2
; nextln: ret
function %f31(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
@@ -366,11 +278,8 @@ block0(v0: f32, v1: f32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ushr v1.2s, v1.2s, #31
; check: ushr v1.2s, v1.2s, #31
; nextln: sli v0.2s, v1.2s, #31
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f32(f64, f64) -> f64 {
@@ -379,11 +288,8 @@ block0(v0: f64, v1: f64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ushr d1, d1, #63
; check: ushr d1, d1, #63
; nextln: sli d0, d1, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f33(f32) -> i32 {
@@ -392,9 +298,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp s0, s0
; check: fcmp s0, s0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #49024, LSL #16
; nextln: fmov d1, x0
@@ -405,7 +309,6 @@ block0(v0: f32):
; nextln: fcmp s0, s1
; nextln: b.mi 8 ; udf
; nextln: fcvtzu w0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f34(f32) -> i32 {
@@ -414,9 +317,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp s0, s0
; check: fcmp s0, s0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #52992, LSL #16
; nextln: fmov d1, x0
@@ -427,7 +328,6 @@ block0(v0: f32):
; nextln: fcmp s0, s1
; nextln: b.mi 8 ; udf
; nextln: fcvtzs w0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f35(f32) -> i64 {
@@ -436,9 +336,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp s0, s0
; check: fcmp s0, s0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #49024, LSL #16
; nextln: fmov d1, x0
@@ -449,7 +347,6 @@ block0(v0: f32):
; nextln: fcmp s0, s1
; nextln: b.mi 8 ; udf
; nextln: fcvtzu x0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f36(f32) -> i64 {
@@ -458,9 +355,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp s0, s0
; check: fcmp s0, s0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #57088, LSL #16
; nextln: fmov d1, x0
@@ -471,7 +366,6 @@ block0(v0: f32):
; nextln: fcmp s0, s1
; nextln: b.mi 8 ; udf
; nextln: fcvtzs x0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f37(f64) -> i32 {
@@ -480,9 +374,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp d0, d0
; check: fcmp d0, d0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #49136, LSL #48
; nextln: fmov d1, x0
@@ -493,7 +385,6 @@ block0(v0: f64):
; nextln: fcmp d0, d1
; nextln: b.mi 8 ; udf
; nextln: fcvtzu w0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f38(f64) -> i32 {
@@ -502,9 +393,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp d0, d0
; check: fcmp d0, d0
; nextln: b.vc 8 ; udf
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
; nextln: fcmp d0, d1
@@ -514,7 +403,6 @@ block0(v0: f64):
; nextln: fcmp d0, d1
; nextln: b.mi 8 ; udf
; nextln: fcvtzs w0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f39(f64) -> i64 {
@@ -523,9 +411,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp d0, d0
; check: fcmp d0, d0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #49136, LSL #48
; nextln: fmov d1, x0
@@ -536,7 +422,6 @@ block0(v0: f64):
; nextln: fcmp d0, d1
; nextln: b.mi 8 ; udf
; nextln: fcvtzu x0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f40(f64) -> i64 {
@@ -545,9 +430,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fcmp d0, d0
; check: fcmp d0, d0
; nextln: b.vc 8 ; udf
; nextln: movz x0, #50144, LSL #48
; nextln: fmov d1, x0
@@ -558,7 +441,6 @@ block0(v0: f64):
; nextln: fcmp d0, d1
; nextln: b.mi 8 ; udf
; nextln: fcvtzs x0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f41(i32) -> f32 {
@@ -567,10 +449,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ucvtf s0, w0
; nextln: ldp fp, lr, [sp], #16
; check: ucvtf s0, w0
; nextln: ret
function %f42(i32) -> f32 {
@@ -579,10 +458,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: scvtf s0, w0
; nextln: ldp fp, lr, [sp], #16
; check: scvtf s0, w0
; nextln: ret
function %f43(i64) -> f32 {
@@ -591,10 +467,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ucvtf s0, x0
; nextln: ldp fp, lr, [sp], #16
; check: ucvtf s0, x0
; nextln: ret
function %f44(i64) -> f32 {
@@ -603,10 +476,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: scvtf s0, x0
; nextln: ldp fp, lr, [sp], #16
; check: scvtf s0, x0
; nextln: ret
function %f45(i32) -> f64 {
@@ -615,10 +485,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ucvtf d0, w0
; nextln: ldp fp, lr, [sp], #16
; check: ucvtf d0, w0
; nextln: ret
function %f46(i32) -> f64 {
@@ -627,10 +494,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: scvtf d0, w0
; nextln: ldp fp, lr, [sp], #16
; check: scvtf d0, w0
; nextln: ret
function %f47(i64) -> f64 {
@@ -639,10 +503,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ucvtf d0, x0
; nextln: ldp fp, lr, [sp], #16
; check: ucvtf d0, x0
; nextln: ret
function %f48(i64) -> f64 {
@@ -651,10 +512,7 @@ block0(v0: i64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: scvtf d0, x0
; nextln: ldp fp, lr, [sp], #16
; check: scvtf d0, x0
; nextln: ret
function %f49(f32) -> i32 {
@@ -663,9 +521,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #20352, LSL #16
; check: movz x0, #20352, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
; nextln: movi v1.2s, #0
@@ -673,7 +529,6 @@ block0(v0: f32):
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
; nextln: fcvtzu w0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f50(f32) -> i32 {
@@ -682,9 +537,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #20224, LSL #16
; check: movz x0, #20224, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s1, s0, s1
; nextln: movz x0, #52992, LSL #16
@@ -694,7 +547,6 @@ block0(v0: f32):
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs w0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f51(f32) -> i64 {
@@ -703,9 +555,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #24448, LSL #16
; check: movz x0, #24448, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
; nextln: movi v1.2s, #0
@@ -713,7 +563,6 @@ block0(v0: f32):
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
; nextln: fcvtzu x0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f52(f32) -> i64 {
@@ -722,9 +571,7 @@ block0(v0: f32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #24320, LSL #16
; check: movz x0, #24320, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s1, s0, s1
; nextln: movz x0, #57088, LSL #16
@@ -734,7 +581,6 @@ block0(v0: f32):
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs x0, s0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f53(f64) -> i32 {
@@ -743,16 +589,13 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
; check: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
; nextln: fmin d2, d0, d1
; nextln: movi v1.2s, #0
; nextln: fmax d2, d2, d1
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
; nextln: fcvtzu w0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f54(f64) -> i32 {
@@ -761,9 +604,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483647
; check: ldr d1, pc+8 ; b 12 ; data.f64 2147483647
; nextln: fmin d1, d0, d1
; nextln: movz x0, #49632, LSL #48
; nextln: fmov d2, x0
@@ -772,7 +613,6 @@ block0(v0: f64):
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs w0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f55(f64) -> i64 {
@@ -781,9 +621,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #17392, LSL #48
; check: movz x0, #17392, LSL #48
; nextln: fmov d1, x0
; nextln: fmin d2, d0, d1
; nextln: movi v1.2s, #0
@@ -791,7 +629,6 @@ block0(v0: f64):
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
; nextln: fcvtzu x0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f56(f64) -> i64 {
@@ -800,9 +637,7 @@ block0(v0: f64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #17376, LSL #48
; check: movz x0, #17376, LSL #48
; nextln: fmov d1, x0
; nextln: fmin d1, d0, d1
; nextln: movz x0, #50144, LSL #48
@@ -812,5 +647,4 @@ block0(v0: f64):
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs x0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -14,9 +14,7 @@ block0(v0: i64, v1: i32):
}
; check: Block 0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w2, w1
; check: mov w2, w1
; nextln: ldr x3, [x0]
; nextln: mov x3, x3
; nextln: subs xzr, x2, x3
@@ -26,7 +24,6 @@ block0(v0: i64, v1: i32):
; nextln: subs xzr, x2, x3
; nextln: movz x1, #0
; nextln: csel x0, x1, x0, hi
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: Block 2:
; check: udf
@@ -41,9 +38,7 @@ block0(v0: i64, v1: i32):
}
; check: Block 0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w2, w1
; check: mov w2, w1
; nextln: subs xzr, x2, #65536
; nextln: b.ls label1 ; b label2
; check: Block 1:
@@ -51,7 +46,6 @@ block0(v0: i64, v1: i32):
; nextln: subs xzr, x2, #65536
; nextln: movz x1, #0
; nextln: csel x0, x1, x0, hi
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: Block 2:
; check: udf

View File

@@ -18,15 +18,12 @@ block0:
; nextln: Entry block: 0
; nextln: Block 0:
; nextln: (original IR block: block0)
; nextln: (instruction range: 0 .. 10)
; nextln: Inst 0: stp fp, lr, [sp, #-16]!
; nextln: Inst 1: mov fp, sp
; nextln: Inst 2: movz x0, #56780
; nextln: Inst 3: uxth w0, w0
; nextln: Inst 4: movz x1, #56780
; nextln: Inst 5: subs wzr, w0, w1, UXTH
; nextln: Inst 6: cset x0, ne
; nextln: Inst 7: and w0, w0, #1
; nextln: Inst 8: ldp fp, lr, [sp], #16
; nextln: Inst 9: ret
; nextln: (instruction range: 0 .. 7)
; nextln: Inst 0: movz x0, #56780
; nextln: Inst 1: uxth w0, w0
; nextln: Inst 2: movz x1, #56780
; nextln: Inst 3: subs wzr, w0, w1, UXTH
; nextln: Inst 4: cset x0, ne
; nextln: Inst 5: and w0, w0, #1
; nextln: Inst 6: ret
; nextln: }}

View File

@@ -10,9 +10,6 @@ block1:
return v0, v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: movz x1, #2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -8,10 +8,7 @@ block0(v0: i8, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: add w0, w0, w1
; nextln: ret
function %add16(i16, i16) -> i16 {
@@ -20,10 +17,7 @@ block0(v0: i16, v1: i16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: add w0, w0, w1
; nextln: ret
function %add32(i32, i32) -> i32 {
@@ -32,10 +26,7 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: add w0, w0, w1
; nextln: ret
function %add32_8(i32, i8) -> i32 {
@@ -45,10 +36,7 @@ block0(v0: i32, v1: i8):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1, SXTB
; nextln: ldp fp, lr, [sp], #16
; check: add w0, w0, w1, SXTB
; nextln: ret
function %add64_32(i64, i32) -> i64 {
@@ -58,8 +46,5 @@ block0(v0: i64, v1: i32):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x1, SXTW
; nextln: ldp fp, lr, [sp], #16
; check: add x0, x0, x1, SXTW
; nextln: ret

View File

@@ -7,37 +7,25 @@ block0(v0: i128):
v1 = ireduce.i64 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %ireduce_128_32(i128) -> i32 {
block0(v0: i128):
v1 = ireduce.i32 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %ireduce_128_16(i128) -> i16 {
block0(v0: i128):
v1 = ireduce.i16 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %ireduce_128_8(i128) -> i8 {
block0(v0: i128):
v1 = ireduce.i8 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret

View File

@@ -7,10 +7,7 @@ block0(v0: r64):
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %f1(r64) -> b1 {
block0(v0: r64):
@@ -18,11 +15,8 @@ block0(v0: r64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, #0
; check: subs xzr, x0, #0
; nextln: cset x0, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f2(r64) -> b1 {
@@ -31,11 +25,8 @@ block0(v0: r64):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds xzr, x0, #1
; check: adds xzr, x0, #1
; nextln: cset x0, eq
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f3() -> r64 {
@@ -44,10 +35,7 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #0
; nextln: ldp fp, lr, [sp], #16
; check: movz x0, #0
; nextln: ret
function %f4(r64, r64) -> r64, r64, r64 {

View File

@@ -10,10 +10,7 @@ block0(v0: i64):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x0, LSL 3
; nextln: ldp fp, lr, [sp], #16
; check: add x0, x0, x0, LSL 3
; nextln: ret
function %f(i32) -> i32 {
@@ -23,8 +20,5 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w0, w0, #21
; nextln: ldp fp, lr, [sp], #16
; check: lsl w0, w0, #21
; nextln: ret

View File

@@ -12,9 +12,7 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; check: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsl x6, x1, #1
@@ -36,7 +34,6 @@ block0(v0: i128, v1: i128):
; nextln: csel x0, xzr, x0, ne
; nextln: orr x0, x0, x4
; nextln: orr x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f0(i64, i64) -> i64 {
@@ -45,10 +42,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: ror x0, x0, x1
; nextln: ret
function %f1(i32, i32) -> i32 {
@@ -57,10 +51,7 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: ror w0, w0, w1
; nextln: ret
function %f2(i16, i16) -> i16 {
@@ -69,16 +60,13 @@ block0(v0: i16, v1: i16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: and w1, w1, #15
; nextln: sub w2, w1, #16
; nextln: sub w2, wzr, w2
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f3(i8, i8) -> i8 {
@@ -87,16 +75,13 @@ block0(v0: i8, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: and w1, w1, #7
; nextln: sub w2, w1, #8
; nextln: sub w2, wzr, w2
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -109,9 +94,7 @@ block0(v0: i128, v1: i128):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; check: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsr x6, x0, #1
@@ -136,7 +119,6 @@ block0(v0: i128, v1: i128):
; nextln: mov x2, x0
; nextln: mov x0, x1
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f4(i64, i64) -> i64 {
@@ -145,11 +127,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub x1, xzr, x1
; check: sub x1, xzr, x1
; nextln: ror x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f5(i32, i32) -> i32 {
@@ -158,11 +137,8 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub w1, wzr, w1
; check: sub w1, wzr, w1
; nextln: ror w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f6(i16, i16) -> i16 {
@@ -171,9 +147,7 @@ block0(v0: i16, v1: i16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: sub w1, wzr, w1
; nextln: and w1, w1, #15
; nextln: sub w2, w1, #16
@@ -181,7 +155,6 @@ block0(v0: i16, v1: i16):
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f7(i8, i8) -> i8 {
@@ -190,9 +163,7 @@ block0(v0: i8, v1: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: sub w1, wzr, w1
; nextln: and w1, w1, #7
; nextln: sub w2, w1, #8
@@ -200,7 +171,6 @@ block0(v0: i8, v1: i8):
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -213,10 +183,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: lsr x0, x0, x1
; nextln: ret
function %f9(i32, i32) -> i32 {
@@ -225,10 +192,7 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: lsr w0, w0, w1
; nextln: ret
function %f10(i16, i16) -> i16 {
@@ -240,6 +204,7 @@ block0(v0: i16, v1: i16):
; check: uxth w0, w0
; nextln: and w1, w1, #15
; nextln: lsr w0, w0, w1
; nextln: ret
function %f11(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
@@ -250,6 +215,7 @@ block0(v0: i8, v1: i8):
; check: uxtb w0, w0
; nextln: and w1, w1, #7
; nextln: lsr w0, w0, w1
; nextln: ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LSL, variable
@@ -261,10 +227,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: lsl x0, x0, x1
; nextln: ret
function %f13(i32, i32) -> i32 {
@@ -273,10 +236,7 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: lsl w0, w0, w1
; nextln: ret
function %f14(i16, i16) -> i16 {
@@ -287,6 +247,7 @@ block0(v0: i16, v1: i16):
; check: and w1, w1, #15
; nextln: lsl w0, w0, w1
; nextln: ret
function %f15(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
@@ -296,6 +257,7 @@ block0(v0: i8, v1: i8):
; check: and w1, w1, #7
; nextln: lsl w0, w0, w1
; nextln: ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ASR, variable
@@ -307,10 +269,7 @@ block0(v0: i64, v1: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x0, x0, x1
; nextln: ldp fp, lr, [sp], #16
; check: asr x0, x0, x1
; nextln: ret
function %f17(i32, i32) -> i32 {
@@ -319,10 +278,7 @@ block0(v0: i32, v1: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; check: asr w0, w0, w1
; nextln: ret
function %f18(i16, i16) -> i16 {
@@ -333,6 +289,7 @@ block0(v0: i16, v1: i16):
; check: and w1, w1, #15
; nextln: asr w0, w0, w1
; nextln: ret
function %f19(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
@@ -342,6 +299,7 @@ block0(v0: i8, v1: i8):
; check: and w1, w1, #7
; nextln: asr w0, w0, w1
; nextln: ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; immediate forms
@@ -354,10 +312,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror x0, x0, #17
; nextln: ldp fp, lr, [sp], #16
; check: ror x0, x0, #17
; nextln: ret
function %f21(i64) -> i64 {
@@ -367,10 +322,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror x0, x0, #47
; nextln: ldp fp, lr, [sp], #16
; check: ror x0, x0, #47
; nextln: ret
function %f22(i32) -> i32 {
@@ -380,10 +332,7 @@ block0(v0: i32):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror w0, w0, #15
; nextln: ldp fp, lr, [sp], #16
; check: ror w0, w0, #15
; nextln: ret
function %f23(i16) -> i16 {
@@ -393,13 +342,10 @@ block0(v0: i16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; check: uxth w0, w0
; nextln: lsr w1, w0, #6
; nextln: lsl w0, w0, #10
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f24(i8) -> i8 {
@@ -409,13 +355,10 @@ block0(v0: i8):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; check: uxtb w0, w0
; nextln: lsr w1, w0, #5
; nextln: lsl w0, w0, #3
; nextln: orr w0, w0, w1
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f25(i64) -> i64 {
@@ -425,10 +368,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr x0, x0, #17
; nextln: ldp fp, lr, [sp], #16
; check: lsr x0, x0, #17
; nextln: ret
function %f26(i64) -> i64 {
@@ -438,10 +378,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x0, x0, #17
; nextln: ldp fp, lr, [sp], #16
; check: asr x0, x0, #17
; nextln: ret
function %f27(i64) -> i64 {
@@ -451,8 +388,5 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl x0, x0, #17
; nextln: ldp fp, lr, [sp], #16
; check: lsl x0, x0, #17
; nextln: ret

View File

@@ -12,7 +12,6 @@ block0(v0: i8x16, v1: i8x16):
; check-not: sxtl
; check: smull v0.8h, v0.8b, v1.8b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn2(i8x16, i8x16) -> i16x8 {
@@ -25,7 +24,6 @@ block0(v0: i8x16, v1: i8x16):
; check-not: sxtl
; check: smull2 v0.8h, v0.16b, v1.16b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn3(i16x8, i16x8) -> i32x4 {
@@ -38,7 +36,6 @@ block0(v0: i16x8, v1: i16x8):
; check-not: sxtl
; check: smull v0.4s, v0.4h, v1.4h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn4(i16x8, i16x8) -> i32x4 {
@@ -51,7 +48,6 @@ block0(v0: i16x8, v1: i16x8):
; check-not: sxtl
; check: smull2 v0.4s, v0.8h, v1.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn5(i32x4, i32x4) -> i64x2 {
@@ -64,7 +60,6 @@ block0(v0: i32x4, v1: i32x4):
; check-not: sxtl
; check: smull v0.2d, v0.2s, v1.2s
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn6(i32x4, i32x4) -> i64x2 {
@@ -77,7 +72,6 @@ block0(v0: i32x4, v1: i32x4):
; check-not: sxtl
; check: smull2 v0.2d, v0.4s, v1.4s
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn7(i8x16, i8x16) -> i16x8 {
@@ -90,7 +84,6 @@ block0(v0: i8x16, v1: i8x16):
; check-not: uxtl
; check: umull v0.8h, v0.8b, v1.8b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn8(i8x16, i8x16) -> i16x8 {
@@ -103,7 +96,6 @@ block0(v0: i8x16, v1: i8x16):
; check-not: uxtl
; check: umull2 v0.8h, v0.16b, v1.16b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn9(i16x8, i16x8) -> i32x4 {
@@ -116,7 +108,6 @@ block0(v0: i16x8, v1: i16x8):
; check-not: uxtl
; check: umull v0.4s, v0.4h, v1.4h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn10(i16x8, i16x8) -> i32x4 {
@@ -129,7 +120,6 @@ block0(v0: i16x8, v1: i16x8):
; check-not: uxtl
; check: umull2 v0.4s, v0.8h, v1.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn11(i32x4, i32x4) -> i64x2 {
@@ -142,7 +132,6 @@ block0(v0: i32x4, v1: i32x4):
; check-not: uxtl
; check: umull v0.2d, v0.2s, v1.2s
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn12(i32x4, i32x4) -> i64x2 {
@@ -155,5 +144,4 @@ block0(v0: i32x4, v1: i32x4):
; check-not: uxtl2
; check: umull2 v0.2d, v0.4s, v1.4s
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -11,10 +11,7 @@ block0(v0: i8x16):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: saddlp v0.8h, v0.16b
; nextln: ldp fp, lr, [sp], #16
; check: saddlp v0.8h, v0.16b
; nextln: ret
function %fn2(i8x16) -> i16x8 {
@@ -25,10 +22,7 @@ block0(v0: i8x16):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: uaddlp v0.8h, v0.16b
; nextln: ldp fp, lr, [sp], #16
; check: uaddlp v0.8h, v0.16b
; nextln: ret
function %fn3(i16x8) -> i32x4 {
@@ -39,10 +33,7 @@ block0(v0: i16x8):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: saddlp v0.4s, v0.8h
; nextln: ldp fp, lr, [sp], #16
; check: saddlp v0.4s, v0.8h
; nextln: ret
function %fn4(i16x8) -> i32x4 {
@@ -53,10 +44,7 @@ block0(v0: i16x8):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: uaddlp v0.4s, v0.8h
; nextln: ldp fp, lr, [sp], #16
; check: uaddlp v0.4s, v0.8h
; nextln: ret
function %fn5(i8x16, i8x16) -> i16x8 {
@@ -67,12 +55,9 @@ block0(v0: i8x16, v1: i8x16):
return v4
}
; check: stp fp
; nextln: mov fp, sp
; nextln: sxtl v0.8h, v0.8b
; check: sxtl v0.8h, v0.8b
; nextln: sxtl2 v1.8h, v1.16b
; nextln: addp v0.8h, v0.8h, v1.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn6(i8x16, i8x16) -> i16x8 {
@@ -83,12 +68,9 @@ block0(v0: i8x16, v1: i8x16):
return v4
}
; check: stp fp
; nextln: mov fp, sp
; nextln: uxtl v0.8h, v0.8b
; check: uxtl v0.8h, v0.8b
; nextln: uxtl2 v1.8h, v1.16b
; nextln: addp v0.8h, v0.8h, v1.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn7(i8x16) -> i16x8 {
@@ -99,12 +81,9 @@ block0(v0: i8x16):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: uxtl v1.8h, v0.8b
; check: uxtl v1.8h, v0.8b
; nextln: sxtl2 v0.8h, v0.16b
; nextln: addp v0.8h, v1.8h, v0.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %fn8(i8x16) -> i16x8 {
@@ -115,10 +94,7 @@ block0(v0: i8x16):
return v3
}
; check: stp fp
; nextln: mov fp, sp
; nextln: sxtl v1.8h, v0.8b
; check: sxtl v1.8h, v0.8b
; nextln: uxtl2 v0.8h, v0.16b
; nextln: addp v0.8h, v1.8h, v0.8h
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -9,12 +9,9 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: movk x0, #1, LSL #48
; nextln: dup v0.2d, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f2() -> i16x8 {
@@ -25,11 +22,8 @@ block0:
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #42679
; check: movz x0, #42679
; nextln: dup v0.8h, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f3() -> b8x16 {
@@ -40,10 +34,7 @@ block0:
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movi v0.16b, #255
; nextln: ldp fp, lr, [sp], #16
; check: movi v0.16b, #255
; nextln: ret
function %f4(i32, i8x16, i8x16) -> i8x16 {
@@ -52,11 +43,8 @@ block0(v0: i32, v1: i8x16, v2: i8x16):
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs wzr, w0, wzr
; check: subs wzr, w0, wzr
; nextln: vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond)
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f5(i64) -> i8x16 {
@@ -66,10 +54,7 @@ block0(v0: i64):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ld1r { v0.16b }, [x0]
; nextln: ldp fp, lr, [sp], #16
; check: ld1r { v0.16b }, [x0]
; nextln: ret
function %f6(i64, i64) -> i8x16, i8x16 {
@@ -81,11 +66,8 @@ block0(v0: i64, v1: i64):
return v4, v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ld1r { v0.16b }, [x0]
; check: ld1r { v0.16b }, [x0]
; nextln: ld1r { v1.16b }, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f7(i64, i64) -> i8x16, i8x16 {
@@ -97,12 +79,9 @@ block0(v0: i64, v1: i64):
return v4, v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldrb w0, [x0]
; check: ldrb w0, [x0]
; nextln: ld1r { v0.16b }, [x1]
; nextln: dup v1.16b, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f8(i64, i64) -> i8x16, i8x16 {
@@ -113,12 +92,9 @@ block0(v0: i64, v1: i64):
return v3, v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldrb w0, [x0]
; check: ldrb w0, [x0]
; nextln: dup v0.16b, w0
; nextln: dup v1.16b, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f9() -> i32x2 {
@@ -128,11 +104,8 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movi v0.2d, #18374687579166474495
; check: movi v0.2d, #18374687579166474495
; nextln: fmov d0, d0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10() -> i32x4 {
@@ -142,10 +115,7 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mvni v0.4s, #15, MSL #16
; nextln: ldp fp, lr, [sp], #16
; check: mvni v0.4s, #15, MSL #16
; nextln: ret
function %f11() -> f32x4 {
@@ -155,8 +125,5 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov v0.4s, #1.3125
; nextln: ldp fp, lr, [sp], #16
; check: fmov v0.4s, #1.3125
; nextln: ret

View File

@@ -9,12 +9,9 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; check: movz x0, #1
; nextln: movk x0, #1, LSL #48
; nextln: fmov d0, x0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f2() -> i32x4 {
@@ -24,9 +21,6 @@ block0:
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #42679
; check: movz x0, #42679
; nextln: fmov s0, w0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -12,10 +12,7 @@ block0(v0: i64):
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %stack_limit_gv_leaf_zero(i64 vmctx) {
gv0 = vmctx
@@ -26,10 +23,7 @@ block0(v0: i64):
return
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: ret
function %stack_limit_call_zero(i64 stack_limit) {

View File

@@ -10,8 +10,5 @@ block0:
return v0
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: ldp fp, lr, [sp], #16
; check: ldr x0, 8 ; b 12 ; data
; nextln: ret

View File

@@ -8,10 +8,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: uxtb w0, w0
; nextln: ret
function %f_u_8_32(i8) -> i32 {
@@ -20,10 +17,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: uxtb w0, w0
; nextln: ret
function %f_u_8_16(i8) -> i16 {
@@ -32,10 +26,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: uxtb w0, w0
; nextln: ret
function %f_s_8_64(i8) -> i64 {
@@ -44,10 +35,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb x0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxtb x0, w0
; nextln: ret
function %f_s_8_32(i8) -> i32 {
@@ -56,10 +44,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxtb w0, w0
; nextln: ret
function %f_s_8_16(i8) -> i16 {
@@ -68,10 +53,7 @@ block0(v0: i8):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxtb w0, w0
; nextln: ret
function %f_u_16_64(i16) -> i64 {
@@ -80,10 +62,7 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: uxth w0, w0
; nextln: ret
function %f_u_16_32(i16) -> i32 {
@@ -92,10 +71,7 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: uxth w0, w0
; nextln: ret
function %f_s_16_64(i16) -> i64 {
@@ -104,10 +80,7 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth x0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxth x0, w0
; nextln: ret
function %f_s_16_32(i16) -> i32 {
@@ -116,10 +89,7 @@ block0(v0: i16):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxth w0, w0
; nextln: ret
function %f_u_32_64(i32) -> i64 {
@@ -128,10 +98,7 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: ldp fp, lr, [sp], #16
; check: mov w0, w0
; nextln: ret
function %f_s_32_64(i32) -> i64 {
@@ -140,8 +107,5 @@ block0(v0: i32):
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; nextln: ldp fp, lr, [sp], #16
; check: sxtw x0, w0
; nextln: ret