Implement stack limit checks for AArch64 (#1573)
This commit implements stack limit checks in Cranelift for the AArch64 backend. Both the `stack_limit` argument purpose and a function's global `stack_limit` directive now work on AArch64. I've tested this locally on some hardware and in an emulator and it passes basic tests, but I've never really written AArch64 before, so extra scrutiny of the instruction sequences would be most welcome!
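As a quick orientation before the diff: the prologue emitted by this change compares the stack pointer against the stack limit after accounting for the frame about to be allocated, and traps on failure. A minimal host-side model of that condition (a sketch only; the function name and shape here are illustrative, not part of the backend):

    // Mirrors the emitted `subs xzr, sp, <limit + frame>` / `b.hs` pair:
    // execution continues only when sp >= stack_limit + frame_size.
    fn stack_check_would_trap(sp: u64, stack_limit: u64, frame_size: u64) -> bool {
        match stack_limit.checked_add(frame_size) {
            Some(threshold) => sp < threshold,
            // The addition itself overflowed; the generated code guards
            // large frames with an extra check before the add (see the
            // 32k comment in `insert_stack_check` below).
            None => true,
        }
    }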
build.rs
@@ -171,7 +171,6 @@ fn write_testsuite_tests(
/// Ignore tests that aren't supported yet.
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
let target = env::var("TARGET").unwrap();
match strategy {
#[cfg(feature = "lightbeam")]
"Lightbeam" => match (testsuite, testname) {

@@ -209,16 +208,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
return true;
}

// FIXME(#1569) stack protection isn't implemented yet and these
// tests segfault.
("spec_testsuite", "skip_stack_guard_page")
| ("spec_testsuite", "stack")
| ("misc_testsuite", "stack_overflow")
if target.contains("aarch64") =>
{
return true
}

_ => {}
},
_ => panic!("unrecognized strategy"),
@@ -111,6 +111,7 @@ fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec<A
match &param.purpose {
&ir::ArgumentPurpose::VMContext
| &ir::ArgumentPurpose::Normal
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
@@ -192,6 +193,20 @@ pub struct AArch64ABIBody {
call_conv: isa::CallConv,
/// The settings controlling this function's compilation.
flags: settings::Flags,
/// Whether or not this function is a "leaf", meaning it calls no other
/// functions.
is_leaf: bool,
/// If this function has a stack limit specified, then `Reg` is where the
/// stack limit will be located after the instructions specified have been
/// executed.
///
/// Note that this is intended for insertion into the prologue, if
/// present. Also note that because these instructions execute in the
/// prologue, which is generated after legalization/register allocation/etc.,
/// we need to be extremely careful with each instruction. The instructions
/// are manually register-allocated and carefully use only caller-saved
/// registers, keeping nothing live after this sequence of instructions.
stack_limit: Option<(Reg, Vec<Inst>)>,
}

fn in_int_reg(ty: ir::Type) -> bool {
@@ -209,6 +224,85 @@ fn in_vec_reg(ty: ir::Type) -> bool {
}
}

/// Generates the instructions necessary for the `gv` to be materialized into a
/// register.
///
/// This function will return a register that will contain the result of
/// evaluating `gv`. It will also return any instructions necessary to calculate
/// the value of the register.
///
/// Note that global values are typically lowered to instructions via the
/// standard legalization pass. Unfortunately though prologue generation happens
/// so late in the pipeline that we can't use these legalization passes to
/// generate the instructions for `gv`. As a result we duplicate some lowering
/// of `gv` here and support only some global values. This is similar to what
/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
/// in the future too!
///
/// Also note that this function will make use of `writable_spilltmp_reg()` as a
/// temporary register to store values in if necessary. After we write to this
/// register there are guaranteed to be no spilled values between the write and
/// its use, because we're not participating in register allocation anyway!
fn gen_stack_limit(f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue) -> (Reg, Vec<Inst>) {
let mut insts = Vec::new();
let reg = generate_gv(f, abi, gv, &mut insts);
return (reg, insts);

fn generate_gv(
f: &ir::Function,
abi: &ABISig,
gv: ir::GlobalValue,
insts: &mut Vec<Inst>,
) -> Reg {
match f.global_values[gv] {
// Return the direct register the vmcontext is in
ir::GlobalValueData::VMContext => {
get_special_purpose_param_register(f, abi, ir::ArgumentPurpose::VMContext)
.expect("no vmcontext parameter found")
}
// Load our base value into a register, then load from that register
// into a temporary register.
ir::GlobalValueData::Load {
base,
offset,
global_type: _,
readonly: _,
} => {
let base = generate_gv(f, abi, base, insts);
let into_reg = writable_spilltmp_reg();
let mem = if let Some(offset) =
UImm12Scaled::maybe_from_i64(offset.into(), ir::types::I8)
{
MemArg::UnsignedOffset(base, offset)
} else {
let offset: i64 = offset.into();
insts.extend(Inst::load_constant(into_reg, offset as u64));
MemArg::RegReg(base, into_reg.to_reg())
};
insts.push(Inst::ULoad64 {
rd: into_reg,
mem,
srcloc: None,
});
return into_reg.to_reg();
}
ref other => panic!("global value for stack limit not supported: {}", other),
}
}
}

fn get_special_purpose_param_register(
f: &ir::Function,
abi: &ABISig,
purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
let idx = f.signature.special_param_index(purpose)?;
match abi.args[idx] {
ABIArg::Reg(reg, _) => Some(reg.to_reg()),
ABIArg::Stack(..) => None,
}
}
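To make the recursion in `generate_gv` above concrete: for the global-value chain used by the new filetests below (`gv0 = vmctx`, `gv1 = load gv0`, `gv2 = load gv1+4`, `stack_limit = gv2`), the walk bottoms out at the vmctx argument register and emits one load per `load` node, reusing the spill-temp register at each step. A stripped-down model of that walk (hypothetical types and register names, not Cranelift's):

    // Simplified mirror of `generate_gv`: each `Load` node loads from the
    // register produced for its base and lands in the spill temp.
    enum Gv {
        VmContext,
        Load { base: Box<Gv>, offset: i64 },
    }

    fn lower(gv: &Gv, asm: &mut Vec<String>) -> String {
        match gv {
            // The vmctx is already live in its argument register (x0 here).
            Gv::VmContext => "x0".to_string(),
            Gv::Load { base, offset } => {
                let base_reg = lower(base, asm);
                asm.push(format!("ldr x15, [{}, #{}]", base_reg, offset));
                "x15".to_string() // spill-temp register, reused per step
            }
        }
    }

    fn main() {
        // stack_limit = load.i64 ((load.i64 vmctx) + 4)
        let gv2 = Gv::Load {
            base: Box::new(Gv::Load { base: Box::new(Gv::VmContext), offset: 0 }),
            offset: 4,
        };
        let mut asm = Vec::new();
        let limit_reg = lower(&gv2, &mut asm);
        assert_eq!(asm, ["ldr x15, [x0, #0]", "ldr x15, [x15, #4]"]);
        assert_eq!(limit_reg, "x15");
    }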
impl AArch64ABIBody {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function, flags: settings::Flags) -> Self {

@@ -238,6 +332,15 @@ impl AArch64ABIBody {
stackslots.push(off);
}

// Figure out what instructions, if any, will be needed to check the
// stack limit. This can either be specified as a special-purpose
// argument or as a global value which often calculates the stack limit
// from the arguments.
let stack_limit =
get_special_purpose_param_register(f, &sig, ir::ArgumentPurpose::StackLimit)
.map(|reg| (reg, Vec::new()))
.or_else(|| f.stack_limit.map(|gv| gen_stack_limit(f, &sig, gv)));

Self {
sig,
stackslots,

@@ -247,6 +350,8 @@ impl AArch64ABIBody {
frame_size: None,
call_conv,
flags,
is_leaf: f.is_leaf(),
stack_limit,
}
}
@@ -262,6 +367,97 @@ impl AArch64ABIBody {
16 // frame pointer + return address.
}
}

/// Inserts instructions necessary for checking the stack limit into the
/// prologue.
///
/// This function will generate the instructions necessary to perform a stack
/// check at the header of a function. The stack check is intended to trap
/// if the stack pointer goes below a particular threshold, preventing stack
/// overflow in wasm or other code. The `stack_limit` argument here is the
/// register which holds the threshold below which we're supposed to trap.
/// The function being compiled is known to allocate `stack_size` bytes of
/// stack, and the check instructions are pushed onto `insts`.
///
/// Note that the instructions generated here are special because this is
/// happening so late in the pipeline (e.g. after register allocation). This
/// means that we need to do manual register allocation here and also be
/// careful to not clobber any callee-saved or argument registers. For now
/// this routine makes do with the `writable_spilltmp_reg` as one temporary
/// register, and a second register, `x16`, which is caller-saved. This
/// should be fine for us since no spills should happen in this sequence of
/// instructions, so our register won't get accidentally clobbered.
///
/// No values can be live after the prologue, but in this case that's ok
/// because we just need to perform a stack check before progressing with
/// the rest of the function.
fn insert_stack_check(&self, stack_limit: Reg, stack_size: u32, insts: &mut Vec<Inst>) {
// With no explicit stack allocated we can just emit the simple check of
// the stack pointer against the stack limit register, and trap if
// it's out of bounds.
if stack_size == 0 {
return push_check(stack_limit, insts);
}
// Note that the 32k stack size here is pretty special. See the
// documentation in x86/abi.rs for why this is here. The general idea is
// that we're protecting against overflow in the addition that happens
// below.
if stack_size >= 32 * 1024 {
push_check(stack_limit, insts);
}

// Add the `stack_size` to `stack_limit`, placing the result in
// `scratch`.
//
// Note though that `stack_limit`'s register may be the same as
// `scratch`. If our stack size doesn't fit into an immediate this
// means we need a second scratch register for loading the stack size
// into a register. We use `x16` here since it's caller-saved and we're
// in the function prologue and nothing else is allocated to it yet.
let scratch = writable_spilltmp_reg();
let stack_size = u64::from(stack_size);
if let Some(imm12) = Imm12::maybe_from_u64(stack_size) {
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: scratch,
rn: stack_limit,
imm12,
});
} else {
let scratch2 = 16;
insts.extend(Inst::load_constant(
Writable::from_reg(xreg(scratch2)),
stack_size.into(),
));
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd: scratch,
rn: stack_limit,
rm: xreg(scratch2),
extendop: ExtendOp::UXTX,
});
}
push_check(scratch.to_reg(), insts);

fn push_check(stack_limit: Reg, insts: &mut Vec<Inst>) {
insts.push(Inst::AluRRR {
alu_op: ALUOp::SubS64XR,
rd: writable_zero_reg(),
rn: stack_reg(),
rm: stack_limit,
});
insts.push(Inst::CondBrLowered {
target: BranchTarget::ResolvedOffset(8),
// Here `Hs` == "higher or same" when interpreting the two
// operands as unsigned integers.
kind: CondBrKind::Cond(Cond::Hs),
});
insts.push(Inst::Udf {
trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
});
}
}
}

fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
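As a worked example of the two paths above (frame size fits an imm12 vs. needs a scratch register), here is a small, hypothetical pretty-printer for the check sequence, written to match the `%stack_limit` and `%huge_stack_limit` expectations in the new filetest further down. It assumes the limit lives in x0 and the spill temp is x15, as in those tests, and only models the simple imm12 range (< 4096), not the shifted-immediate form:

    // Sketch of the instruction text `insert_stack_check` produces; not the
    // backend's own code, just a model for the two filetest cases.
    fn stack_check_asm(stack_size: u32) -> Vec<String> {
        let check = |reg: &str| {
            vec![
                format!("subs xzr, sp, {}", reg),
                "b.hs 8".to_string(),
                "udf".to_string(),
            ]
        };
        if stack_size == 0 {
            return check("x0");
        }
        let mut out = Vec::new();
        if stack_size >= 32 * 1024 {
            // Pre-check against the raw limit so the add below can't wrap past sp.
            out.extend(check("x0"));
        }
        if stack_size < 4096 {
            // Fits in an imm12: one add against the limit register.
            out.push(format!("add x15, x0, #{}", stack_size));
        } else {
            // Materialize the size in x16 (caller-saved), then add it.
            out.push(format!("movz x16, #{}", stack_size & 0xffff));
            out.push(format!("movk x16, #{}, LSL #16", stack_size >> 16));
            out.push("add x15, x0, x16, UXTX".to_string());
        }
        out.extend(check("x15"));
        out
    }

Under those assumptions, `stack_check_asm(176)` reproduces the check lines expected for `%stack_limit`, and `stack_check_asm(400_000)` those expected for `%huge_stack_limit`.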
@@ -682,31 +878,41 @@ impl ABIBody for AArch64ABIBody {
}
let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.

if !self.call_conv.extends_baldrdash() && total_stacksize > 0 {
// sub sp, sp, #total_stacksize
if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
let sub_inst = Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
};
insts.push(sub_inst);
} else {
let tmp = writable_spilltmp_reg();
let const_inst = Inst::LoadConst64 {
rd: tmp,
const_data: total_stacksize as u64,
};
let sub_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
insts.push(const_inst);
insts.push(sub_inst);
if !self.call_conv.extends_baldrdash() {
// Leaf functions with zero stack don't need a stack check if one's
// specified, otherwise always insert the stack check.
if total_stacksize > 0 || !self.is_leaf {
if let Some((reg, stack_limit_load)) = &self.stack_limit {
insts.extend_from_slice(stack_limit_load);
self.insert_stack_check(*reg, total_stacksize, &mut insts);
}
}
if total_stacksize > 0 {
// sub sp, sp, #total_stacksize
if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
let sub_inst = Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
};
insts.push(sub_inst);
} else {
let tmp = writable_spilltmp_reg();
let const_inst = Inst::LoadConst64 {
rd: tmp,
const_data: total_stacksize as u64,
};
let sub_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
insts.push(const_inst);
insts.push(sub_inst);
}
}
}
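The gate added here boils down to a small predicate; as a sketch (a hypothetical free function, the real logic stays inline in the prologue generator), a check is emitted only when a stack limit is present and the function either allocates stack or makes calls, which is exactly what the filetests below exercise (`%stack_limit_leaf_zero` gets no check, `%stack_limit_call_zero` does):

    // Hypothetical summary of the prologue gate above.
    fn emits_stack_check(has_stack_limit: bool, total_stacksize: u32, is_leaf: bool) -> bool {
        has_stack_limit && (total_stacksize > 0 || !is_leaf)
    }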
@@ -347,6 +347,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ALUOp::AddS64 => 0b10101011_000,
ALUOp::SubS32 => 0b01101011_000,
ALUOp::SubS64 => 0b11101011_000,
ALUOp::SubS64XR => 0b11101011_001,
ALUOp::SDiv64 => 0b10011010_110,
ALUOp::UDiv64 => 0b10011010_110,
ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,

@@ -369,12 +370,16 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
ALUOp::SubS64XR => 0b011000,
_ => 0b000000,
};
debug_assert_ne!(writable_stack_reg(), rd);
// The stack pointer is the zero register in this context, so this might be an
// indication that something is wrong.
debug_assert_ne!(stack_reg(), rn);
// The stack pointer is the zero register if this instruction
// doesn't have access to extended registers, so this might be
// an indication that something is wrong.
if alu_op != ALUOp::SubS64XR {
debug_assert_ne!(stack_reg(), rn);
}
debug_assert_ne!(stack_reg(), rm);
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
}

@@ -2149,6 +2154,17 @@ mod test {
"subs x10, x11, x12, LSL 23",
));

insns.push((
Inst::AluRRR {
alu_op: ALUOp::SubS64XR,
rd: writable_zero_reg(),
rn: stack_reg(),
rm: xreg(12),
},
"FF632CEB",
"subs xzr, sp, x12",
));

insns.push((
Inst::AluRRRR {
alu_op: ALUOp::MAdd32,
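The `subs xzr, sp, x12` test above pins the new `SubS64XR` encoding down to the byte string `FF632CEB`. A quick standalone sanity check, assuming the same field packing as `enc_arith_rrr` (top 11 bits, Rm, bits 15..10, Rn, Rd), reproduces that value:

    // Repack the SubS64XR fields used in the emitter and compare against the
    // little-endian byte string the test expects ("FF632CEB").
    fn pack(top11: u32, bits_15_10: u32, rd: u32, rn: u32, rm: u32) -> u32 {
        (top11 << 21) | (rm << 16) | (bits_15_10 << 10) | (rn << 5) | rd
    }

    fn main() {
        // top11 = 0b11101011_001, bits 15..10 = 0b011000 (UXTX, shift 0),
        // rd = xzr (31), rn = sp (31), rm = x12 (12).
        let word = pack(0b11101011_001, 0b011000, 31, 31, 12);
        assert_eq!(word, 0xEB2C63FF);
        let hex: String = word.to_le_bytes().iter().map(|b| format!("{:02X}", b)).collect();
        assert_eq!(hex, "FF632CEB");
    }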
@@ -66,6 +66,8 @@ pub enum ALUOp {
SubS32,
/// Sub, setting flags
SubS64,
/// Sub, setting flags, using extended registers
SubS64XR,
/// Multiply-add
MAdd32,
/// Multiply-add

@@ -1931,6 +1933,7 @@ impl ShowWithRRU for Inst {
ALUOp::AddS64 => ("adds", InstSize::Size64),
ALUOp::SubS32 => ("subs", InstSize::Size32),
ALUOp::SubS64 => ("subs", InstSize::Size64),
ALUOp::SubS64XR => ("subs", InstSize::Size64),
ALUOp::MAdd32 => ("madd", InstSize::Size32),
ALUOp::MAdd64 => ("madd", InstSize::Size64),
ALUOp::MSub32 => ("msub", InstSize::Size32),
cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif (new file, 190 lines)
@@ -0,0 +1,190 @@
test vcode
target aarch64

function %foo() {
block0:
  return
}

function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %stack_limit_gv_leaf_zero(i64 vmctx) {
  gv0 = vmctx
  gv1 = load.i64 notrap aligned gv0
  gv2 = load.i64 notrap aligned gv1+4
  stack_limit = gv2
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %stack_limit_call_zero(i64 stack_limit) {
  fn0 = %foo()
block0(v0: i64):
  call fn0()
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, sp, x0
; nextln: b.hs 8
; nextln: udf
; nextln: bl 0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %stack_limit_gv_call_zero(i64 vmctx) {
  gv0 = vmctx
  gv1 = load.i64 notrap aligned gv0
  gv2 = load.i64 notrap aligned gv1+4
  stack_limit = gv2
  fn0 = %foo()
block0(v0: i64):
  call fn0()
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: bl 0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %stack_limit(i64 stack_limit) {
  ss0 = explicit_slot 168
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x15, x0, #176
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #176
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %huge_stack_limit(i64 stack_limit) {
  ss0 = explicit_slot 400000
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, sp, x0
; nextln: b.hs 8
; nextln: udf
; nextln: movz x16, #6784
; nextln: movk x16, #6, LSL #16
; nextln: add x15, x0, x16, UXTX
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x15, UXTX
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %limit_preamble(i64 vmctx) {
  gv0 = vmctx
  gv1 = load.i64 notrap aligned gv0
  gv2 = load.i64 notrap aligned gv1+4
  stack_limit = gv2
  ss0 = explicit_slot 20
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: add x15, x15, #32
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #32
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %limit_preamble_huge(i64 vmctx) {
  gv0 = vmctx
  gv1 = load.i64 notrap aligned gv0
  gv2 = load.i64 notrap aligned gv1+4
  stack_limit = gv2
  ss0 = explicit_slot 400000
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x15, [x0]
; nextln: ldr x15, [x15, #4]
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: movz x16, #6784
; nextln: movk x16, #6, LSL #16
; nextln: add x15, x15, x16, UXTX
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: ldr x15, 8 ; b 12 ; data 400000
; nextln: sub sp, sp, x15, UXTX
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %limit_preamble_huge_offset(i64 vmctx) {
  gv0 = vmctx
  gv1 = load.i64 notrap aligned gv0+400000
  stack_limit = gv1
  ss0 = explicit_slot 20
block0(v0: i64):
  return
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x15, #6784
; nextln: movk x15, #6, LSL #16
; nextln: ldr x15, [x0, x15]
; nextln: add x15, x15, #32
; nextln: subs xzr, sp, x15
; nextln: b.hs 8
; nextln: udf
; nextln: sub sp, sp, #32
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -94,7 +94,11 @@ impl GlobalFrameInfo {
// the function, because otherwise something is buggy along the way and
// not accounting for all the instructions. This isn't super critical
// though so we can omit this check in release mode.
debug_assert!(pos.is_some(), "failed to find instruction for {:x}", pc);
//
// FIXME(#1521) aarch64 instruction info isn't quite up-to-par yet.
if !cfg!(target_arch = "aarch64") {
debug_assert!(pos.is_some(), "failed to find instruction for {:x}", pc);
}

let instr = match pos {
Some(pos) => func.instr_map.instructions[pos].srcloc,
foo.c (new file, 10 lines)
@@ -0,0 +1,10 @@
void bar();
void baz();
int foo(int b, int a) {
if (b) {
baz();
return a + 3;
}
bar();
return a + 4;
}
foo.s (new file, 32 lines)
@@ -0,0 +1,32 @@
.arch armv8-a
.file "foo.c"
.text
.align 2
.global foo
.type foo, %function
foo:
.LFB0:
.cfi_startproc
stp x29, x30, [sp, -32]!
.cfi_def_cfa_offset 32
.cfi_offset 29, -32
.cfi_offset 30, -24
add x29, sp, 0
.cfi_def_cfa_register 29
str x19, [sp, 16]
.cfi_offset 19, -16
mov w19, w0
bl bar
add w0, w19, 3
ldr x19, [sp, 16]
ldp x29, x30, [sp], 32
.cfi_restore 30
.cfi_restore 29
.cfi_restore 19
.cfi_def_cfa 31, 0
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0"
.section .note.GNU-stack,"",@progbits
@@ -99,7 +99,6 @@ fn loop_interrupt_from_afar() -> anyhow::Result<()> {
}

#[test]
#[cfg_attr(target_arch = "aarch64", ignore)] // FIXME(#1569)
fn function_interrupt_from_afar() -> anyhow::Result<()> {
// Create an instance which calls an imported function on each iteration of
// the loop so we can count the number of loop iterations we've executed so

@@ -2,7 +2,6 @@ use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
use wasmtime::*;

#[test]
#[cfg_attr(target_arch = "aarch64", ignore)] // FIXME(#1569)
fn host_always_has_some_stack() -> anyhow::Result<()> {
static HITS: AtomicUsize = AtomicUsize::new(0);
// assume hosts always have at least 512k of stack