From c15c3061ca66d68f874bbfa1c6712f403d5e3158 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Thu, 9 Jun 2022 15:17:12 +0100 Subject: [PATCH] CFI improvements to the AArch64 fiber implementation (#4195) Now the fiber implementation on AArch64 authenticates function return addresses and includes the relevant BTI instructions, except on macOS. Also, change the locations of the saved FP and LR registers on the fiber stack to make them compliant with the Procedure Call Standard for the Arm 64-bit Architecture. Copyright (c) 2022, Arm Limited. --- .github/workflows/main.yml | 4 +- cranelift/codegen/src/isa/aarch64/abi.rs | 3 +- .../src/isa/aarch64/inst/unwind/systemv.rs | 4 +- cranelift/codegen/src/machinst/abi_impl.rs | 5 +- crates/cranelift/src/lib.rs | 18 +++- crates/fiber/src/arch/aarch64.S | 84 +++++++++++++++++-- 6 files changed, 104 insertions(+), 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 98e24d1346..63084b04f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -211,8 +211,6 @@ jobs: gcc: aarch64-linux-gnu-gcc qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu qemu_target: aarch64-linux-user - # FIXME(#3183) shouldn't be necessary to specify this - qemu_flags: -cpu max,pauth=off - os: ubuntu-latest target: s390x-unknown-linux-gnu gcc_package: gcc-s390x-linux-gnu @@ -251,7 +249,7 @@ jobs: # Configure Cargo for cross compilation and tell it how it can run # cross executables upcase=$(echo ${{ matrix.target }} | awk '{ print toupper($0) }' | sed 's/-/_/g') - echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} ${{ matrix.qemu_flags }} >> $GITHUB_ENV + echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} >> $GITHUB_ENV echo CARGO_TARGET_${upcase}_LINKER=${{ matrix.gcc }} >> $GITHUB_ENV # QEMU emulation is not always the speediest, so total testing time diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs 
b/cranelift/codegen/src/isa/aarch64/abi.rs index ca82f8c608..dde4e1b3b8 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -623,11 +623,12 @@ impl ABIMachineSpec for AArch64MachineDeps { } fn gen_debug_frame_info( + call_conv: isa::CallConv, flags: &settings::Flags, _isa_flags: &Vec, ) -> SmallInstVec { let mut insts = SmallVec::new(); - if flags.unwind_info() { + if flags.unwind_info() && call_conv.extends_apple_aarch64() { insts.push(Inst::Unwind { inst: UnwindInst::Aarch64SetPointerAuth { return_addresses: false, diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs index 77f65862a3..b029ce0101 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -104,7 +104,7 @@ mod tests { _ => panic!("expected unwind information"), }; - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); } fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { @@ -146,7 +146,7 @@ mod tests { assert_eq!( format!("{:?}", fde), - "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] }))] }" + "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }" ); } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs 
b/cranelift/codegen/src/machinst/abi_impl.rs index 66ca26c2aa..41e81197ee 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -406,6 +406,7 @@ pub trait ABIMachineSpec { /// Generates extra unwind instructions for a new frame for this /// architecture, whether the frame has a prologue sequence or not. fn gen_debug_frame_info( + _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &Vec, ) -> SmallInstVec { @@ -1238,7 +1239,9 @@ impl ABICallee for ABICalleeImpl { self.fixed_frame_storage_size, ); - insts.extend(M::gen_debug_frame_info(&self.flags, &self.isa_flags).into_iter()); + insts.extend( + M::gen_debug_frame_info(self.call_conv, &self.flags, &self.isa_flags).into_iter(), + ); if self.setup_frame { // set up frame diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index 0bd91a55f9..2d429e6922 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -217,7 +217,23 @@ fn func_signature( // then we can optimize this function to use the fastest calling // convention since it's purely an internal implementation detail of // the module itself. - Some(_idx) if !func.is_escaping() => CallConv::Fast, + Some(_idx) if !func.is_escaping() => { + let on_apple_aarch64 = isa + .triple() + .default_calling_convention() + .unwrap_or(CallingConvention::SystemV) + == CallingConvention::AppleAarch64; + + if on_apple_aarch64 { + // FIXME: We need an Apple-specific calling convention, so that + // Cranelift's ABI implementation generates unwinding directives + // about pointer authentication usage, so we can't just use + // `CallConv::Fast`. + CallConv::WasmtimeAppleAarch64 + } else { + CallConv::Fast + } + } // ... 
otherwise if it's an imported function or if it's a possibly // exported function then we use the default ABI wasmtime would diff --git a/crates/fiber/src/arch/aarch64.S b/crates/fiber/src/arch/aarch64.S index e2e201c977..d772b1c3c2 100644 --- a/crates/fiber/src/arch/aarch64.S +++ b/crates/fiber/src/arch/aarch64.S @@ -7,18 +7,58 @@ // // Also at this time this file is heavily based off the x86_64 file, so you'll // probably want to read that one as well. +// +// Finally, control flow integrity hardening has been applied to the code using +// the Pointer Authentication (PAuth) and Branch Target Identification (BTI) +// technologies from the Arm instruction set architecture: +// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP` +// instructions +// * Return addresses are signed and authenticated using the stack pointer +// value as a modifier (similarly to the salt in a HMAC operation); the +// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the +// `.cfi_window_save` assembler directive) informs an unwinder about this #include "header.h" +#ifndef CFG_TARGET_OS_macos +// We need to tell whatever loads the following code (e.g. the dynamic linker) +// that it is compatible with BTI, so that the corresponding executable memory +// pages have the necessary attribute set (if supported by the environment). To +// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and +// use a special metadata section. Further details are in section 6.2 of the +// specification: +// +// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property +// +// We also set the PAuth (PAC) property, even though it is optional, for the +// sake of completeness. 
+.pushsection .note.gnu.property, "a"; +.p2align 3; +.word 4; +.word 16; +.word 5; +.asciz "GNU"; +.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND +.word 4; +.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC +.word 0; +.popsection +#endif + // fn(top_of_stack(%x0): *mut u8) HIDDEN(wasmtime_fiber_switch) GLOBL(wasmtime_fiber_switch) .p2align 2 TYPE(wasmtime_fiber_switch) FUNCTION(wasmtime_fiber_switch): + .cfi_startproc +#ifndef CFG_TARGET_OS_macos + hint #25 // paciasp + .cfi_window_save +#endif // Save all callee-saved registers on the stack since we're assuming // they're clobbered as a result of the stack switch. - stp lr, fp, [sp, -16]! + stp x29, x30, [sp, -16]! stp x20, x19, [sp, -16]! stp x22, x21, [sp, -16]! stp x24, x23, [sp, -16]! @@ -47,8 +87,13 @@ FUNCTION(wasmtime_fiber_switch): ldp x24, x23, [sp], 16 ldp x22, x21, [sp], 16 ldp x20, x19, [sp], 16 - ldp lr, fp, [sp], 16 + ldp x29, x30, [sp], 16 +#ifndef CFG_TARGET_OS_macos + hint #29 // autiasp + .cfi_window_save +#endif ret + .cfi_endproc SIZE(wasmtime_fiber_switch) // fn( @@ -61,8 +106,31 @@ GLOBL(wasmtime_fiber_init) .p2align 2 TYPE(wasmtime_fiber_init) FUNCTION(wasmtime_fiber_init): - adr x8, FUNCTION(wasmtime_fiber_start) - stp x0, x8, [x0, -0x28] // x0 => x19, x8 => lr + .cfi_startproc + hint #34 // bti c + // We set up the newly initialized fiber, so that it resumes execution from + // wasmtime_fiber_start(). 
As a result, we need a signed address of this + // function, so there are 2 requirements: + // * The fiber stack pointer value that is used by the signing operation + // must match the value when the pointer is authenticated inside + // wasmtime_fiber_switch(), otherwise the latter would fault + // * We would like to use an instruction that is executed as a no-op by + // processors that do not support PAuth, so that the code is backward- + // compatible and there is no duplication; `PACIA1716` is a suitable + // one, which has the following operand register conventions: + // * X17 contains the pointer value to sign + // * X16 contains the modifier value + // + // TODO: Use the PACGA instruction to authenticate the saved register state, + // which avoids creating signed pointers to wasmtime_fiber_start(), and + // provides wider coverage. + sub x16, x0, #16 + adr x17, FUNCTION(wasmtime_fiber_start) +#ifndef CFG_TARGET_OS_macos + hint #8 // pacia1716 +#endif + str x17, [x16, -0x8] // x17 => lr + str x0, [x16, -0x18] // x0 => x19 stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21 // `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for @@ -70,6 +138,7 @@ FUNCTION(wasmtime_fiber_init): add x8, x0, -0xb0 str x8, [x0, -0x10] ret + .cfi_endproc SIZE(wasmtime_fiber_init) .p2align 2 @@ -86,8 +155,11 @@ FUNCTION(wasmtime_fiber_start): 0x06, /* DW_OP_deref */ \ 0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */ - .cfi_rel_offset x29, -0x08 - .cfi_rel_offset lr, -0x10 + .cfi_rel_offset x29, -0x10 +#ifndef CFG_TARGET_OS_macos + .cfi_window_save +#endif + .cfi_rel_offset x30, -0x08 .cfi_rel_offset x19, -0x18 .cfi_rel_offset x20, -0x20 .cfi_rel_offset x21, -0x28