diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 98e24d1346..63084b04f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -211,8 +211,6 @@ jobs: gcc: aarch64-linux-gnu-gcc qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu qemu_target: aarch64-linux-user - # FIXME(#3183) shouldn't be necessary to specify this - qemu_flags: -cpu max,pauth=off - os: ubuntu-latest target: s390x-unknown-linux-gnu gcc_package: gcc-s390x-linux-gnu @@ -251,7 +249,7 @@ jobs: # Configure Cargo for cross compilation and tell it how it can run # cross executables upcase=$(echo ${{ matrix.target }} | awk '{ print toupper($0) }' | sed 's/-/_/g') - echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} ${{ matrix.qemu_flags }} >> $GITHUB_ENV + echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} >> $GITHUB_ENV echo CARGO_TARGET_${upcase}_LINKER=${{ matrix.gcc }} >> $GITHUB_ENV # QEMU emulation is not always the speediest, so total testing time diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index ca82f8c608..dde4e1b3b8 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -623,11 +623,12 @@ impl ABIMachineSpec for AArch64MachineDeps { } fn gen_debug_frame_info( + call_conv: isa::CallConv, flags: &settings::Flags, _isa_flags: &Vec, ) -> SmallInstVec { let mut insts = SmallVec::new(); - if flags.unwind_info() { + if flags.unwind_info() && call_conv.extends_apple_aarch64() { insts.push(Inst::Unwind { inst: UnwindInst::Aarch64SetPointerAuth { return_addresses: false, diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs index 77f65862a3..b029ce0101 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -104,7 +104,7 @@ mod tests { _ => 
panic!("expected unwind information"), }; - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); } fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { @@ -146,7 +146,7 @@ mod tests { assert_eq!( format!("{:?}", fde), - "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] }))] }" + "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }" ); } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 66ca26c2aa..41e81197ee 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -406,6 +406,7 @@ pub trait ABIMachineSpec { /// Generates extra unwind instructions for a new frame for this /// architecture, whether the frame has a prologue sequence or not. 
fn gen_debug_frame_info( + _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &Vec, ) -> SmallInstVec { @@ -1238,7 +1239,9 @@ impl ABICallee for ABICalleeImpl { self.fixed_frame_storage_size, ); - insts.extend(M::gen_debug_frame_info(&self.flags, &self.isa_flags).into_iter()); + insts.extend( + M::gen_debug_frame_info(self.call_conv, &self.flags, &self.isa_flags).into_iter(), + ); if self.setup_frame { // set up frame diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index 0bd91a55f9..2d429e6922 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -217,7 +217,23 @@ fn func_signature( // then we can optimize this function to use the fastest calling // convention since it's purely an internal implementation detail of // the module itself. - Some(_idx) if !func.is_escaping() => CallConv::Fast, + Some(_idx) if !func.is_escaping() => { + let on_apple_aarch64 = isa + .triple() + .default_calling_convention() + .unwrap_or(CallingConvention::SystemV) + == CallingConvention::AppleAarch64; + + if on_apple_aarch64 { + // FIXME: We need an Apple-specific calling convention, so that + // Cranelift's ABI implementation generates unwinding directives + // about pointer authentication usage, so we can't just use + // `CallConv::Fast`. + CallConv::WasmtimeAppleAarch64 + } else { + CallConv::Fast + } + } // ... otherwise if it's an imported function or if it's a possibly // exported function then we use the default ABI wasmtime would diff --git a/crates/fiber/src/arch/aarch64.S b/crates/fiber/src/arch/aarch64.S index e2e201c977..d772b1c3c2 100644 --- a/crates/fiber/src/arch/aarch64.S +++ b/crates/fiber/src/arch/aarch64.S @@ -7,18 +7,58 @@ // // Also at this time this file is heavily based off the x86_64 file, so you'll // probably want to read that one as well. 
+// +// Finally, control flow integrity hardening has been applied to the code using +// the Pointer Authentication (PAuth) and Branch Target Identification (BTI) +// technologies from the Arm instruction set architecture: +// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP` +// instructions +// * Return addresses are signed and authenticated using the stack pointer +// value as a modifier (similarly to the salt in a HMAC operation); the +// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the +// `.cfi_window_save` assembler directive) informs an unwinder about this #include "header.h" +#ifndef CFG_TARGET_OS_macos +// We need to tell whatever loads the following code (e.g. the dynamic linker) +// that it is compatible with BTI, so that the corresponding executable memory +// pages have the necessary attribute set (if supported by the environment). To +// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and +// use a special metadata section. Further details are in section 6.2 of the +// specification: +// +// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property +// +// We also set the PAuth (PAC) property, even though it is optional, for the +// sake of completeness. +.pushsection .note.gnu.property, "a"; +.p2align 3; +.word 4; +.word 16; +.word 5; +.asciz "GNU"; +.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND +.word 4; +.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC +.word 0; +.popsection +#endif + // fn(top_of_stack(%x0): *mut u8) HIDDEN(wasmtime_fiber_switch) GLOBL(wasmtime_fiber_switch) .p2align 2 TYPE(wasmtime_fiber_switch) FUNCTION(wasmtime_fiber_switch): + .cfi_startproc +#ifndef CFG_TARGET_OS_macos + hint #25 // paciasp + .cfi_window_save +#endif // Save all callee-saved registers on the stack since we're assuming // they're clobbered as a result of the stack switch. - stp lr, fp, [sp, -16]!
+ stp x29, x30, [sp, -16]! stp x20, x19, [sp, -16]! stp x22, x21, [sp, -16]! stp x24, x23, [sp, -16]! @@ -47,8 +87,13 @@ FUNCTION(wasmtime_fiber_switch): ldp x24, x23, [sp], 16 ldp x22, x21, [sp], 16 ldp x20, x19, [sp], 16 - ldp lr, fp, [sp], 16 + ldp x29, x30, [sp], 16 +#ifndef CFG_TARGET_OS_macos + hint #29 // autiasp + .cfi_window_save +#endif ret + .cfi_endproc SIZE(wasmtime_fiber_switch) // fn( @@ -61,8 +106,31 @@ GLOBL(wasmtime_fiber_init) .p2align 2 TYPE(wasmtime_fiber_init) FUNCTION(wasmtime_fiber_init): - adr x8, FUNCTION(wasmtime_fiber_start) - stp x0, x8, [x0, -0x28] // x0 => x19, x8 => lr + .cfi_startproc + hint #34 // bti c + // We set up the newly initialized fiber, so that it resumes execution from + // wasmtime_fiber_start(). As a result, we need a signed address of this + // function, so there are 2 requirements: + // * The fiber stack pointer value that is used by the signing operation + // must match the value when the pointer is authenticated inside + // wasmtime_fiber_switch(), otherwise the latter would fault + // * We would like to use an instruction that is executed as a no-op by + // processors that do not support PAuth, so that the code is backward- + // compatible and there is no duplication; `PACIA1716` is a suitable + // one, which has the following operand register conventions: + // * X17 contains the pointer value to sign + // * X16 contains the modifier value + // + // TODO: Use the PACGA instruction to authenticate the saved register state, + // which avoids creating signed pointers to wasmtime_fiber_start(), and + // provides wider coverage. 
+ sub x16, x0, #16 + adr x17, FUNCTION(wasmtime_fiber_start) +#ifndef CFG_TARGET_OS_macos + hint #8 // pacia1716 +#endif + str x17, [x16, -0x8] // x17 => lr + str x0, [x16, -0x18] // x0 => x19 stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21 // `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for @@ -70,6 +138,7 @@ FUNCTION(wasmtime_fiber_init): add x8, x0, -0xb0 str x8, [x0, -0x10] ret + .cfi_endproc SIZE(wasmtime_fiber_init) .p2align 2 @@ -86,8 +155,11 @@ FUNCTION(wasmtime_fiber_start): 0x06, /* DW_OP_deref */ \ 0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */ - .cfi_rel_offset x29, -0x08 - .cfi_rel_offset lr, -0x10 + .cfi_rel_offset x29, -0x10 +#ifndef CFG_TARGET_OS_macos + .cfi_window_save +#endif + .cfi_rel_offset x30, -0x08 .cfi_rel_offset x19, -0x18 .cfi_rel_offset x20, -0x20 .cfi_rel_offset x21, -0x28