CFI improvements to the AArch64 fiber implementation (#4195)

Now the fiber implementation on AArch64 authenticates function
return addresses and includes the relevant BTI instructions, except
on macOS.

Also, change the locations of the saved FP and LR registers on the
fiber stack to make them compliant with the Procedure Call Standard
for the Arm 64-bit Architecture.

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Anton Kirilov
2022-06-09 15:17:12 +01:00
committed by GitHub
parent 823817595a
commit c15c3061ca
6 changed files with 104 additions and 14 deletions

View File

@@ -211,8 +211,6 @@ jobs:
gcc: aarch64-linux-gnu-gcc gcc: aarch64-linux-gnu-gcc
qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu
qemu_target: aarch64-linux-user qemu_target: aarch64-linux-user
# FIXME(#3183) shouldn't be necessary to specify this
qemu_flags: -cpu max,pauth=off
- os: ubuntu-latest - os: ubuntu-latest
target: s390x-unknown-linux-gnu target: s390x-unknown-linux-gnu
gcc_package: gcc-s390x-linux-gnu gcc_package: gcc-s390x-linux-gnu
@@ -251,7 +249,7 @@ jobs:
# Configure Cargo for cross compilation and tell it how it can run # Configure Cargo for cross compilation and tell it how it can run
# cross executables # cross executables
upcase=$(echo ${{ matrix.target }} | awk '{ print toupper($0) }' | sed 's/-/_/g') upcase=$(echo ${{ matrix.target }} | awk '{ print toupper($0) }' | sed 's/-/_/g')
echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} ${{ matrix.qemu_flags }} >> $GITHUB_ENV echo CARGO_TARGET_${upcase}_RUNNER=${{ runner.tool_cache }}/qemu/bin/${{ matrix.qemu }} >> $GITHUB_ENV
echo CARGO_TARGET_${upcase}_LINKER=${{ matrix.gcc }} >> $GITHUB_ENV echo CARGO_TARGET_${upcase}_LINKER=${{ matrix.gcc }} >> $GITHUB_ENV
# QEMU emulation is not always the speediest, so total testing time # QEMU emulation is not always the speediest, so total testing time

View File

@@ -623,11 +623,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
} }
fn gen_debug_frame_info( fn gen_debug_frame_info(
call_conv: isa::CallConv,
flags: &settings::Flags, flags: &settings::Flags,
_isa_flags: &Vec<settings::Value>, _isa_flags: &Vec<settings::Value>,
) -> SmallInstVec<Inst> { ) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new(); let mut insts = SmallVec::new();
if flags.unwind_info() { if flags.unwind_info() && call_conv.extends_apple_aarch64() {
insts.push(Inst::Unwind { insts.push(Inst::Unwind {
inst: UnwindInst::Aarch64SetPointerAuth { inst: UnwindInst::Aarch64SetPointerAuth {
return_addresses: false, return_addresses: false,

View File

@@ -104,7 +104,7 @@ mod tests {
_ => panic!("expected unwind information"), _ => panic!("expected unwind information"),
}; };
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
} }
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function { fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
@@ -146,7 +146,7 @@ mod tests {
assert_eq!( assert_eq!(
format!("{:?}", fde), format!("{:?}", fde),
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] }))] }" "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }"
); );
} }

View File

@@ -406,6 +406,7 @@ pub trait ABIMachineSpec {
/// Generates extra unwind instructions for a new frame for this /// Generates extra unwind instructions for a new frame for this
/// architecture, whether the frame has a prologue sequence or not. /// architecture, whether the frame has a prologue sequence or not.
fn gen_debug_frame_info( fn gen_debug_frame_info(
_call_conv: isa::CallConv,
_flags: &settings::Flags, _flags: &settings::Flags,
_isa_flags: &Vec<settings::Value>, _isa_flags: &Vec<settings::Value>,
) -> SmallInstVec<Self::I> { ) -> SmallInstVec<Self::I> {
@@ -1238,7 +1239,9 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.fixed_frame_storage_size, self.fixed_frame_storage_size,
); );
insts.extend(M::gen_debug_frame_info(&self.flags, &self.isa_flags).into_iter()); insts.extend(
M::gen_debug_frame_info(self.call_conv, &self.flags, &self.isa_flags).into_iter(),
);
if self.setup_frame { if self.setup_frame {
// set up frame // set up frame

View File

@@ -217,7 +217,23 @@ fn func_signature(
// then we can optimize this function to use the fastest calling // then we can optimize this function to use the fastest calling
// convention since it's purely an internal implementation detail of // convention since it's purely an internal implementation detail of
// the module itself. // the module itself.
Some(_idx) if !func.is_escaping() => CallConv::Fast, Some(_idx) if !func.is_escaping() => {
let on_apple_aarch64 = isa
.triple()
.default_calling_convention()
.unwrap_or(CallingConvention::SystemV)
== CallingConvention::AppleAarch64;
if on_apple_aarch64 {
// FIXME: Cranelift's ABI implementation generates the unwinding
// directives about pointer authentication usage only for
// Apple-specific calling conventions, so we can't just use
// `CallConv::Fast` here.
CallConv::WasmtimeAppleAarch64
} else {
CallConv::Fast
}
}
// ... otherwise if it's an imported function or if it's a possibly // ... otherwise if it's an imported function or if it's a possibly
// exported function then we use the default ABI wasmtime would // exported function then we use the default ABI wasmtime would

View File

@@ -7,18 +7,58 @@
// //
// Also at this time this file is heavily based off the x86_64 file, so you'll // Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well. // probably want to read that one as well.
//
// Finally, control flow integrity hardening has been applied to the code using
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
// technologies from the Arm instruction set architecture:
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
// instructions
// * Return addresses are signed and authenticated using the stack pointer
// value as a modifier (similarly to the salt in an HMAC operation); the
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
// `.cfi_window_save` assembler directive) informs an unwinder about this
#include "header.h" #include "header.h"
#ifndef CFG_TARGET_OS_macos
// We need to tell whatever loads the following code (e.g. the dynamic linker)
// that it is compatible with BTI, so that the corresponding executable memory
// pages have the necessary attribute set (if supported by the environment). To
// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and
// use a special metadata section. Further details are in section 6.2 of the
// specification:
//
// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property
//
// We also set the PAuth (PAC) property, even though it is optional, for the
// sake of completeness.
.pushsection .note.gnu.property, "a";
.p2align 3;
.word 4;
.word 16;
.word 5;
.asciz "GNU";
.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND
.word 4;
.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC
.word 0;
.popsection
#endif
// fn(top_of_stack(%x0): *mut u8) // fn(top_of_stack(%x0): *mut u8)
HIDDEN(wasmtime_fiber_switch) HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch) GLOBL(wasmtime_fiber_switch)
.p2align 2 .p2align 2
TYPE(wasmtime_fiber_switch) TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch): FUNCTION(wasmtime_fiber_switch):
.cfi_startproc
#ifndef CFG_TARGET_OS_macos
hint #25 // paciasp
.cfi_window_save
#endif
// Save all callee-saved registers on the stack since we're assuming // Save all callee-saved registers on the stack since we're assuming
// they're clobbered as a result of the stack switch. // they're clobbered as a result of the stack switch.
stp lr, fp, [sp, -16]! stp x29, x30, [sp, -16]!
stp x20, x19, [sp, -16]! stp x20, x19, [sp, -16]!
stp x22, x21, [sp, -16]! stp x22, x21, [sp, -16]!
stp x24, x23, [sp, -16]! stp x24, x23, [sp, -16]!
@@ -47,8 +87,13 @@ FUNCTION(wasmtime_fiber_switch):
ldp x24, x23, [sp], 16 ldp x24, x23, [sp], 16
ldp x22, x21, [sp], 16 ldp x22, x21, [sp], 16
ldp x20, x19, [sp], 16 ldp x20, x19, [sp], 16
ldp lr, fp, [sp], 16 ldp x29, x30, [sp], 16
#ifndef CFG_TARGET_OS_macos
hint #29 // autiasp
.cfi_window_save
#endif
ret ret
.cfi_endproc
SIZE(wasmtime_fiber_switch) SIZE(wasmtime_fiber_switch)
// fn( // fn(
@@ -61,8 +106,31 @@ GLOBL(wasmtime_fiber_init)
.p2align 2 .p2align 2
TYPE(wasmtime_fiber_init) TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init): FUNCTION(wasmtime_fiber_init):
adr x8, FUNCTION(wasmtime_fiber_start) .cfi_startproc
stp x0, x8, [x0, -0x28] // x0 => x19, x8 => lr hint #34 // bti c
// We set up the newly initialized fiber, so that it resumes execution from
// wasmtime_fiber_start(). As a result, we need a signed address of this
// function, so there are 2 requirements:
// * The fiber stack pointer value that is used by the signing operation
// must match the value when the pointer is authenticated inside
// wasmtime_fiber_switch(), otherwise the latter would fault
// * We would like to use an instruction that is executed as a no-op by
// processors that do not support PAuth, so that the code is backward-
// compatible and there is no duplication; `PACIA1716` is a suitable
// one, which has the following operand register conventions:
// * X17 contains the pointer value to sign
// * X16 contains the modifier value
//
// TODO: Use the PACGA instruction to authenticate the saved register state,
// which avoids creating signed pointers to wasmtime_fiber_start(), and
// provides wider coverage.
sub x16, x0, #16
adr x17, FUNCTION(wasmtime_fiber_start)
#ifndef CFG_TARGET_OS_macos
hint #8 // pacia1716
#endif
str x17, [x16, -0x8] // x17 => lr
str x0, [x16, -0x18] // x0 => x19
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21 stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for // `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
@@ -70,6 +138,7 @@ FUNCTION(wasmtime_fiber_init):
add x8, x0, -0xb0 add x8, x0, -0xb0
str x8, [x0, -0x10] str x8, [x0, -0x10]
ret ret
.cfi_endproc
SIZE(wasmtime_fiber_init) SIZE(wasmtime_fiber_init)
.p2align 2 .p2align 2
@@ -86,8 +155,11 @@ FUNCTION(wasmtime_fiber_start):
0x06, /* DW_OP_deref */ \ 0x06, /* DW_OP_deref */ \
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */ 0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
.cfi_rel_offset x29, -0x08 .cfi_rel_offset x29, -0x10
.cfi_rel_offset lr, -0x10 #ifndef CFG_TARGET_OS_macos
.cfi_window_save
#endif
.cfi_rel_offset x30, -0x08
.cfi_rel_offset x19, -0x18 .cfi_rel_offset x19, -0x18
.cfi_rel_offset x20, -0x20 .cfi_rel_offset x20, -0x20
.cfi_rel_offset x21, -0x28 .cfi_rel_offset x21, -0x28