Use global_asm! instead of external assembly files (#4306)

* Use `global_asm!` instead of external assembly files

This commit moves the external assembly files of the `wasmtime-fiber`
crate into `global_asm!` blocks defined in Rust. The motivation for
doing this is not very strong at this time, but the points in favor of
this are:

* One less tool needed to cross-compile Wasmtime. A linker is still
  needed but perhaps one day that will improve as well.
* A "modern" assembler, built-in to LLVM, is used instead of whatever
  appears on the system.

The first point hasn't really cropped up that much and typically getting
an assembler is just as hard as getting a linker nowadays. The second
point though has us using `hint #xx` in aarch64 assembly instead of the
actual instructions for assembler compatibility, and I believe that's no
longer necessary because the LLVM assembler supports the modern
instruction names.

The x86/x86_64 assembly has also been translated to Intel syntax, as
opposed to the old AT&T syntax, since that's Rust's default.
Additionally, s390x still remains in an external assembler file because
`global_asm!` is still unstable in Rust on that platform.

* Simplify alignment specification

* Temporarily disable fail-fast

* Add `.cfi_def_cfa_offset 0` to fix CI

* Turn off fail-fast

* Review comments
This commit is contained in:
Alex Crichton
2022-06-27 13:20:19 -05:00
committed by GitHub
parent 0ef873f1bd
commit 4543a07bb5
16 changed files with 631 additions and 603 deletions

1
Cargo.lock generated
View File

@@ -3501,6 +3501,7 @@ version = "0.39.0"
dependencies = [
"backtrace",
"cc",
"cfg-if",
"rustix",
"winapi",
]

View File

@@ -13,6 +13,9 @@ edition = "2021"
# directives or similar to embed a version number of this crate in symbols.
links = "wasmtime-fiber-shims"
[dependencies]
cfg-if = "1.0"
[target.'cfg(unix)'.dependencies]
rustix = { version = "0.35.6", features = ["mm", "param"] }

View File

@@ -1,23 +1,18 @@
use std::env;
use std::fs;
fn main() {
let mut build = cc::Build::new();
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
let family = env::var("CARGO_CFG_TARGET_FAMILY").unwrap();
let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
let family_file = format!("src/arch/{}.c", family);
let arch_file = format!("src/arch/{}.S", arch);
if fs::metadata(&family_file).is_ok() {
build.file(&family_file);
} else if fs::metadata(&arch_file).is_ok() {
build.file(&arch_file);
if os == "windows" {
build.file("src/windows.c");
} else if arch == "s390x" {
build.file("src/unix/s390x.S");
} else {
panic!(
"wasmtime doesn't support fibers on platform: {}",
env::var("TARGET").unwrap()
);
// assume that this is included via inline assembly in the crate itself,
// and the crate will otherwise have a `compile_error!` for unsupported
// platforms.
return;
}
build.define(&format!("CFG_TARGET_OS_{}", os), None);
build.define(&format!("CFG_TARGET_ARCH_{}", arch), None);

View File

@@ -1,194 +0,0 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
//
// Finally, control flow integrity hardening has been applied to the code using
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
// technologies from the Arm instruction set architecture:
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
// instructions
// * Return addresses are signed and authenticated using the stack pointer
// value as a modifier (similarly to the salt in a HMAC operation); the
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
// `.cfi_window_save` assembler directive) informs an unwinder about this
#include "header.h"
#ifndef CFG_TARGET_OS_macos
// We need to tell whatever loads the following code (e.g. the dynamic linker)
// that it is compatible with BTI, so that the corresponding executable memory
// pages have the necessary attribute set (if supported by the environment). To
// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and
// use a special metadata section. Further details are in section 6.2 of the
// specification:
//
// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property
//
// We also set the PAuth (PAC) property, even though it is optional, for the
// sake of completeness.
//
// The directives below spell out one ELF note by hand: a three-word note
// header, the owner name, and a 16-byte descriptor holding one property.
.pushsection .note.gnu.property, "a";
.p2align 3;
.word 4; // name size: the 4 bytes of "GNU\0" emitted by `.asciz` below
.word 16; // descriptor size: the four words following the name
.word 5; // note type (NT_GNU_PROPERTY_TYPE_0 per the specification above)
.asciz "GNU";
.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND
.word 4; // size of the property data (the single word below)
.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC
.word 0; // padding word, rounds the descriptor up to the 16 bytes declared above
.popsection
#endif
// fn(top_of_stack(%x0): *mut u8)
//
// Saves the callee-saved registers of the current context on the current
// stack, swaps the stack pointer with the one stored in the slot at
// `top_of_stack - 0x10`, and then restores the callee-saved registers of the
// context being resumed before returning into it.
HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch)
.p2align 2
TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch):
.cfi_startproc
// Sign the return address using the stack pointer as the modifier (see the
// notes at the top of this file); not emitted for macOS builds.
#ifndef CFG_TARGET_OS_macos
hint #25 // paciasp
.cfi_window_save
#endif
// Save all callee-saved registers on the stack since we're assuming
// they're clobbered as a result of the stack switch.
//
// These ten 16-byte pushes form the 0xa0 byte frame that
// `wasmtime_fiber_init` and the CFI in `wasmtime_fiber_start` rely on, so
// their order and count must not change independently.
stp x29, x30, [sp, -16]!
stp x20, x19, [sp, -16]!
stp x22, x21, [sp, -16]!
stp x24, x23, [sp, -16]!
stp x26, x25, [sp, -16]!
stp x28, x27, [sp, -16]!
stp d9, d8, [sp, -16]!
stp d11, d10, [sp, -16]!
stp d13, d12, [sp, -16]!
stp d15, d14, [sp, -16]!
// Load our previously saved stack pointer to resume to, and save off our
// current stack pointer on where to come back to eventually.
ldr x8, [x0, -0x10]
mov x9, sp
str x9, [x0, -0x10]
// Switch to the new stack and restore all our callee-saved registers after
// the switch and return to our new stack.
mov sp, x8
// Pops mirror the pushes above in reverse order.
ldp d15, d14, [sp], 16
ldp d13, d12, [sp], 16
ldp d11, d10, [sp], 16
ldp d9, d8, [sp], 16
ldp x28, x27, [sp], 16
ldp x26, x25, [sp], 16
ldp x24, x23, [sp], 16
ldp x22, x21, [sp], 16
ldp x20, x19, [sp], 16
ldp x29, x30, [sp], 16
// Authenticate the return address that was just restored into x30; the
// stack pointer is back at the value it had when that address was signed.
#ifndef CFG_TARGET_OS_macos
hint #29 // autiasp
.cfi_window_save
#endif
ret
.cfi_endproc
SIZE(wasmtime_fiber_switch)
// fn(
// top_of_stack(%x0): *mut u8,
// entry_point(%x1): extern fn(*mut u8, *mut u8),
// entry_arg0(%x2): *mut u8,
// )
//
// Fills in a fresh fiber stack so that it looks like a frame saved by
// `wasmtime_fiber_switch`: the first switch into it "returns" into
// `wasmtime_fiber_start` with `top_of_stack`, `entry_point` and `entry_arg0`
// restored into x19, x20 and x21 respectively.
HIDDEN(wasmtime_fiber_init)
GLOBL(wasmtime_fiber_init)
.p2align 2
TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init):
.cfi_startproc
hint #34 // bti c
// We set up the newly initialized fiber, so that it resumes execution from
// wasmtime_fiber_start(). As a result, we need a signed address of this
// function, so there are 2 requirements:
// * The fiber stack pointer value that is used by the signing operation
// must match the value when the pointer is authenticated inside
// wasmtime_fiber_switch(), otherwise the latter would fault
// * We would like to use an instruction that is executed as a no-op by
// processors that do not support PAuth, so that the code is backward-
// compatible and there is no duplication; `PACIA1716` is a suitable
// one, which has the following operand register conventions:
// * X17 contains the pointer value to sign
// * X16 contains the modifier value
//
// TODO: Use the PACGA instruction to authenticate the saved register state,
// which avoids creating signed pointers to wasmtime_fiber_start(), and
// provides wider coverage.
//
// x16 = top_of_stack - 16: this is the stack pointer value after
// `wasmtime_fiber_switch` has popped its whole frame, i.e. the modifier that
// `autiasp` will use there.
sub x16, x0, #16
adr x17, FUNCTION(wasmtime_fiber_start)
#ifndef CFG_TARGET_OS_macos
hint #8 // pacia1716
#endif
// The slots written below are the ones `wasmtime_fiber_switch` pops into the
// registers named in the trailing comments.
str x17, [x16, -0x8] // x17 => lr
str x0, [x16, -0x18] // x0 => x19
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
// the original reserved 16 bytes.
add x8, x0, -0xb0
// Publish the initial resume stack pointer in the slot that
// `wasmtime_fiber_switch` reads.
str x8, [x0, -0x10]
ret
.cfi_endproc
SIZE(wasmtime_fiber_init)
// Base frame of every fiber. It has no callable signature: it is only ever
// entered through the `ret` of `wasmtime_fiber_switch`, with the values
// arranged by `wasmtime_fiber_init` already restored into x19, x20 and x21.
.p2align 2
TYPE(wasmtime_fiber_start)
FUNCTION(wasmtime_fiber_start):
.cfi_startproc simple
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
// match the frame layout in `wasmtime_fiber_switch`.
//
// The CFA is computed as `*sp + 0xa0`: the word at `sp` is the saved stack
// pointer of the suspended parent, and 0xa0 skips the frame pushed by
// `wasmtime_fiber_switch`.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x6f, /* DW_OP_reg31(%sp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
.cfi_rel_offset x29, -0x10
#ifndef CFG_TARGET_OS_macos
.cfi_window_save
#endif
.cfi_rel_offset x30, -0x08
.cfi_rel_offset x19, -0x18
.cfi_rel_offset x20, -0x20
.cfi_rel_offset x21, -0x28
.cfi_rel_offset x22, -0x30
.cfi_rel_offset x23, -0x38
.cfi_rel_offset x24, -0x40
.cfi_rel_offset x25, -0x48
.cfi_rel_offset x26, -0x50
.cfi_rel_offset x27, -0x58
// Move our two arguments into place: x0 receives `entry_arg0` (restored into
// x21) and x1 receives `top_of_stack` (restored into x19), as arranged by
// `wasmtime_fiber_init`. The entry point itself was restored into x20.
mov x0, x21
mov x1, x19
// ... and then we call the function! Note that this is a function call so
// our frame stays on the stack to backtrace through.
blr x20
// Unreachable, here for safety. This should help catch unexpected behaviors.
// Use a noticeable payload so one can grep for it in the codebase.
brk 0xf1b3
.cfi_endproc
SIZE(wasmtime_fiber_start)
// This omits the `.subsections_via_symbols` directive on macOS which means we
// can't GC specific intrinsics from this file, but it enables usage of the
// `adr` instruction above in lieu of figuring out a slightly more complicated
// way of implementing that.
#ifndef CFG_TARGET_OS_macos
FOOTER
#endif

View File

@@ -1,83 +0,0 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
#include "header.h"
// fn(top_of_stack(%r0): *mut u8)
//
// Saves r4-r11 and lr on the current stack, swaps the stack pointer with the
// one stored at `top_of_stack - 0x08`, then restores r4-r11 and lr from the
// new stack and branches to the restored lr.
HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch)
TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch):
// Save callee-saved registers
// (nine 4-byte registers, i.e. the 0x24 byte frame assumed by
// `wasmtime_fiber_init` and by the CFI in `wasmtime_fiber_start`)
push {r4-r11,lr}
// Swap stacks, recording our current stack pointer
// (r4 is free to use as scratch here since it was just saved above)
ldr r4, [r0, #-0x08]
str sp, [r0, #-0x08]
mov sp, r4
// Restore and return
pop {r4-r11,lr}
bx lr
SIZE(wasmtime_fiber_switch)
// fn(
// top_of_stack(%r0): *mut u8,
// entry_point(%r1): extern fn(*mut u8, *mut u8),
// entry_arg0(%r2): *mut u8,
// )
//
// Fills in a fresh fiber stack so that it looks like a frame saved by
// `wasmtime_fiber_switch`; the trailing comments name the register each slot
// is popped into by that function.
HIDDEN(wasmtime_fiber_init)
GLOBL(wasmtime_fiber_init)
TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init):
adr r3, FUNCTION(wasmtime_fiber_start)
str r3, [r0, #-0x0c] // => lr
str r0, [r0, #-0x10] // => r11
str r1, [r0, #-0x14] // => r10
str r2, [r0, #-0x18] // => r9
// Initial resume stack pointer: 0x08 reserved bytes at the top of the stack
// plus the 0x24 byte frame popped by `wasmtime_fiber_switch`.
add r3, r0, #-0x2c
str r3, [r0, #-0x08]
bx lr
SIZE(wasmtime_fiber_init)
// Base frame of every fiber, entered through the `bx lr` of
// `wasmtime_fiber_switch` with the values stored by `wasmtime_fiber_init`
// restored into r9 (entry_arg0), r10 (entry_point) and r11 (top_of_stack).
FUNCTION(wasmtime_fiber_start):
.cfi_startproc simple
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
// match the frame layout in `wasmtime_fiber_switch`.
//
// TODO: this is only lightly tested. This gets backtraces in gdb but not
// at runtime. Perhaps the libgcc at runtime was too old? Doesn't support
// something here? Unclear. Will need investigation if someone ends up
// needing this and it still doesn't work.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x7d, 0x00, /* DW_OP_breg13(%sp) + 0 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x24 /* DW_OP_plus_uconst 0x24 */
.cfi_rel_offset lr, -0x04
.cfi_rel_offset r11, -0x08
.cfi_rel_offset r10, -0x0c
.cfi_rel_offset r9, -0x10
.cfi_rel_offset r8, -0x14
.cfi_rel_offset r7, -0x18
.cfi_rel_offset r6, -0x1c
.cfi_rel_offset r5, -0x20
.cfi_rel_offset r4, -0x24
// Second argument is `top_of_stack`, first argument is `entry_arg0`, and the
// call goes to `entry_point`.
mov r1, r11
mov r0, r9
blx r10
.cfi_endproc
SIZE(wasmtime_fiber_start)
FOOTER

View File

@@ -1,36 +0,0 @@
// Helper macros shared by the per-architecture assembly files so that symbol
// names and symbol attributes are spelled once for the two cases handled
// here: macOS, and everything else.
#ifndef __wasmtime_common_h
#define __wasmtime_common_h
#if CFG_TARGET_OS_macos
.section __TEXT,__text,regular,pure_instructions
// On this branch every symbol name gets a leading underscore.
#define GLOBL(fnname) .globl _##fnname
#define HIDDEN(fnname) .private_extern _##fnname
// No symbol type/size directives are emitted on this branch.
#define TYPE(fnname)
#define FUNCTION(fnname) _##fnname
#define SIZE(fnname)
// Tells the linker it's safe to gc symbols away if not used.
#define FOOTER .subsections_via_symbols
#else
.text
#define GLOBL(fnname) .globl fnname
#define HIDDEN(fnname) .hidden fnname
// The function symbol type is spelled `%function` for 32-bit Arm and
// `@function` for every other architecture.
#ifdef CFG_TARGET_ARCH_arm
#define TYPE(fnname) .type fnname,%function
#else
#define TYPE(fnname) .type fnname,@function
#endif
#define FUNCTION(fnname) fnname
// Records the symbol size as the distance from the symbol to this directive,
// so it must be placed directly after the function's last instruction.
#define SIZE(fnname) .size fnname,.-fnname
// Mark that we don't need executable stack.
#define FOOTER .section .note.GNU-stack,"",%progbits
#endif
#endif // __wasmtime_common_h

View File

@@ -1,107 +0,0 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// This file is modeled after x86_64.S and comments are not copied over. For
// reference be sure to review the other file. Note that the pointer size is
// different so the reserved space at the top of the stack is 8 bytes, not 16
// bytes. Still two pointers though.
#include "header.h"
// fn(top_of_stack: *mut u8)
//
// Saves the callee-saved registers on the current stack, swaps the stack
// pointer with the one stored at `top_of_stack - 0x8`, and restores the
// callee-saved registers of the context being resumed before returning into
// it.
HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch)
TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch):
// Load our stack-to-use
// (the argument is read from the stack, right above our return address)
mov 0x4(%esp), %eax
mov -0x8(%eax), %ecx
// Save callee-saved registers
push %ebp
push %ebx
push %esi
push %edi
// Save our current stack and jump to the stack-to-use
mov %esp, -0x8(%eax)
mov %ecx, %esp
// Restore callee-saved registers
pop %edi
pop %esi
pop %ebx
pop %ebp
// Returns to the return address found on the new stack; for a freshly
// initialized fiber that is `wasmtime_fiber_start` (see
// `wasmtime_fiber_init`).
ret
SIZE(wasmtime_fiber_switch)
// fn(
// top_of_stack: *mut u8,
// entry_point: extern fn(*mut u8, *mut u8),
// entry_arg0: *mut u8,
// )
//
// Fills in a fresh fiber stack so that the first `wasmtime_fiber_switch` into
// it lands in `wasmtime_fiber_start` with the call arguments already on the
// stack and `entry_point` in `%ebp`.
HIDDEN(wasmtime_fiber_init)
GLOBL(wasmtime_fiber_init)
TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init):
mov 4(%esp), %eax
// move top_of_stack to the 2nd argument
mov %eax, -0x0c(%eax)
// move entry_arg0 to the 1st argument
mov 12(%esp), %ecx
mov %ecx, -0x10(%eax)
// Move our start function to the return address which the `ret` in
// `wasmtime_fiber_switch` will return to.
lea FUNCTION(wasmtime_fiber_start), %ecx
mov %ecx, -0x14(%eax)
// And move `entry_point` to get loaded into `%ebp` through the context
// switch. This'll get jumped to in `wasmtime_fiber_start`.
mov 8(%esp), %ecx
mov %ecx, -0x18(%eax)
// Our stack from top-to-bottom looks like:
//
// * 8 bytes of reserved space per unix.rs (two-pointers space)
// * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards)
// * 4 bytes of return address
// * 16 bytes of saved registers
//
// Note that after the return address the stack is conveniently 16-byte
// aligned as required, so we just leave the arguments on the stack in
// `wasmtime_fiber_start` and immediately do the call.
//
// The sizes above add up to 0x24, which is the offset used for the initial
// resume stack pointer below.
lea -0x24(%eax), %ecx
mov %ecx, -0x08(%eax)
ret
SIZE(wasmtime_fiber_init)
// Base frame of every fiber, entered through the `ret` of
// `wasmtime_fiber_switch`. See the x86_64 file for commentary on the CFI
// directives used here.
TYPE(wasmtime_fiber_start)
FUNCTION(wasmtime_fiber_start):
.cfi_startproc simple
// The CFA is `*(%esp + 8) + 0x14`: on entry `%esp` is 0x10 below the top of
// the stack, so `%esp + 8` is the `top_of_stack - 0x8` slot holding the
// suspended parent's stack pointer, and 0x14 skips the four registers pushed
// by `wasmtime_fiber_switch` plus its return address.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x14 /* DW_OP_plus_uconst 0x14 */
.cfi_rel_offset eip, -4
.cfi_rel_offset ebp, -8
.cfi_rel_offset ebx, -12
.cfi_rel_offset esi, -16
.cfi_rel_offset edi, -20
// Our arguments and stack alignment are all prepped by
// `wasmtime_fiber_init`.
//
// `%ebp` holds `entry_point`, restored by `wasmtime_fiber_switch`. The
// trailing `ud2` traps if the entry point ever returns.
call *%ebp
ud2
.cfi_endproc
SIZE(wasmtime_fiber_start)
FOOTER

View File

@@ -1,159 +0,0 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
#include "header.h"
// fn(top_of_stack(%rdi): *mut u8)
//
// Saves the callee-saved registers on the current stack, swaps the stack
// pointer with the one stored at `top_of_stack - 0x10`, and restores the
// callee-saved registers of the context being resumed before returning into
// it.
HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch)
.align 16
TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch):
// We're switching to arbitrary code somewhere else, so pessimistically
// assume that all callee-save register are clobbered. This means we need
// to save/restore all of them.
//
// Note that this order for saving is important since we use CFI directives
// below to point to where all the saved registers are.
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
// Load pointer that we're going to resume at and store where we're going
// to get resumed from. This is in accordance with the diagram at the top
// of unix.rs.
movq -0x10(%rdi), %rax
mov %rsp, -0x10(%rdi)
// Swap stacks and restore all our callee-saved registers
// (popped in the reverse order of the pushes above)
mov %rax, %rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
ret
SIZE(wasmtime_fiber_switch)
// fn(
// top_of_stack(%rdi): *mut u8,
// entry_point(%rsi): extern fn(*mut u8, *mut u8),
// entry_arg0(%rdx): *mut u8,
// )
HIDDEN(wasmtime_fiber_init)
GLOBL(wasmtime_fiber_init)
.align 16
TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init):
// Here we're going to set up a stack frame as expected by
// `wasmtime_fiber_switch`. The values we store here will get restored into
// registers by that function and the `wasmtime_fiber_start` function will
// take over and understands which values are in which registers.
//
// The first 16 bytes of stack are reserved for metadata, so we start
// storing values beneath that.
//
// The first slot below is the return address consumed by the `ret` in
// `wasmtime_fiber_switch`.
lea FUNCTION(wasmtime_fiber_start)(%rip), %rax
movq %rax, -0x18(%rdi)
movq %rdi, -0x20(%rdi) // loaded into rbp during switch
movq %rsi, -0x28(%rdi) // loaded into rbx during switch
movq %rdx, -0x30(%rdi) // loaded into r12 during switch
// And then we specify the stack pointer resumption should begin at. Our
// `wasmtime_fiber_switch` function consumes 6 registers plus a return
// pointer, each 8 bytes wide, and the top 16 bytes are reserved, so that's:
//
// (6 + 1) * 8 + 16 = 0x48
lea -0x48(%rdi), %rax
movq %rax, -0x10(%rdi)
ret
SIZE(wasmtime_fiber_init)
// This is a pretty special function that has no real signature. Its use is to
// be the "base" function of all fibers. This entrypoint is used in
// `wasmtime_fiber_init` to bootstrap the execution of a new fiber.
//
// We also use this function as a persistent frame on the stack to emit dwarf
// information to unwind into the caller. This allows us to unwind from the
// fiber's stack back to the main stack that the fiber was called from. We use
// special dwarf directives here to do so since this is a pretty nonstandard
// function.
//
// If you're curious a decent introduction to CFI things and unwinding is at
// https://www.imperialviolet.org/2017/01/18/cfi.html
.align 16
TYPE(wasmtime_fiber_start)
FUNCTION(wasmtime_fiber_start):
// Use the `simple` directive on the startproc here which indicates that some
// default settings for the platform are omitted, since this function is so
// nonstandard
.cfi_startproc simple
// This is where things get special, we're specifying a custom dwarf
// expression for how to calculate the CFA. The goal here is that we need
// to load the parent's stack pointer just before the call it made into
// `wasmtime_fiber_switch`. Note that the CFA value changes over time as
// well because a fiber may be resumed multiple times from different points
// on the original stack. This means that our custom CFA directive involves
// `DW_OP_deref`, which loads data from memory.
//
// The expression we're encoding here is that the CFA, the stack pointer of
// whatever called into `wasmtime_fiber_start`, is:
//
// *$rsp + 0x38
//
// $rsp is the stack pointer of `wasmtime_fiber_start` at the time the next
// instruction after the `.cfi_escape` is executed. Our $rsp at the start
// of this function is 16 bytes below the top of the stack (0xAff0 in
// the diagram in unix.rs). The $rsp to resume at is stored at that
// location, so we dereference the stack pointer to load it.
//
// After dereferencing, though, we have the $rsp value for
// `wasmtime_fiber_switch` itself. That's a weird function which sort of
// and sort of doesn't exist on the stack. We want to point to the caller
// of `wasmtime_fiber_switch`, so to do that we need to skip the stack space
// reserved by `wasmtime_fiber_switch`, which is the 6 saved registers plus
// the return address of the caller's `call` instruction. Hence we offset
// another 0x38 bytes.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
4, /* the byte length of this expression */ \
0x57, /* DW_OP_reg7 (%rsp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0x38 /* DW_OP_plus_uconst 0x38 */
// And now after we've indicated where our CFA is for our parent function,
// we can define that where all of the saved registers are located. This
// uses standard `.cfi` directives which indicate that these registers are
// all stored relative to the CFA. Note that this order is kept in sync
// with the above register spills in `wasmtime_fiber_switch`.
.cfi_rel_offset rip, -8
.cfi_rel_offset rbp, -16
.cfi_rel_offset rbx, -24
.cfi_rel_offset r12, -32
.cfi_rel_offset r13, -40
.cfi_rel_offset r14, -48
.cfi_rel_offset r15, -56
// The body of this function is pretty similar. All our parameters are
// already loaded into registers by the switch function. The
// `wasmtime_fiber_init` routine arranged the various values to be
// materialized into the registers used here. Our job is to then move the
// values into the ABI-defined registers and call the entry-point. Note that
// `callq` is used here to leave this frame on the stack so we can use the
// dwarf info here for unwinding. The trailing `ud2` is just for safety.
//
// Concretely, per the stores in `wasmtime_fiber_init`: %r12 holds
// `entry_arg0`, %rbp holds `top_of_stack`, and %rbx holds `entry_point`.
mov %r12,%rdi
mov %rbp,%rsi
callq *%rbx
ud2
.cfi_endproc
SIZE(wasmtime_fiber_start)
FOOTER

View File

@@ -4,15 +4,17 @@ use std::io;
use std::marker::PhantomData;
use std::panic::{self, AssertUnwindSafe};
#[cfg(windows)]
mod windows;
#[cfg(windows)]
use windows as imp;
#[cfg(unix)]
mod unix;
#[cfg(unix)]
use unix as imp;
// Select the platform-specific fiber implementation and expose it under the
// common `imp` alias. Targets that are neither Windows nor Unix are rejected
// at compile time with an explicit error message.
cfg_if::cfg_if! {
if #[cfg(windows)] {
mod windows;
use windows as imp;
} else if #[cfg(unix)] {
mod unix;
use unix as imp;
} else {
compile_error!("fibers are not supported on this platform");
}
}
/// Represents an execution stack to use for a fiber.
#[derive(Debug)]

View File

@@ -29,6 +29,8 @@
//! `suspend`, which has 0xB000 so it can find this, will read that and write
//! its own resumption information into this slot as well.
#![allow(unused_macros)]
use crate::RunResult;
use std::cell::Cell;
use std::io;
@@ -174,3 +176,77 @@ impl Suspend {
ret.cast()
}
}
// This `cfg_if!` block defines, per platform, a macro named `asm_func!` which
// is suitable for generating a single `global_asm!`-defined function. This
// takes care of platform-specific directives to get the symbol attributes
// correct (e.g. ELF symbols get a size and are flagged as a function) and
// additionally handles visibility across platforms. All symbols should be
// visible to Rust but not visible externally outside of a `*.so`.
//
// A companion `asm_sym!` macro expands to the assembler-level spelling of a
// symbol name, so that one assembly function can refer to another by name.
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
// On macOS symbols carry a leading underscore and are hidden with
// `.private_extern`; no type or size directives are emitted.
macro_rules! asm_func {
($name:tt, $($body:tt)*) => {
std::arch::global_asm!(concat!(
".p2align 4\n",
".private_extern _", $name, "\n",
".global _", $name, "\n",
"_", $name, ":\n",
$($body)*
));
};
}
// Prepends the leading underscore used by `asm_func!` above.
macro_rules! asm_sym {
($name:tt) => (concat!("_", $name))
}
} else {
// Note that for now this "else" clause just assumes that everything
// other than macOS is ELF and has the various directives here for
// that.
//
// The function symbol type is spelled `%function` on 32-bit Arm and
// `@function` everywhere else.
cfg_if::cfg_if! {
if #[cfg(target_arch = "arm")] {
macro_rules! elf_func_type_header {
($name:tt) => (concat!(".type ", $name, ",%function\n"))
}
} else {
macro_rules! elf_func_type_header {
($name:tt) => (concat!(".type ", $name, ",@function\n"))
}
}
}
// NOTE: the body tokens are pasted verbatim directly in front of the
// trailing `.size` string inside `concat!`, so every invocation must end
// its body with a trailing comma.
macro_rules! asm_func {
($name:tt, $($body:tt)*) => {
std::arch::global_asm!(concat!(
".p2align 4\n",
".hidden ", $name, "\n",
".global ", $name, "\n",
elf_func_type_header!($name),
$name, ":\n",
$($body)*
".size ", $name, ",.-", $name,
));
};
}
// ELF symbol names are used as-is.
macro_rules! asm_sym {
($name:tt) => ($name)
}
}
}
// Pull in the architecture-specific module that defines the fiber assembly
// routines via the `asm_func!` macro above. Architectures without an
// implementation are rejected at compile time.
cfg_if::cfg_if! {
if #[cfg(target_arch = "aarch64")] {
mod aarch64;
} else if #[cfg(target_arch = "x86_64")] {
mod x86_64;
} else if #[cfg(target_arch = "x86")] {
mod x86;
} else if #[cfg(target_arch = "arm")] {
mod arm;
} else if #[cfg(target_arch = "s390x")] {
// currently `global_asm!` isn't stable on s390x so this is an external
// assembler file built with the `build.rs`.
} else {
compile_error!("fibers are not supported on this CPU architecture");
}
}

View File

@@ -0,0 +1,175 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
//
// Finally, control flow integrity hardening has been applied to the code using
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
// technologies from the Arm instruction set architecture:
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
// instructions
// * Return addresses are signed and authenticated using the stack pointer
// value as a modifier (similarly to the salt in a HMAC operation); the
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
// `.cfi_window_save` assembler directive) informs an unwinder about this
// Helper macros that splice the pointer-authentication instructions and the
// matching `.cfi_window_save` directive into the assembly below. On macOS
// they expand to nothing, so those instructions and directives are omitted
// there; on every other OS they expand to the corresponding line of assembly.
cfg_if::cfg_if! {
if #[cfg(target_os = "macos")] {
macro_rules! cfi_window_save { () => (); }
macro_rules! pacia1716 { () => (); }
macro_rules! paciasp { () => (); }
macro_rules! autiasp { () => (); }
} else {
macro_rules! cfi_window_save { () => (".cfi_window_save\n"); }
macro_rules! pacia1716 { () => ("pacia1716\n"); }
macro_rules! paciasp { () => ("paciasp\n"); }
macro_rules! autiasp { () => ("autiasp\n"); }
}
}
// fn(top_of_stack(%x0): *mut u8)
//
// Saves the callee-saved registers of the current context on the current
// stack, swaps the stack pointer with the one stored at
// `top_of_stack - 0x10`, and restores the callee-saved registers of the
// context being resumed before returning into it.
//
// The ten 16-byte pushes form the 0xa0 byte frame that `wasmtime_fiber_init`
// and the CFI in `wasmtime_fiber_start` rely on. The `paciasp!`/`autiasp!`
// pieces sign and authenticate the return address and expand to nothing on
// macOS.
asm_func!(
"wasmtime_fiber_switch",
"
.cfi_startproc
",
paciasp!(),
cfi_window_save!(),
"
// Save all callee-saved registers on the stack since we're
// assuming they're clobbered as a result of the stack switch.
stp x29, x30, [sp, -16]!
stp x20, x19, [sp, -16]!
stp x22, x21, [sp, -16]!
stp x24, x23, [sp, -16]!
stp x26, x25, [sp, -16]!
stp x28, x27, [sp, -16]!
stp d9, d8, [sp, -16]!
stp d11, d10, [sp, -16]!
stp d13, d12, [sp, -16]!
stp d15, d14, [sp, -16]!
// Load our previously saved stack pointer to resume to, and save
// off our current stack pointer on where to come back to
// eventually.
ldr x8, [x0, -0x10]
mov x9, sp
str x9, [x0, -0x10]
// Switch to the new stack and restore all our callee-saved
// registers after the switch and return to our new stack.
mov sp, x8
ldp d15, d14, [sp], 16
ldp d13, d12, [sp], 16
ldp d11, d10, [sp], 16
ldp d9, d8, [sp], 16
ldp x28, x27, [sp], 16
ldp x26, x25, [sp], 16
ldp x24, x23, [sp], 16
ldp x22, x21, [sp], 16
ldp x20, x19, [sp], 16
ldp x29, x30, [sp], 16
",
autiasp!(),
cfi_window_save!(),
"
ret
.cfi_endproc
",
);
// fn(
// top_of_stack(%x0): *mut u8,
// entry_point(%x1): extern fn(*mut u8, *mut u8),
// entry_arg0(%x2): *mut u8,
// )
// We set up the newly initialized fiber, so that it resumes execution
// from wasmtime_fiber_start(). As a result, we need a signed address
// of this function, so there are 2 requirements:
// * The fiber stack pointer value that is used by the signing operation
// must match the value when the pointer is authenticated inside
// wasmtime_fiber_switch(), otherwise the latter would fault
// * We would like to use an instruction that is executed as a no-op by
// processors that do not support PAuth, so that the code is
// backward-compatible and there is no duplication; `PACIA1716` is a
// suitable one, which has the following operand register
// conventions:
// * X17 contains the pointer value to sign
// * X16 contains the modifier value
//
// TODO: Use the PACGA instruction to authenticate the saved register
// state, which avoids creating signed pointers to
// wasmtime_fiber_start(), and provides wider coverage.
//
// The slots written here are the ones `wasmtime_fiber_switch` pops into lr,
// x19, x20 and x21, and the final store publishes the initial resume stack
// pointer (`top_of_stack - 0xb0`) in the `top_of_stack - 0x10` slot. The
// reference to `wasmtime_fiber_start` goes through `asm_sym!` so that it uses
// the platform's spelling of the symbol name.
#[rustfmt::skip]
asm_func!(
"wasmtime_fiber_init",
"
.cfi_startproc
hint #34 // bti c
sub x16, x0, #16
adr x17, ", asm_sym!("wasmtime_fiber_start"), "
",
pacia1716!(),
"
str x17, [x16, -0x8] // x17 => lr
str x0, [x16, -0x18] // x0 => x19
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
// the original reserved 16 bytes.
add x8, x0, -0xb0
str x8, [x0, -0x10]
ret
.cfi_endproc
",
);
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
// match the frame layout in `wasmtime_fiber_switch`.
//
// This is the base frame of every fiber. It is only ever entered through the
// `ret` of `wasmtime_fiber_switch`, with the values stored by
// `wasmtime_fiber_init` already restored into x19 (`top_of_stack`), x20
// (`entry_point`) and x21 (`entry_arg0`).
asm_func!(
"wasmtime_fiber_start",
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x6f, /* DW_OP_reg31(%sp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
.cfi_rel_offset x29, -0x10
.cfi_rel_offset x30, -0x08
",
cfi_window_save!(),
"
.cfi_rel_offset x19, -0x18
.cfi_rel_offset x20, -0x20
.cfi_rel_offset x21, -0x28
.cfi_rel_offset x22, -0x30
.cfi_rel_offset x23, -0x38
.cfi_rel_offset x24, -0x40
.cfi_rel_offset x25, -0x48
.cfi_rel_offset x26, -0x50
.cfi_rel_offset x27, -0x58
// Move our two arguments into place: x0 receives `entry_arg0` (restored
// into x21) and x1 receives `top_of_stack` (restored into x19), as
// arranged by `wasmtime_fiber_init`. The entry point is in x20.
mov x0, x21
mov x1, x19
// ... and then we call the function! Note that this is a function call
// so our frame stays on the stack to backtrace through.
blr x20
// Unreachable, here for safety. This should help catch unexpected
// behaviors. Use a noticeable payload so one can grep for it in the
// codebase.
brk 0xf1b3
.cfi_endproc
",
);

View File

@@ -0,0 +1,83 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
// fn(top_of_stack(%r0): *mut u8)
//
// Saves r4-r11 and lr on the current stack, swaps the stack pointer with the
// one stored at `top_of_stack - 0x08`, then restores r4-r11 and lr from the
// new stack and branches to the restored lr.
//
// The register lists use doubled braces because the text is passed through
// `global_asm!`, whose template treats single braces as operand placeholders.
asm_func!(
"wasmtime_fiber_switch",
"
// Save callee-saved registers
push {{r4-r11,lr}}
// Swap stacks, recording our current stack pointer
ldr r4, [r0, #-0x08]
str sp, [r0, #-0x08]
mov sp, r4
// Restore and return
pop {{r4-r11,lr}}
bx lr
",
);
// fn(
// top_of_stack(%r0): *mut u8,
// entry_point(%r1): extern fn(*mut u8, *mut u8),
// entry_arg0(%r2): *mut u8,
// )
//
// Fills in a fresh fiber stack so that it looks like a frame saved by
// `wasmtime_fiber_switch`; the trailing assembly comments name the register
// each slot is popped into. The initial resume stack pointer is
// `top_of_stack - 0x2c`: 0x08 reserved bytes plus the nine 4-byte registers
// popped by `wasmtime_fiber_switch`.
asm_func!(
"wasmtime_fiber_init",
"
adr r3, wasmtime_fiber_start
str r3, [r0, #-0x0c] // => lr
str r0, [r0, #-0x10] // => r11
str r1, [r0, #-0x14] // => r10
str r2, [r0, #-0x18] // => r9
add r3, r0, #-0x2c
str r3, [r0, #-0x08]
bx lr
",
);
// Base frame of every fiber, entered through the `bx lr` of
// `wasmtime_fiber_switch` with the values stored by `wasmtime_fiber_init`
// restored into r9 (`entry_arg0`), r10 (`entry_point`) and r11
// (`top_of_stack`).
asm_func!(
"wasmtime_fiber_start",
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
// See the x86_64 file for more commentary on what these CFI directives
// are doing. Like over there note that the relative offsets to
// registers here match the frame layout in `wasmtime_fiber_switch`.
//
// TODO: this is only lightly tested. This gets backtraces in gdb but
// not at runtime. Perhaps the libgcc at runtime was too old? Doesn't
// support something here? Unclear. Will need investigation if someone
// ends up needing this and it still doesn't work.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x7d, 0x00, /* DW_OP_breg13(%sp) + 0 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x24 /* DW_OP_plus_uconst 0x24 */
.cfi_rel_offset lr, -0x04
.cfi_rel_offset r11, -0x08
.cfi_rel_offset r10, -0x0c
.cfi_rel_offset r9, -0x10
.cfi_rel_offset r8, -0x14
.cfi_rel_offset r7, -0x18
.cfi_rel_offset r6, -0x1c
.cfi_rel_offset r5, -0x20
.cfi_rel_offset r4, -0x24
mov r1, r11
mov r0, r9
blx r10
.cfi_endproc
",
);

View File

@@ -8,7 +8,13 @@
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
#include "header.h"
.text
#define GLOBL(fnname) .globl fnname
#define HIDDEN(fnname) .hidden fnname
#define TYPE(fnname) .type fnname,@function
#define FUNCTION(fnname) fnname
#define SIZE(fnname) .size fnname,.-fnname
// fn(top_of_stack(%x0): *mut u8)
HIDDEN(wasmtime_fiber_switch)
@@ -76,6 +82,7 @@ SIZE(wasmtime_fiber_init)
TYPE(wasmtime_fiber_start)
FUNCTION(wasmtime_fiber_start):
.cfi_startproc simple
.cfi_def_cfa_offset 0
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
@@ -109,4 +116,5 @@ FUNCTION(wasmtime_fiber_start):
.cfi_endproc
SIZE(wasmtime_fiber_start)
FOOTER
// Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits

View File

@@ -0,0 +1,107 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// This file is modeled after x86_64.rs and comments are not copied over. For
// reference be sure to review the other file. Note that the pointer size is
// different so the reserved space at the top of the stack is 8 bytes, not 16
// bytes. Still two pointers though.
// fn(top_of_stack: *mut u8)
//
// Switches from the currently running stack to the one recorded for the fiber
// whose stack top is passed as the single argument.
//
// The argument is read from the stack at `4[esp]`, and this read happens
// before any register is pushed so the offset is still relative to the
// caller's frame. The word at `top_of_stack - 0x8` holds the stack pointer to
// resume at; it is loaded into `ecx` first and then overwritten with the
// current `esp` once the callee-saved registers have been pushed.
//
// Four registers are saved (ebp, ebx, esi, edi) and restored in reverse order
// on the new stack. `wasmtime_fiber_init` builds a frame of exactly this shape
// and the CFI in `wasmtime_fiber_start` describes it, so the push order here
// must stay in sync with both.
asm_func!(
"wasmtime_fiber_switch",
"
// Load our stack-to-use
mov eax, 0x4[esp]
mov ecx, -0x8[eax]
// Save callee-saved registers
push ebp
push ebx
push esi
push edi
// Save our current stack and jump to the stack-to-use
mov -0x8[eax], esp
mov esp, ecx
// Restore callee-saved registers
pop edi
pop esi
pop ebx
pop ebp
ret
",
);
// fn(
// top_of_stack: *mut u8,
// entry_point: extern fn(*mut u8, *mut u8),
// entry_arg0: *mut u8,
// )
//
// Prepares a fresh fiber stack so that the first `wasmtime_fiber_switch` onto
// it "returns" into `wasmtime_fiber_start`, which in turn calls
// `entry_point(entry_arg0, top_of_stack)`.
//
// All three arguments are read from the stack (`4[esp]`, `8[esp]` and
// `12[esp]`). The slots written below `top_of_stack` are:
//
// * `-0x08`: stack pointer to resume at (`top_of_stack - 0x24`)
// * `-0x0c`: `top_of_stack`, the 2nd argument to `entry_point`
// * `-0x10`: `entry_arg0`, the 1st argument to `entry_point`
// * `-0x14`: address of `wasmtime_fiber_start`, consumed by the `ret` in
//   `wasmtime_fiber_switch`
// * `-0x18`: `entry_point`, popped into `ebp` by `wasmtime_fiber_switch`
//
// The slots popped into ebx, esi and edi are not written here.
asm_func!(
"wasmtime_fiber_init",
"
mov eax, 4[esp]
// move top_of_stack to the 2nd argument
mov -0x0c[eax], eax
// move entry_arg0 to the 1st argument
mov ecx, 12[esp]
mov -0x10[eax], ecx
// Move our start function to the return address which the `ret` in
// `wasmtime_fiber_switch` will return to.
lea ecx, wasmtime_fiber_start
mov -0x14[eax], ecx
// And move `entry_point` to get loaded into `%ebp` through the context
// switch. This'll get jumped to in `wasmtime_fiber_start`.
mov ecx, 8[esp]
mov -0x18[eax], ecx
// Our stack from top-to-bottom looks like:
//
// * 8 bytes of reserved space per unix.rs (two-pointers space)
// * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards)
// * 4 bytes of return address
// * 16 bytes of saved registers
//
// Note that after the return address the stack is conveniently 16-byte
// aligned as required, so we just leave the arguments on the stack in
// `wasmtime_fiber_start` and immediately do the call.
lea ecx, -0x24[eax]
mov -0x08[eax], ecx
ret
",
);
// Base frame of every fiber on x86. It is never called directly:
// `wasmtime_fiber_init` plants its address as the return address that the
// `ret` in `wasmtime_fiber_switch` consumes, so the first switch onto a new
// fiber lands here.
//
// On entry `ebp` holds `entry_point` (restored by `wasmtime_fiber_switch`) and
// the two arguments are already sitting on the stack where
// `wasmtime_fiber_init` stored them, so the body is just `call ebp`. `call` is
// used so this frame stays on the stack; the `ud2` traps if the entry point
// ever returns.
//
// CFI: the CFA is `*(esp + 8) + 0x14`. With the frame built by
// `wasmtime_fiber_init`, `esp` on entry is `top_of_stack - 0x10`, so `esp + 8`
// is the `top_of_stack - 0x8` slot in which `wasmtime_fiber_switch` saved the
// other side's stack pointer. The extra 0x14 skips the four registers pushed
// by `wasmtime_fiber_switch` plus its return address, and the
// `.cfi_rel_offset` lines mirror that push order.
asm_func!(
"wasmtime_fiber_start",
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x14 /* DW_OP_plus_uconst 0x14 */
.cfi_rel_offset eip, -4
.cfi_rel_offset ebp, -8
.cfi_rel_offset ebx, -12
.cfi_rel_offset esi, -16
.cfi_rel_offset edi, -20
// Our arguments and stack alignment are all prepped by
// `wasmtime_fiber_init`.
call ebp
ud2
.cfi_endproc
",
);

View File

@@ -0,0 +1,157 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
// fn(top_of_stack(rdi): *mut u8)
//
// Switches from the currently running stack to the one recorded for the fiber
// whose stack top is passed in `rdi`.
//
// The word at `top_of_stack - 0x10` holds the stack pointer to resume at. This
// routine exchanges that word with the current `rsp`, so the slot afterwards
// holds the stack pointer of whoever performed the switch.
//
// Six registers are saved (rbp, rbx, r12, r13, r14, r15) and restored in
// reverse order on the new stack. `wasmtime_fiber_init` builds a frame of
// exactly this shape and the CFI in `wasmtime_fiber_start` describes it, so
// the push order here must stay in sync with both.
asm_func!(
"wasmtime_fiber_switch",
"
// We're switching to arbitrary code somewhere else, so pessimistically
// assume that all callee-save register are clobbered. This means we need
// to save/restore all of them.
//
// Note that this order for saving is important since we use CFI directives
// below to point to where all the saved registers are.
push rbp
push rbx
push r12
push r13
push r14
push r15
// Load pointer that we're going to resume at and store where we're going
// to get resumed from. This is in accordance with the diagram at the top
// of unix.rs.
mov rax, -0x10[rdi]
mov -0x10[rdi], rsp
// Swap stacks and restore all our callee-saved registers
mov rsp, rax
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
ret
",
);
// fn(
// top_of_stack(rdi): *mut u8,
// entry_point(rsi): extern fn(*mut u8, *mut u8),
// entry_arg0(rdx): *mut u8,
// )
//
// Prepares a fresh fiber stack so that the first `wasmtime_fiber_switch` onto
// it "returns" into `wasmtime_fiber_start`.
//
// The slots written below `top_of_stack` are:
//
// * `-0x10`: stack pointer to resume at (`top_of_stack - 0x48`)
// * `-0x18`: address of `wasmtime_fiber_start`, consumed by the `ret` in
//   `wasmtime_fiber_switch`
// * `-0x20`: `top_of_stack`, popped into rbp
// * `-0x28`: `entry_point`, popped into rbx
// * `-0x30`: `entry_arg0`, popped into r12
//
// The slots popped into r13, r14 and r15 are not written here.
#[rustfmt::skip]
asm_func!(
"wasmtime_fiber_init",
"
// Here we're going to set up a stack frame as expected by
// `wasmtime_fiber_switch`. The values we store here will get restored into
// registers by that function and the `wasmtime_fiber_start` function will
// take over and understands which values are in which registers.
//
// The first 16 bytes of stack are reserved for metadata, so we start
// storing values beneath that.
lea rax, ", asm_sym!("wasmtime_fiber_start"), "[rip]
mov -0x18[rdi], rax
mov -0x20[rdi], rdi // loaded into rbp during switch
mov -0x28[rdi], rsi // loaded into rbx during switch
mov -0x30[rdi], rdx // loaded into r12 during switch
// And then we specify the stack pointer that resumption should begin at.
// Our `wasmtime_fiber_switch` function consumes 6 registers plus a return
// pointer (8 bytes each), and the top 16 bytes are reserved, so that's:
//
// (6 + 1) * 8 + 16 = 0x48
lea rax, -0x48[rdi]
mov -0x10[rdi], rax
ret
",
);
// This is a pretty special function that has no real signature. Its use is to
// be the "base" function of all fibers. This entrypoint is used in
// `wasmtime_fiber_init` to bootstrap the execution of a new fiber.
//
// We also use this function as a persistent frame on the stack to emit dwarf
// information to unwind into the caller. This allows us to unwind from the
// fiber's stack back to the main stack that the fiber was called from. We use
// special dwarf directives here to do so since this is a pretty nonstandard
// function.
//
// If you're curious a decent introduction to CFI things and unwinding is at
// https://www.imperialviolet.org/2017/01/18/cfi.html
//
// Register contract on entry (all established by `wasmtime_fiber_init` and
// restored by `wasmtime_fiber_switch`):
//
// * r12 = `entry_arg0`   -> moved to rdi (first argument)
// * rbp = `top_of_stack` -> moved to rsi (second argument)
// * rbx = `entry_point`  -> the function that gets called
asm_func!(
"wasmtime_fiber_start",
"
// Use the `simple` directive on the startproc here which indicates that
// some default settings for the platform are omitted, since this
// function is so nonstandard
.cfi_startproc simple
.cfi_def_cfa_offset 0
// This is where things get special, we're specifying a custom dwarf
// expression for how to calculate the CFA. The goal here is that we
// need to load the parent's stack pointer just before the call it made
// into `wasmtime_fiber_switch`. Note that the CFA value changes over
// time as well because a fiber may be resumed multiple times from
// different points on the original stack. This means that our custom
// CFA directive involves `DW_OP_deref`, which loads data from memory.
//
// The expression we're encoding here is that the CFA, the stack pointer
// of whatever called into `wasmtime_fiber_start`, is:
//
// *$rsp + 0x38
//
// $rsp is the stack pointer of `wasmtime_fiber_start` at the time the
// next instruction after the `.cfi_escape` is executed. Our $rsp at the
// start of this function is 16 bytes below the top of the stack (0xAff0
// in the diagram in unix.rs). The $rsp to resume at is stored at that
// location, so we dereference the stack pointer to load it.
//
// After dereferencing, though, we have the $rsp value for
// `wasmtime_fiber_switch` itself. That's a weird function which sort of
// and sort of doesn't exist on the stack. We want to point to the
// caller of `wasmtime_fiber_switch`, so to do that we need to skip the
// stack space reserved by `wasmtime_fiber_switch`, which is the 6 saved
// registers plus the return address of the caller's `call` instruction.
// Hence we offset another 0x38 bytes.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
4, /* the byte length of this expression */ \
0x57, /* DW_OP_reg7 (rsp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0x38 /* DW_OP_plus_uconst 0x38 */
// And now after we've indicated where our CFA is for our parent
// function, we can define that where all of the saved registers are
// located. This uses standard `.cfi` directives which indicate that
// these registers are all stored relative to the CFA. Note that this
// order is kept in sync with the above register spills in
// `wasmtime_fiber_switch`.
.cfi_rel_offset rip, -8
.cfi_rel_offset rbp, -16
.cfi_rel_offset rbx, -24
.cfi_rel_offset r12, -32
.cfi_rel_offset r13, -40
.cfi_rel_offset r14, -48
.cfi_rel_offset r15, -56
// The body of this function is pretty similar. All our parameters are
// already loaded into registers by the switch function. The
// `wasmtime_fiber_init` routine arranged the various values to be
// materialized into the registers used here. Our job is to then move
// the values into the ABI-defined registers and call the entry-point.
// Note that `call` is used here to leave this frame on the stack so we
// can use the dwarf info here for unwinding. The trailing `ud2` is just
// for safety.
mov rdi, r12
mov rsi, rbp
call rbx
ud2
.cfi_endproc
",
);