diff --git a/Cargo.lock b/Cargo.lock index 1ce4e0ce29..e5950ed3fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3501,6 +3501,7 @@ version = "0.39.0" dependencies = [ "backtrace", "cc", + "cfg-if", "rustix", "winapi", ] diff --git a/crates/fiber/Cargo.toml b/crates/fiber/Cargo.toml index 932af42327..15aba167dd 100644 --- a/crates/fiber/Cargo.toml +++ b/crates/fiber/Cargo.toml @@ -13,6 +13,9 @@ edition = "2021" # directives or similar to embed a version number of this crate in symbols. links = "wasmtime-fiber-shims" +[dependencies] +cfg-if = "1.0" + [target.'cfg(unix)'.dependencies] rustix = { version = "0.35.6", features = ["mm", "param"] } diff --git a/crates/fiber/build.rs b/crates/fiber/build.rs index 8be570e835..9e7b8b5a39 100644 --- a/crates/fiber/build.rs +++ b/crates/fiber/build.rs @@ -1,23 +1,18 @@ use std::env; -use std::fs; fn main() { let mut build = cc::Build::new(); let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - let family = env::var("CARGO_CFG_TARGET_FAMILY").unwrap(); let os = env::var("CARGO_CFG_TARGET_OS").unwrap(); - - let family_file = format!("src/arch/{}.c", family); - let arch_file = format!("src/arch/{}.S", arch); - if fs::metadata(&family_file).is_ok() { - build.file(&family_file); - } else if fs::metadata(&arch_file).is_ok() { - build.file(&arch_file); + if os == "windows" { + build.file("src/windows.c"); + } else if arch == "s390x" { + build.file("src/unix/s390x.S"); } else { - panic!( - "wasmtime doesn't support fibers on platform: {}", - env::var("TARGET").unwrap() - ); + // assume that this is included via inline assembly in the crate itself, + // and the crate will otherwise have a `compile_error!` for unsupported + // platforms. 
+ return; } build.define(&format!("CFG_TARGET_OS_{}", os), None); build.define(&format!("CFG_TARGET_ARCH_{}", arch), None); diff --git a/crates/fiber/src/arch/aarch64.S b/crates/fiber/src/arch/aarch64.S deleted file mode 100644 index d772b1c3c2..0000000000 --- a/crates/fiber/src/arch/aarch64.S +++ /dev/null @@ -1,194 +0,0 @@ -// A WORD OF CAUTION -// -// This entire file basically needs to be kept in sync with itself. It's not -// really possible to modify just one bit of this file without understanding -// all the other bits. Documentation tries to reference various bits here and -// there but try to make sure to read over everything before tweaking things! -// -// Also at this time this file is heavily based off the x86_64 file, so you'll -// probably want to read that one as well. -// -// Finally, control flow integrity hardening has been applied to the code using -// the Pointer Authentication (PAuth) and Branch Target Identification (BTI) -// technologies from the Arm instruction set architecture: -// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP` -// instructions -// * Return addresses are signed and authenticated using the stack pointer -// value as a modifier (similarly to the salt in a HMAC operation); the -// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the -// `.cfi_window_save` assembler directive) informs an unwinder about this - -#include "header.h" - -#ifndef CFG_TARGET_OS_macos -// We need to tell whatever loads the following code (e.g. the dynamic linker) -// that it is compatible with BTI, so that the corresponding executable memory -// pages have the necessary attribute set (if supported by the environment). To -// this end, we follow the ELF for the Arm® 64-bit Architecture standard, and -// use a special metadata section. 
Further details are in section 6.2 of the -// specification: -// -// https://github.com/ARM-software/abi-aa/blob/2022Q1/aaelf64/aaelf64.rst#program-property -// -// We also set the PAuth (PAC) property, even though it is optional, for the -// sake of completeness. -.pushsection .note.gnu.property, "a"; -.p2align 3; -.word 4; -.word 16; -.word 5; -.asciz "GNU"; -.word 0xc0000000; // GNU_PROPERTY_AARCH64_FEATURE_1_AND -.word 4; -.word 3; // GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC -.word 0; -.popsection -#endif - -// fn(top_of_stack(%x0): *mut u8) -HIDDEN(wasmtime_fiber_switch) -GLOBL(wasmtime_fiber_switch) -.p2align 2 -TYPE(wasmtime_fiber_switch) -FUNCTION(wasmtime_fiber_switch): - .cfi_startproc -#ifndef CFG_TARGET_OS_macos - hint #25 // paciasp - .cfi_window_save -#endif - // Save all callee-saved registers on the stack since we're assuming - // they're clobbered as a result of the stack switch. - stp x29, x30, [sp, -16]! - stp x20, x19, [sp, -16]! - stp x22, x21, [sp, -16]! - stp x24, x23, [sp, -16]! - stp x26, x25, [sp, -16]! - stp x28, x27, [sp, -16]! - stp d9, d8, [sp, -16]! - stp d11, d10, [sp, -16]! - stp d13, d12, [sp, -16]! - stp d15, d14, [sp, -16]! - - // Load our previously saved stack pointer to resume to, and save off our - // current stack pointer on where to come back to eventually. - ldr x8, [x0, -0x10] - mov x9, sp - str x9, [x0, -0x10] - - // Switch to the new stack and restore all our callee-saved registers after - // the switch and return to our new stack. 
- mov sp, x8 - ldp d15, d14, [sp], 16 - ldp d13, d12, [sp], 16 - ldp d11, d10, [sp], 16 - ldp d9, d8, [sp], 16 - ldp x28, x27, [sp], 16 - ldp x26, x25, [sp], 16 - ldp x24, x23, [sp], 16 - ldp x22, x21, [sp], 16 - ldp x20, x19, [sp], 16 - ldp x29, x30, [sp], 16 -#ifndef CFG_TARGET_OS_macos - hint #29 // autiasp - .cfi_window_save -#endif - ret - .cfi_endproc -SIZE(wasmtime_fiber_switch) - -// fn( -// top_of_stack(%x0): *mut u8, -// entry_point(%x1): extern fn(*mut u8, *mut u8), -// entry_arg0(%x2): *mut u8, -// ) -HIDDEN(wasmtime_fiber_init) -GLOBL(wasmtime_fiber_init) -.p2align 2 -TYPE(wasmtime_fiber_init) -FUNCTION(wasmtime_fiber_init): - .cfi_startproc - hint #34 // bti c - // We set up the newly initialized fiber, so that it resumes execution from - // wasmtime_fiber_start(). As a result, we need a signed address of this - // function, so there are 2 requirements: - // * The fiber stack pointer value that is used by the signing operation - // must match the value when the pointer is authenticated inside - // wasmtime_fiber_switch(), otherwise the latter would fault - // * We would like to use an instruction that is executed as a no-op by - // processors that do not support PAuth, so that the code is backward- - // compatible and there is no duplication; `PACIA1716` is a suitable - // one, which has the following operand register conventions: - // * X17 contains the pointer value to sign - // * X16 contains the modifier value - // - // TODO: Use the PACGA instruction to authenticate the saved register state, - // which avoids creating signed pointers to wasmtime_fiber_start(), and - // provides wider coverage. - sub x16, x0, #16 - adr x17, FUNCTION(wasmtime_fiber_start) -#ifndef CFG_TARGET_OS_macos - hint #8 // pacia1716 -#endif - str x17, [x16, -0x8] // x17 => lr - str x0, [x16, -0x18] // x0 => x19 - stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21 - - // `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for - // the original reserved 16 bytes. 
- add x8, x0, -0xb0 - str x8, [x0, -0x10] - ret - .cfi_endproc -SIZE(wasmtime_fiber_init) - -.p2align 2 -TYPE(wasmtime_fiber_start) -FUNCTION(wasmtime_fiber_start): -.cfi_startproc simple - - // See the x86_64 file for more commentary on what these CFI directives are - // doing. Like over there note that the relative offsets to registers here - // match the frame layout in `wasmtime_fiber_switch`. - .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ - 5, /* the byte length of this expression */ \ - 0x6f, /* DW_OP_reg31(%sp) */ \ - 0x06, /* DW_OP_deref */ \ - 0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */ - - .cfi_rel_offset x29, -0x10 -#ifndef CFG_TARGET_OS_macos - .cfi_window_save -#endif - .cfi_rel_offset x30, -0x08 - .cfi_rel_offset x19, -0x18 - .cfi_rel_offset x20, -0x20 - .cfi_rel_offset x21, -0x28 - .cfi_rel_offset x22, -0x30 - .cfi_rel_offset x23, -0x38 - .cfi_rel_offset x24, -0x40 - .cfi_rel_offset x25, -0x48 - .cfi_rel_offset x26, -0x50 - .cfi_rel_offset x27, -0x58 - - // Load our two arguments from the stack, where x1 is our start procedure - // and x0 is its first argument. This also blows away the stack space used - // by those two arguments. - mov x0, x21 - mov x1, x19 - - // ... and then we call the function! Note that this is a function call so - // our frame stays on the stack to backtrace through. - blr x20 - // Unreachable, here for safety. This should help catch unexpected behaviors. - // Use a noticeable payload so one can grep for it in the codebase. - brk 0xf1b3 - .cfi_endproc -SIZE(wasmtime_fiber_start) - -// This omits the `.subsections_via_symbols` directive on macOS which means we -// can't GC specific intrinsics from this file, but it enables usage of the -// `adr` instruction above in lieu of figuring out a slightly more complicated -// way of implementing that. 
-#ifndef CFG_TARGET_OS_macos -FOOTER -#endif diff --git a/crates/fiber/src/arch/arm.S b/crates/fiber/src/arch/arm.S deleted file mode 100644 index 54eb507aba..0000000000 --- a/crates/fiber/src/arch/arm.S +++ /dev/null @@ -1,83 +0,0 @@ -// A WORD OF CAUTION -// -// This entire file basically needs to be kept in sync with itself. It's not -// really possible to modify just one bit of this file without understanding -// all the other bits. Documentation tries to reference various bits here and -// there but try to make sure to read over everything before tweaking things! -// -// Also at this time this file is heavily based off the x86_64 file, so you'll -// probably want to read that one as well. - -#include "header.h" - -// fn(top_of_stack(%r0): *mut u8) -HIDDEN(wasmtime_fiber_switch) -GLOBL(wasmtime_fiber_switch) -TYPE(wasmtime_fiber_switch) -FUNCTION(wasmtime_fiber_switch): - // Save callee-saved registers - push {r4-r11,lr} - - // Swap stacks, recording our current stack pointer - ldr r4, [r0, #-0x08] - str sp, [r0, #-0x08] - mov sp, r4 - - // Restore and return - pop {r4-r11,lr} - bx lr -SIZE(wasmtime_fiber_switch) - -// fn( -// top_of_stack(%r0): *mut u8, -// entry_point(%r1): extern fn(*mut u8, *mut u8), -// entry_arg0(%r2): *mut u8, -// ) -HIDDEN(wasmtime_fiber_init) -GLOBL(wasmtime_fiber_init) -TYPE(wasmtime_fiber_init) -FUNCTION(wasmtime_fiber_init): - adr r3, FUNCTION(wasmtime_fiber_start) - str r3, [r0, #-0x0c] // => lr - str r0, [r0, #-0x10] // => r11 - str r1, [r0, #-0x14] // => r10 - str r2, [r0, #-0x18] // => r9 - - add r3, r0, #-0x2c - str r3, [r0, #-0x08] - bx lr -SIZE(wasmtime_fiber_init) - -FUNCTION(wasmtime_fiber_start): -.cfi_startproc simple - // See the x86_64 file for more commentary on what these CFI directives are - // doing. Like over there note that the relative offsets to registers here - // match the frame layout in `wasmtime_fiber_switch`. - // - // TODO: this is only lightly tested. This gets backtraces in gdb but not - // at runtime. 
Perhaps the libgcc at runtime was too old? Doesn't support - // something here? Unclear. Will need investigation if someone ends up - // needing this and it still doesn't work. - .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ - 5, /* the byte length of this expression */ \ - 0x7d, 0x00, /* DW_OP_breg14(%sp) + 0 */ \ - 0x06, /* DW_OP_deref */ \ - 0x23, 0x24 /* DW_OP_plus_uconst 0x24 */ - - .cfi_rel_offset lr, -0x04 - .cfi_rel_offset r11, -0x08 - .cfi_rel_offset r10, -0x0c - .cfi_rel_offset r9, -0x10 - .cfi_rel_offset r8, -0x14 - .cfi_rel_offset r7, -0x18 - .cfi_rel_offset r6, -0x1c - .cfi_rel_offset r5, -0x20 - .cfi_rel_offset r4, -0x24 - - mov r1, r11 - mov r0, r9 - blx r10 - .cfi_endproc -SIZE(wasmtime_fiber_start) - -FOOTER diff --git a/crates/fiber/src/arch/header.h b/crates/fiber/src/arch/header.h deleted file mode 100644 index 5afc28e2ab..0000000000 --- a/crates/fiber/src/arch/header.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __wasmtime_common_h -#define __wasmtime_common_h - -#if CFG_TARGET_OS_macos - -.section __TEXT,__text,regular,pure_instructions - -#define GLOBL(fnname) .globl _##fnname -#define HIDDEN(fnname) .private_extern _##fnname -#define TYPE(fnname) -#define FUNCTION(fnname) _##fnname -#define SIZE(fnname) - -// Tells the linker it's safe to gc symbols away if not used. -#define FOOTER .subsections_via_symbols - -#else - -.text - -#define GLOBL(fnname) .globl fnname -#define HIDDEN(fnname) .hidden fnname -#ifdef CFG_TARGET_ARCH_arm -#define TYPE(fnname) .type fnname,%function -#else -#define TYPE(fnname) .type fnname,@function -#endif -#define FUNCTION(fnname) fnname -#define SIZE(fnname) .size fnname,.-fnname - -// Mark that we don't need executable stack. 
-#define FOOTER .section .note.GNU-stack,"",%progbits - -#endif - -#endif // __wasmtime_common_h diff --git a/crates/fiber/src/arch/x86.S b/crates/fiber/src/arch/x86.S deleted file mode 100644 index c147289a31..0000000000 --- a/crates/fiber/src/arch/x86.S +++ /dev/null @@ -1,107 +0,0 @@ -// A WORD OF CAUTION -// -// This entire file basically needs to be kept in sync with itself. It's not -// really possible to modify just one bit of this file without understanding -// all the other bits. Documentation tries to reference various bits here and -// there but try to make sure to read over everything before tweaking things! -// -// This file is modeled after x86_64.S and comments are not copied over. For -// reference be sure to review the other file. Note that the pointer size is -// different so the reserved space at the top of the stack is 8 bytes, not 16 -// bytes. Still two pointers though. - -#include "header.h" - -// fn(top_of_stack: *mut u8) -HIDDEN(wasmtime_fiber_switch) -GLOBL(wasmtime_fiber_switch) -TYPE(wasmtime_fiber_switch) -FUNCTION(wasmtime_fiber_switch): - // Load our stack-to-use - mov 0x4(%esp), %eax - mov -0x8(%eax), %ecx - - // Save callee-saved registers - push %ebp - push %ebx - push %esi - push %edi - - // Save our current stack and jump to the stack-to-use - mov %esp, -0x8(%eax) - mov %ecx, %esp - - // Restore callee-saved registers - pop %edi - pop %esi - pop %ebx - pop %ebp - ret -SIZE(wasmtime_fiber_switch) - -// fn( -// top_of_stack: *mut u8, -// entry_point: extern fn(*mut u8, *mut u8), -// entry_arg0: *mut u8, -// ) -HIDDEN(wasmtime_fiber_init) -GLOBL(wasmtime_fiber_init) -TYPE(wasmtime_fiber_init) -FUNCTION(wasmtime_fiber_init): - mov 4(%esp), %eax - - // move top_of_stack to the 2nd argument - mov %eax, -0x0c(%eax) - - // move entry_arg0 to the 1st argument - mov 12(%esp), %ecx - mov %ecx, -0x10(%eax) - - // Move our start function to the return address which the `ret` in - // `wasmtime_fiber_start` will return to. 
- lea FUNCTION(wasmtime_fiber_start), %ecx - mov %ecx, -0x14(%eax) - - // And move `entry_point` to get loaded into `%ebp` through the context - // switch. This'll get jumped to in `wasmtime_fiber_start`. - mov 8(%esp), %ecx - mov %ecx, -0x18(%eax) - - // Our stack from top-to-bottom looks like: - // - // * 8 bytes of reserved space per unix.rs (two-pointers space) - // * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards) - // * 4 bytes of return address - // * 16 bytes of saved registers - // - // Note that after the return address the stack is conveniently 16-byte - // aligned as required, so we just leave the arguments on the stack in - // `wasmtime_fiber_start` and immediately do the call. - lea -0x24(%eax), %ecx - mov %ecx, -0x08(%eax) - ret -SIZE(wasmtime_fiber_init) - -TYPE(wasmtime_fiber_start) -FUNCTION(wasmtime_fiber_start): -.cfi_startproc simple - .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ - 5, /* the byte length of this expression */ \ - 0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \ - 0x06, /* DW_OP_deref */ \ - 0x23, 0x14 /* DW_OP_plus_uconst 0x14 */ - - .cfi_rel_offset eip, -4 - .cfi_rel_offset ebp, -8 - .cfi_rel_offset ebx, -12 - .cfi_rel_offset esi, -16 - .cfi_rel_offset edi, -20 - - // Our arguments and stack alignment are all prepped by - // `wasmtime_fiber_init`. - call *%ebp - ud2 - .cfi_endproc -SIZE(wasmtime_fiber_start) - -FOOTER diff --git a/crates/fiber/src/arch/x86_64.S b/crates/fiber/src/arch/x86_64.S deleted file mode 100644 index 2ae01fc290..0000000000 --- a/crates/fiber/src/arch/x86_64.S +++ /dev/null @@ -1,159 +0,0 @@ -// A WORD OF CAUTION -// -// This entire file basically needs to be kept in sync with itself. It's not -// really possible to modify just one bit of this file without understanding -// all the other bits. Documentation tries to reference various bits here and -// there but try to make sure to read over everything before tweaking things! 
- -#include "header.h" - -// fn(top_of_stack(%rdi): *mut u8) -HIDDEN(wasmtime_fiber_switch) -GLOBL(wasmtime_fiber_switch) -.align 16 -TYPE(wasmtime_fiber_switch) -FUNCTION(wasmtime_fiber_switch): - // We're switching to arbitrary code somewhere else, so pessimistically - // assume that all callee-save register are clobbered. This means we need - // to save/restore all of them. - // - // Note that this order for saving is important since we use CFI directives - // below to point to where all the saved registers are. - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - // Load pointer that we're going to resume at and store where we're going - // to get resumed from. This is in accordance with the diagram at the top - // of unix.rs. - movq -0x10(%rdi), %rax - mov %rsp, -0x10(%rdi) - - // Swap stacks and restore all our callee-saved registers - mov %rax, %rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp - ret -SIZE(wasmtime_fiber_switch) - -// fn( -// top_of_stack(%rdi): *mut u8, -// entry_point(%rsi): extern fn(*mut u8, *mut u8), -// entry_arg0(%rdx): *mut u8, -// ) -HIDDEN(wasmtime_fiber_init) -GLOBL(wasmtime_fiber_init) -.align 16 -TYPE(wasmtime_fiber_init) -FUNCTION(wasmtime_fiber_init): - // Here we're going to set up a stack frame as expected by - // `wasmtime_fiber_switch`. The values we store here will get restored into - // registers by that function and the `wasmtime_fiber_start` function will - // take over and understands which values are in which registers. - // - // The first 16 bytes of stack are reserved for metadata, so we start - // storing values beneath that. - lea FUNCTION(wasmtime_fiber_start)(%rip), %rax - movq %rax, -0x18(%rdi) - movq %rdi, -0x20(%rdi) // loaded into rbp during switch - movq %rsi, -0x28(%rdi) // loaded into rbx during switch - movq %rdx, -0x30(%rdi) // loaded into r12 during switch - - // And then we specify the stack pointer resumption should begin at. 
Our - // `wasmtime_fiber_switch` function consumes 6 registers plus a return - // pointer, and the top 16 bytes are reserved, so that's: - // - // (6 + 1) * 16 + 16 = 0x48 - lea -0x48(%rdi), %rax - movq %rax, -0x10(%rdi) - ret -SIZE(wasmtime_fiber_init) - -// This is a pretty special function that has no real signature. Its use is to -// be the "base" function of all fibers. This entrypoint is used in -// `wasmtime_fiber_init` to bootstrap the execution of a new fiber. -// -// We also use this function as a persistent frame on the stack to emit dwarf -// information to unwind into the caller. This allows us to unwind from the -// fiber's stack back to the main stack that the fiber was called from. We use -// special dwarf directives here to do so since this is a pretty nonstandard -// function. -// -// If you're curious a decent introduction to CFI things and unwinding is at -// https://www.imperialviolet.org/2017/01/18/cfi.html -.align 16 -TYPE(wasmtime_fiber_start) -FUNCTION(wasmtime_fiber_start): -// Use the `simple` directive on the startproc here which indicates that some -// default settings for the platform are omitted, since this function is so -// nonstandard -.cfi_startproc simple - // This is where things get special, we're specifying a custom dwarf - // expression for how to calculate the CFA. The goal here is that we need - // to load the parent's stack pointer just before the call it made into - // `wasmtime_fiber_switch`. Note that the CFA value changes over time as - // well because a fiber may be resumed multiple times from different points - // on the original stack. This means that our custom CFA directive involves - // `DW_OP_deref`, which loads data from memory. 
- // - // The expression we're encoding here is that the CFA, the stack pointer of - // whatever called into `wasmtime_fiber_start`, is: - // - // *$rsp + 0x38 - // - // $rsp is the stack pointer of `wasmtime_fiber_start` at the time the next - // instruction after the `.cfi_escape` is executed. Our $rsp at the start - // of this function is 16 bytes below the top of the stack (0xAff0 in - // the diagram in unix.rs). The $rsp to resume at is stored at that - // location, so we dereference the stack pointer to load it. - // - // After dereferencing, though, we have the $rsp value for - // `wasmtime_fiber_switch` itself. That's a weird function which sort of - // and sort of doesn't exist on the stack. We want to point to the caller - // of `wasmtime_fiber_switch`, so to do that we need to skip the stack space - // reserved by `wasmtime_fiber_switch`, which is the 6 saved registers plus - // the return address of the caller's `call` instruction. Hence we offset - // another 0x38 bytes. - .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ - 4, /* the byte length of this expression */ \ - 0x57, /* DW_OP_reg7 (%rsp) */ \ - 0x06, /* DW_OP_deref */ \ - 0x23, 0x38 /* DW_OP_plus_uconst 0x38 */ - - // And now after we've indicated where our CFA is for our parent function, - // we can define that where all of the saved registers are located. This - // uses standard `.cfi` directives which indicate that these registers are - // all stored relative to the CFA. Note that this order is kept in sync - // with the above register spills in `wasmtime_fiber_switch`. - .cfi_rel_offset rip, -8 - .cfi_rel_offset rbp, -16 - .cfi_rel_offset rbx, -24 - .cfi_rel_offset r12, -32 - .cfi_rel_offset r13, -40 - .cfi_rel_offset r14, -48 - .cfi_rel_offset r15, -56 - - - // The body of this function is pretty similar. All our parameters are - // already loaded into registers by the switch function. 
The - // `wasmtime_fiber_init` routine arranged the various values to be - // materialized into the registers used here. Our job is to then move the - // values into the ABI-defined registers and call the entry-point. Note that - // `callq` is used here to leave this frame on the stack so we can use the - // dwarf info here for unwinding. The trailing `ud2` is just for safety. - mov %r12,%rdi - mov %rbp,%rsi - callq *%rbx - ud2 - .cfi_endproc -SIZE(wasmtime_fiber_start) - -FOOTER - diff --git a/crates/fiber/src/lib.rs b/crates/fiber/src/lib.rs index 6a21285206..abeddcd140 100644 --- a/crates/fiber/src/lib.rs +++ b/crates/fiber/src/lib.rs @@ -4,15 +4,17 @@ use std::io; use std::marker::PhantomData; use std::panic::{self, AssertUnwindSafe}; -#[cfg(windows)] -mod windows; -#[cfg(windows)] -use windows as imp; - -#[cfg(unix)] -mod unix; -#[cfg(unix)] -use unix as imp; +cfg_if::cfg_if! { + if #[cfg(windows)] { + mod windows; + use windows as imp; + } else if #[cfg(unix)] { + mod unix; + use unix as imp; + } else { + compile_error!("fibers are not supported on this platform"); + } +} /// Represents an execution stack to use for a fiber. #[derive(Debug)] diff --git a/crates/fiber/src/unix.rs b/crates/fiber/src/unix.rs index 839e011970..9da6b564e6 100644 --- a/crates/fiber/src/unix.rs +++ b/crates/fiber/src/unix.rs @@ -29,6 +29,8 @@ //! `suspend`, which has 0xB000 so it can find this, will read that and write //! its own resumption information into this slot as well. +#![allow(unused_macros)] + use crate::RunResult; use std::cell::Cell; use std::io; @@ -174,3 +176,77 @@ impl Suspend { ret.cast() } } + +// This macro itself generates a macro named `asm_func!` which is suitable for +// generating a single `global_asm!`-defined function. This takes care of +// platform-specific directives to get the symbol attributes correct (e.g. ELF +// symbols get a size and are flagged as a function) and additionally handles +// visibility across platforms. 
All symbols should be visible to Rust but not +// visible externally outside of a `*.so`. +cfg_if::cfg_if! { + if #[cfg(target_os = "macos")] { + macro_rules! asm_func { + ($name:tt, $($body:tt)*) => { + std::arch::global_asm!(concat!( + ".p2align 4\n", + ".private_extern _", $name, "\n", + ".global _", $name, "\n", + "_", $name, ":\n", + $($body)* + )); + }; + } + macro_rules! asm_sym { + ($name:tt) => (concat!("_", $name)) + } + } else { + // Note that for now this "else" clause just assumes that everything + // other than macOS is ELF and has the various directives here for + // that. + cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + macro_rules! elf_func_type_header { + ($name:tt) => (concat!(".type ", $name, ",%function\n")) + } + } else { + macro_rules! elf_func_type_header { + ($name:tt) => (concat!(".type ", $name, ",@function\n")) + } + } + } + + macro_rules! asm_func { + ($name:tt, $($body:tt)*) => { + std::arch::global_asm!(concat!( + ".p2align 4\n", + ".hidden ", $name, "\n", + ".global ", $name, "\n", + elf_func_type_header!($name), + $name, ":\n", + $($body)* + ".size ", $name, ",.-", $name, + )); + }; + } + macro_rules! asm_sym { + ($name:tt) => ($name) + } + } +} + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + } else if #[cfg(target_arch = "x86_64")] { + mod x86_64; + } else if #[cfg(target_arch = "x86")] { + mod x86; + } else if #[cfg(target_arch = "arm")] { + mod arm; + } else if #[cfg(target_arch = "s390x")] { + // currently `global_asm!` isn't stable on s390x so this is an external + // assembler file built with the `build.rs`. + } else { + compile_error!("fibers are not supported on this CPU architecture"); + } +} diff --git a/crates/fiber/src/unix/aarch64.rs b/crates/fiber/src/unix/aarch64.rs new file mode 100644 index 0000000000..a0b4201b9d --- /dev/null +++ b/crates/fiber/src/unix/aarch64.rs @@ -0,0 +1,175 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. 
It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! +// +// Also at this time this file is heavily based off the x86_64 file, so you'll +// probably want to read that one as well. +// +// Finally, control flow integrity hardening has been applied to the code using +// the Pointer Authentication (PAuth) and Branch Target Identification (BTI) +// technologies from the Arm instruction set architecture: +// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP` +// instructions +// * Return addresses are signed and authenticated using the stack pointer +// value as a modifier (similarly to the salt in a HMAC operation); the +// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the +// `.cfi_window_save` assembler directive) informs an unwinder about this + +cfg_if::cfg_if! { + if #[cfg(target_os = "macos")] { + macro_rules! cfi_window_save { () => (); } + macro_rules! pacia1716 { () => (); } + macro_rules! paciasp { () => (); } + macro_rules! autiasp { () => (); } + } else { + macro_rules! cfi_window_save { () => (".cfi_window_save\n"); } + macro_rules! pacia1716 { () => ("pacia1716\n"); } + macro_rules! paciasp { () => ("paciasp\n"); } + macro_rules! autiasp { () => ("autiasp\n"); } + } +} + +// fn(top_of_stack(%x0): *mut u8) +asm_func!( + "wasmtime_fiber_switch", + " + .cfi_startproc + ", + paciasp!(), + cfi_window_save!(), + " + // Save all callee-saved registers on the stack since we're + // assuming they're clobbered as a result of the stack switch. + stp x29, x30, [sp, -16]! + stp x20, x19, [sp, -16]! + stp x22, x21, [sp, -16]! + stp x24, x23, [sp, -16]! + stp x26, x25, [sp, -16]! + stp x28, x27, [sp, -16]! + stp d9, d8, [sp, -16]! + stp d11, d10, [sp, -16]! + stp d13, d12, [sp, -16]! + stp d15, d14, [sp, -16]! 
+ + // Load our previously saved stack pointer to resume to, and save + // off our current stack pointer on where to come back to + // eventually. + ldr x8, [x0, -0x10] + mov x9, sp + str x9, [x0, -0x10] + + // Switch to the new stack and restore all our callee-saved + // registers after the switch and return to our new stack. + mov sp, x8 + ldp d15, d14, [sp], 16 + ldp d13, d12, [sp], 16 + ldp d11, d10, [sp], 16 + ldp d9, d8, [sp], 16 + ldp x28, x27, [sp], 16 + ldp x26, x25, [sp], 16 + ldp x24, x23, [sp], 16 + ldp x22, x21, [sp], 16 + ldp x20, x19, [sp], 16 + ldp x29, x30, [sp], 16 + ", + autiasp!(), + cfi_window_save!(), + " + ret + .cfi_endproc + ", +); + +// fn( +// top_of_stack(%x0): *mut u8, +// entry_point(%x1): extern fn(*mut u8, *mut u8), +// entry_arg0(%x2): *mut u8, +// ) +// We set up the newly initialized fiber, so that it resumes execution +// from wasmtime_fiber_start(). As a result, we need a signed address +// of this function, so there are 2 requirements: +// * The fiber stack pointer value that is used by the signing operation +// must match the value when the pointer is authenticated inside +// wasmtime_fiber_switch(), otherwise the latter would fault +// * We would like to use an instruction that is executed as a no-op by +// processors that do not support PAuth, so that the code is +// backward-compatible and there is no duplication; `PACIA1716` is a +// suitable one, which has the following operand register +// conventions: +// * X17 contains the pointer value to sign +// * X16 contains the modifier value +// +// TODO: Use the PACGA instruction to authenticate the saved register +// state, which avoids creating signed pointers to +// wasmtime_fiber_start(), and provides wider coverage. 
+#[rustfmt::skip] +asm_func!( + "wasmtime_fiber_init", + " + .cfi_startproc + hint #34 // bti c + sub x16, x0, #16 + adr x17, ", asm_sym!("wasmtime_fiber_start"), " + ", + pacia1716!(), + " + str x17, [x16, -0x8] // x17 => lr + str x0, [x16, -0x18] // x0 => x19 + stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21 + + // `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for + // the original reserved 16 bytes. + add x8, x0, -0xb0 + str x8, [x0, -0x10] + ret + .cfi_endproc + ", +); + +// See the x86_64 file for more commentary on what these CFI directives are +// doing. Like over there note that the relative offsets to registers here +// match the frame layout in `wasmtime_fiber_switch`. +asm_func!( + "wasmtime_fiber_start", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 5, /* the byte length of this expression */ \ + 0x6f, /* DW_OP_reg31(%sp) */ \ + 0x06, /* DW_OP_deref */ \ + 0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */ + .cfi_rel_offset x29, -0x10 + .cfi_rel_offset x30, -0x08 + ", + cfi_window_save!(), + " + .cfi_rel_offset x19, -0x18 + .cfi_rel_offset x20, -0x20 + .cfi_rel_offset x21, -0x28 + .cfi_rel_offset x22, -0x30 + .cfi_rel_offset x23, -0x38 + .cfi_rel_offset x24, -0x40 + .cfi_rel_offset x25, -0x48 + .cfi_rel_offset x26, -0x50 + .cfi_rel_offset x27, -0x58 + + // Load our two arguments from the stack, where x1 is our start + // procedure and x0 is its first argument. This also blows away the + // stack space used by those two arguments. + mov x0, x21 + mov x1, x19 + + // ... and then we call the function! Note that this is a function call + // so our frame stays on the stack to backtrace through. + blr x20 + // Unreachable, here for safety. This should help catch unexpected + // behaviors. Use a noticeable payload so one can grep for it in the + // codebase. 
+ brk 0xf1b3 + .cfi_endproc + ", +); diff --git a/crates/fiber/src/unix/arm.rs b/crates/fiber/src/unix/arm.rs new file mode 100644 index 0000000000..c580ef98ce --- /dev/null +++ b/crates/fiber/src/unix/arm.rs @@ -0,0 +1,83 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! +// +// Also at this time this file is heavily based off the x86_64 file, so you'll +// probably want to read that one as well. + +// fn(top_of_stack(%r0): *mut u8) +asm_func!( + "wasmtime_fiber_switch", + " + // Save callee-saved registers + push {{r4-r11,lr}} + + // Swap stacks, recording our current stack pointer + ldr r4, [r0, #-0x08] + str sp, [r0, #-0x08] + mov sp, r4 + + // Restore and return + pop {{r4-r11,lr}} + bx lr + ", +); + +// fn( +// top_of_stack(%r0): *mut u8, +// entry_point(%r1): extern fn(*mut u8, *mut u8), +// entry_arg0(%r2): *mut u8, +// ) +asm_func!( + "wasmtime_fiber_init", + " + adr r3, wasmtime_fiber_start + str r3, [r0, #-0x0c] // => lr + str r0, [r0, #-0x10] // => r11 + str r1, [r0, #-0x14] // => r10 + str r2, [r0, #-0x18] // => r9 + + add r3, r0, #-0x2c + str r3, [r0, #-0x08] + bx lr + ", +); + +asm_func!( + "wasmtime_fiber_start", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + // See the x86_64 file for more commentary on what these CFI directives + // are doing. Like over there note that the relative offsets to + // registers here match the frame layout in `wasmtime_fiber_switch`. + // + // TODO: this is only lightly tested. This gets backtraces in gdb but + // not at runtime. Perhaps the libgcc at runtime was too old? Doesn't + // support something here? Unclear. Will need investigation if someone + // ends up needing this and it still doesn't work. 
+ .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 5, /* the byte length of this expression */ \ + 0x7d, 0x00, /* DW_OP_breg13(%sp) + 0 */ \ + 0x06, /* DW_OP_deref */ \ + 0x23, 0x24 /* DW_OP_plus_uconst 0x24 */ + + .cfi_rel_offset lr, -0x04 + .cfi_rel_offset r11, -0x08 + .cfi_rel_offset r10, -0x0c + .cfi_rel_offset r9, -0x10 + .cfi_rel_offset r8, -0x14 + .cfi_rel_offset r7, -0x18 + .cfi_rel_offset r6, -0x1c + .cfi_rel_offset r5, -0x20 + .cfi_rel_offset r4, -0x24 + + mov r1, r11 + mov r0, r9 + blx r10 + .cfi_endproc + ", +); diff --git a/crates/fiber/src/arch/s390x.S b/crates/fiber/src/unix/s390x.S similarity index 92% rename from crates/fiber/src/arch/s390x.S rename to crates/fiber/src/unix/s390x.S index 8d9548bbb0..a5a47118be 100644 --- a/crates/fiber/src/arch/s390x.S +++ b/crates/fiber/src/unix/s390x.S @@ -8,7 +8,13 @@ // Also at this time this file is heavily based off the x86_64 file, so you'll // probably want to read that one as well. -#include "header.h" +.text + +#define GLOBL(fnname) .globl fnname +#define HIDDEN(fnname) .hidden fnname +#define TYPE(fnname) .type fnname,@function +#define FUNCTION(fnname) fnname +#define SIZE(fnname) .size fnname,.-fnname // fn(top_of_stack(%x0): *mut u8) HIDDEN(wasmtime_fiber_switch) @@ -76,6 +82,7 @@ SIZE(wasmtime_fiber_init) TYPE(wasmtime_fiber_start) FUNCTION(wasmtime_fiber_start): .cfi_startproc simple +.cfi_def_cfa_offset 0 // See the x86_64 file for more commentary on what these CFI directives are // doing. Like over there note that the relative offsets to registers here @@ -109,4 +116,5 @@ FUNCTION(wasmtime_fiber_start): .cfi_endproc SIZE(wasmtime_fiber_start) -FOOTER +// Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits diff --git a/crates/fiber/src/unix/x86.rs b/crates/fiber/src/unix/x86.rs new file mode 100644 index 0000000000..88eb81df34 --- /dev/null +++ b/crates/fiber/src/unix/x86.rs @@ -0,0 +1,106 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! +// +// This file is modeled after x86_64.rs and comments are not copied over. For +// reference be sure to review the other file. Note that the pointer size is +// different so the reserved space at the top of the stack is 8 bytes, not 16 +// bytes. Still two pointers though. + +// fn(top_of_stack: *mut u8) +asm_func!( + "wasmtime_fiber_switch", + " + // Load our stack-to-use + mov eax, 0x4[esp] + mov ecx, -0x8[eax] + + // Save callee-saved registers + push ebp + push ebx + push esi + push edi + + // Save our current stack and jump to the stack-to-use + mov -0x8[eax], esp + mov esp, ecx + + // Restore callee-saved registers + pop edi + pop esi + pop ebx + pop ebp + ret + ", +); + +// fn( +// top_of_stack: *mut u8, +// entry_point: extern fn(*mut u8, *mut u8), +// entry_arg0: *mut u8, +// ) +asm_func!( + "wasmtime_fiber_init", + " + mov eax, 4[esp] + + // move top_of_stack to the 2nd argument + mov -0x0c[eax], eax + + // move entry_arg0 to the 1st argument + mov ecx, 12[esp] + mov -0x10[eax], ecx + + // Move our start function to the return address which the `ret` in + // `wasmtime_fiber_switch` will return to. + lea ecx, wasmtime_fiber_start + mov -0x14[eax], ecx + + // And move `entry_point` to get loaded into `%ebp` through the context + // switch. This'll get jumped to in `wasmtime_fiber_start`.
+ mov ecx, 8[esp] + mov -0x18[eax], ecx + + // Our stack from top-to-bottom looks like: + // + // * 8 bytes of reserved space per unix.rs (two-pointers space) + // * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards) + // * 4 bytes of return address + // * 16 bytes of saved registers + // + // Note that after the return address the stack is conveniently 16-byte + // aligned as required, so we just leave the arguments on the stack in + // `wasmtime_fiber_start` and immediately do the call. + lea ecx, -0x24[eax] + mov -0x08[eax], ecx + ret + ", +); + +asm_func!( + "wasmtime_fiber_start", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 5, /* the byte length of this expression */ \ + 0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \ + 0x06, /* DW_OP_deref */ \ + 0x23, 0x14 /* DW_OP_plus_uconst 0x14 */ + + .cfi_rel_offset eip, -4 + .cfi_rel_offset ebp, -8 + .cfi_rel_offset ebx, -12 + .cfi_rel_offset esi, -16 + .cfi_rel_offset edi, -20 + + // Our arguments and stack alignment are all prepped by + // `wasmtime_fiber_init`. + call ebp + ud2 + .cfi_endproc + ", +); diff --git a/crates/fiber/src/unix/x86_64.rs b/crates/fiber/src/unix/x86_64.rs new file mode 100644 index 0000000000..d057763a45 --- /dev/null +++ b/crates/fiber/src/unix/x86_64.rs @@ -0,0 +1,157 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! + +// fn(top_of_stack(rdi): *mut u8) +asm_func!( + "wasmtime_fiber_switch", + " + // We're switching to arbitrary code somewhere else, so pessimistically + // assume that all callee-save register are clobbered. This means we need + // to save/restore all of them. 
+ // + // Note that this order for saving is important since we use CFI directives + // below to point to where all the saved registers are. + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + + // Load pointer that we're going to resume at and store where we're going + // to get resumed from. This is in accordance with the diagram at the top + // of unix.rs. + mov rax, -0x10[rdi] + mov -0x10[rdi], rsp + + // Swap stacks and restore all our callee-saved registers + mov rsp, rax + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + ret + ", +); + +// fn( +// top_of_stack(rdi): *mut u8, +// entry_point(rsi): extern fn(*mut u8, *mut u8), +// entry_arg0(rdx): *mut u8, +// ) +#[rustfmt::skip] +asm_func!( + "wasmtime_fiber_init", + " + // Here we're going to set up a stack frame as expected by + // `wasmtime_fiber_switch`. The values we store here will get restored into + // registers by that function and the `wasmtime_fiber_start` function will + // take over and understands which values are in which registers. + // + // The first 16 bytes of stack are reserved for metadata, so we start + // storing values beneath that. + lea rax, ", asm_sym!("wasmtime_fiber_start"), "[rip] + mov -0x18[rdi], rax + mov -0x20[rdi], rdi // loaded into rbp during switch + mov -0x28[rdi], rsi // loaded into rbx during switch + mov -0x30[rdi], rdx // loaded into r12 during switch + + // And then we specify the stack pointer that resumption should begin at. Our + // `wasmtime_fiber_switch` function consumes 6 registers plus a return + // pointer (8 bytes each), and the top 16 bytes are reserved, so that's: + // + // (6 + 1) * 8 + 16 = 0x48 + lea rax, -0x48[rdi] + mov -0x10[rdi], rax + ret + ", +); + +// This is a pretty special function that has no real signature. Its use is to +// be the "base" function of all fibers. This entrypoint is used in +// `wasmtime_fiber_init` to bootstrap the execution of a new fiber.
+// +// We also use this function as a persistent frame on the stack to emit dwarf +// information to unwind into the caller. This allows us to unwind from the +// fiber's stack back to the main stack that the fiber was called from. We use +// special dwarf directives here to do so since this is a pretty nonstandard +// function. +// +// If you're curious a decent introduction to CFI things and unwinding is at +// https://www.imperialviolet.org/2017/01/18/cfi.html +asm_func!( + "wasmtime_fiber_start", + " + // Use the `simple` directive on the startproc here which indicates that + // some default settings for the platform are omitted, since this + // function is so nonstandard + .cfi_startproc simple + .cfi_def_cfa_offset 0 + + // This is where things get special, we're specifying a custom dwarf + // expression for how to calculate the CFA. The goal here is that we + // need to load the parent's stack pointer just before the call it made + // into `wasmtime_fiber_switch`. Note that the CFA value changes over + // time as well because a fiber may be resumed multiple times from + // different points on the original stack. This means that our custom + // CFA directive involves `DW_OP_deref`, which loads data from memory. + // + // The expression we're encoding here is that the CFA, the stack pointer + // of whatever called into `wasmtime_fiber_start`, is: + // + // *$rsp + 0x38 + // + // $rsp is the stack pointer of `wasmtime_fiber_start` at the time the + // next instruction after the `.cfi_escape` is executed. Our $rsp at the + // start of this function is 16 bytes below the top of the stack (0xAff0 + // in the diagram in unix.rs). The $rsp to resume at is stored at that + // location, so we dereference the stack pointer to load it. + // + // After dereferencing, though, we have the $rsp value for + // `wasmtime_fiber_switch` itself. That's a weird function which sort of + // and sort of doesn't exist on the stack. 
We want to point to the + // caller of `wasmtime_fiber_switch`, so to do that we need to skip the + // stack space reserved by `wasmtime_fiber_switch`, which is the 6 saved + // registers plus the return address of the caller's `call` instruction. + // Hence we offset another 0x38 bytes. + .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 4, /* the byte length of this expression */ \ + 0x57, /* DW_OP_reg7 (rsp) */ \ + 0x06, /* DW_OP_deref */ \ + 0x23, 0x38 /* DW_OP_plus_uconst 0x38 */ + + // And now after we've indicated where our CFA is for our parent + // function, we can define that where all of the saved registers are + // located. This uses standard `.cfi` directives which indicate that + // these registers are all stored relative to the CFA. Note that this + // order is kept in sync with the above register spills in + // `wasmtime_fiber_switch`. + .cfi_rel_offset rip, -8 + .cfi_rel_offset rbp, -16 + .cfi_rel_offset rbx, -24 + .cfi_rel_offset r12, -32 + .cfi_rel_offset r13, -40 + .cfi_rel_offset r14, -48 + .cfi_rel_offset r15, -56 + + // The body of this function is pretty similar. All our parameters are + // already loaded into registers by the switch function. The + // `wasmtime_fiber_init` routine arranged the various values to be + // materialized into the registers used here. Our job is to then move + // the values into the ABI-defined registers and call the entry-point. + // Note that `call` is used here to leave this frame on the stack so we + // can use the dwarf info here for unwinding. The trailing `ud2` is just + // for safety. + mov rdi, r12 + mov rsi, rbp + call rbx + ud2 + .cfi_endproc + ", +); diff --git a/crates/fiber/src/arch/windows.c b/crates/fiber/src/windows.c similarity index 100% rename from crates/fiber/src/arch/windows.c rename to crates/fiber/src/windows.c