Use global_asm! instead of external assembly files (#4306)

* Use `global_asm!` instead of external assembly files

This commit moves the external assembly files of the `wasmtime-fiber`
crate into `global_asm!` blocks defined in Rust. The motivation for
doing this is not very strong at this time, but the points in favor of
this are:

* One less tool needed to cross-compile Wasmtime. A linker is still
  needed but perhaps one day that will improve as well.
* A "modern" assembler, built into LLVM, is used instead of whatever
  appears on the system.

The first point hasn't really cropped up that much and typically getting
an assembler is just as hard as getting a linker nowadays. The second
point though has us using `hint #xx` in aarch64 assembly instead of the
actual instructions for assembler compatibility, and I believe that's no
longer necessary because the LLVM assembler supports the modern
instruction names.

The x86/x86_64 assembly has also been translated to Intel syntax, as
opposed to the old AT&T syntax, since that's Rust's default.
Additionally s390x still remains in an external assembler file
because `global_asm!` is still unstable in Rust on that platform.

* Simplify alignment specification

* Temporarily disable fail-fast

* Add `.cfi_def_cfa_offset 0` to fix CI

* Turn off fail-fast

* Review comments
This commit is contained in:
Alex Crichton
2022-06-27 13:20:19 -05:00
committed by GitHub
parent 0ef873f1bd
commit 4543a07bb5
16 changed files with 631 additions and 603 deletions

View File

@@ -0,0 +1,175 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
//
// Finally, control flow integrity hardening has been applied to the code using
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
// technologies from the Arm instruction set architecture:
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
// instructions
// * Return addresses are signed and authenticated using the stack pointer
// value as a modifier (similarly to the salt in a HMAC operation); the
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
// `.cfi_window_save` assembler directive) informs an unwinder about this
cfg_if::cfg_if! {
    // Each macro below yields an assembly fragment that is spliced between
    // the string pieces passed to `asm_func!` further down in this file.
    if #[cfg(not(target_os = "macos"))] {
        // Everywhere except macOS: emit the real directive / instructions,
        // each terminated by a newline so it occupies its own assembly line.
        macro_rules! cfi_window_save { () => { ".cfi_window_save\n" }; }
        macro_rules! pacia1716 { () => { "pacia1716\n" }; }
        macro_rules! paciasp { () => { "paciasp\n" }; }
        macro_rules! autiasp { () => { "autiasp\n" }; }
    } else {
        // macOS: every fragment expands to nothing.
        macro_rules! cfi_window_save { () => {}; }
        macro_rules! pacia1716 { () => {}; }
        macro_rules! paciasp { () => {}; }
        macro_rules! autiasp { () => {}; }
    }
}
// fn(top_of_stack(%x0): *mut u8)
//
// Swaps execution between the current stack and the one whose saved stack
// pointer is stored at `top_of_stack - 0x10`:
//
// * ten register pairs (x19-x30 and d8-d15) are pushed, 0xa0 bytes in total;
// * the stack pointer to resume at is read from `[x0, -0x10]` and the current
//   one is written back into that same slot;
// * the same ten pairs are popped, in reverse order, from the new stack and
//   `ret` continues at the restored x30.
//
// `wasmtime_fiber_init` and the CFI in `wasmtime_fiber_start` hard-code this
// frame layout, so all three must be changed together.
asm_func!(
"wasmtime_fiber_switch",
"
.cfi_startproc
",
// Sign the return address in x30, then tell the unwinder about it (both
// fragments expand to nothing on macOS).
paciasp!(),
cfi_window_save!(),
"
// Save all callee-saved registers on the stack since we're
// assuming they're clobbered as a result of the stack switch.
stp x29, x30, [sp, -16]!
stp x20, x19, [sp, -16]!
stp x22, x21, [sp, -16]!
stp x24, x23, [sp, -16]!
stp x26, x25, [sp, -16]!
stp x28, x27, [sp, -16]!
stp d9, d8, [sp, -16]!
stp d11, d10, [sp, -16]!
stp d13, d12, [sp, -16]!
stp d15, d14, [sp, -16]!
// Load our previously saved stack pointer to resume to, and save
// off our current stack pointer on where to come back to
// eventually.
ldr x8, [x0, -0x10]
mov x9, sp
str x9, [x0, -0x10]
// Switch to the new stack and restore all our callee-saved
// registers after the switch and return to our new stack.
mov sp, x8
ldp d15, d14, [sp], 16
ldp d13, d12, [sp], 16
ldp d11, d10, [sp], 16
ldp d9, d8, [sp], 16
ldp x28, x27, [sp], 16
ldp x26, x25, [sp], 16
ldp x24, x23, [sp], 16
ldp x22, x21, [sp], 16
ldp x20, x19, [sp], 16
ldp x29, x30, [sp], 16
",
// Authenticate the x30 value that was just reloaded from the new stack; this
// runs after all pops, so sp is back at the value it had when x30 was signed.
autiasp!(),
cfi_window_save!(),
"
ret
.cfi_endproc
",
);
// fn(
// top_of_stack(%x0): *mut u8,
// entry_point(%x1): extern fn(*mut u8, *mut u8),
// entry_arg0(%x2): *mut u8,
// )
// We set up the newly initialized fiber, so that it resumes execution
// from wasmtime_fiber_start(). As a result, we need a signed address
// of this function, so there are 2 requirements:
// * The fiber stack pointer value that is used by the signing operation
// must match the value when the pointer is authenticated inside
// wasmtime_fiber_switch(), otherwise the latter would fault
// * We would like to use an instruction that is executed as a no-op by
// processors that do not support PAuth, so that the code is
// backward-compatible and there is no duplication; `PACIA1716` is a
// suitable one, which has the following operand register
// conventions:
// * X17 contains the pointer value to sign
// * X16 contains the modifier value
//
// TODO: Use the PACGA instruction to authenticate the saved register
// state, which avoids creating signed pointers to
// wasmtime_fiber_start(), and provides wider coverage.
//
// Slots written below, as offsets from `top_of_stack` (x0); they are the ones
// `wasmtime_fiber_switch` pops on the first switch into the fiber:
//
//   -0x10  stack pointer to resume at (top_of_stack - 0xb0)
//   -0x18  address of `wasmtime_fiber_start` (signed by `pacia1716!()` where
//          that macro is non-empty), restored into x30
//   -0x28  top_of_stack, restored into x19
//   -0x30  entry_point (x1), restored into x20
//   -0x38  entry_arg0 (x2), restored into x21
//
// x16 is set to `top_of_stack - 16`, which is both the modifier for the
// signing operation and the base for the first two stores.
#[rustfmt::skip]
asm_func!(
"wasmtime_fiber_init",
"
.cfi_startproc
hint #34 // bti c
sub x16, x0, #16
adr x17, ", asm_sym!("wasmtime_fiber_start"), "
",
pacia1716!(),
"
str x17, [x16, -0x8] // x17 => lr
str x0, [x16, -0x18] // x0 => x19
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
// the original reserved 16 bytes.
add x8, x0, -0xb0
str x8, [x0, -0x10]
ret
.cfi_endproc
",
);
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
// match the frame layout in `wasmtime_fiber_switch`.
asm_func!(
"wasmtime_fiber_start",
// Base frame of every fiber. It is entered through the `ret` at the end of
// `wasmtime_fiber_switch`, with the values stored by `wasmtime_fiber_init`
// already restored into registers:
//
//   x19 = top_of_stack, x20 = entry_point, x21 = entry_arg0
//
// The CFA is computed as `*sp + 0xa0`: the slot at sp holds the stack pointer
// saved by `wasmtime_fiber_switch`, and 0xa0 is the size of the frame that
// function pushes. Every register rule below is the offset, relative to that
// CFA, at which `wasmtime_fiber_switch` stored the register.
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x6f, /* DW_OP_reg31(%sp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
.cfi_rel_offset x29, -0x10
.cfi_rel_offset x30, -0x08
",
cfi_window_save!(),
"
.cfi_rel_offset x19, -0x18
.cfi_rel_offset x20, -0x20
.cfi_rel_offset x21, -0x28
.cfi_rel_offset x22, -0x30
.cfi_rel_offset x23, -0x38
.cfi_rel_offset x24, -0x40
.cfi_rel_offset x25, -0x48
.cfi_rel_offset x26, -0x50
.cfi_rel_offset x27, -0x58
// x28 is pushed together with x27 by `stp x28, x27, [sp, -16]!` in
// `wasmtime_fiber_switch`, which places it 8 bytes below x27.
.cfi_rel_offset x28, -0x60
// Move our two arguments into the argument registers: x21 holds
// `entry_arg0` and x19 holds `top_of_stack`, both stored by
// `wasmtime_fiber_init` and restored into registers by
// `wasmtime_fiber_switch`.
mov x0, x21
mov x1, x19
// ... and then we call the function! Note that this is a function call
// so our frame stays on the stack to backtrace through.
blr x20
// Unreachable, here for safety. This should help catch unexpected
// behaviors. Use a noticeable payload so one can grep for it in the
// codebase.
brk 0xf1b3
.cfi_endproc
",
);

View File

@@ -0,0 +1,83 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
// fn(top_of_stack(%r0): *mut u8)
//
// Swaps execution between the current stack and the one whose saved stack
// pointer is stored at `top_of_stack - 0x08`. Nine words (r4-r11 and lr, 0x24
// bytes) are pushed on the current stack and the same nine are popped from
// the new one, so `bx lr` continues wherever the other side was suspended.
//
// r4 is used as the scratch register for the swap because it has already
// been saved by the `push` and is overwritten by the `pop` right after.
asm_func!(
"wasmtime_fiber_switch",
"
// Save callee-saved registers
push {{r4-r11,lr}}
// Swap stacks, recording our current stack pointer
ldr r4, [r0, #-0x08]
str sp, [r0, #-0x08]
mov sp, r4
// Restore and return
pop {{r4-r11,lr}}
bx lr
",
);
// fn(
// top_of_stack(%r0): *mut u8,
// entry_point(%r1): extern fn(*mut u8, *mut u8),
// entry_arg0(%r2): *mut u8,
// )
//
// Writes the initial frame that `wasmtime_fiber_switch` pops on the first
// switch into the fiber. Offsets are relative to `top_of_stack`:
//
//   -0x08  stack pointer to resume at (top_of_stack - 0x2c)
//   -0x0c  address of `wasmtime_fiber_start`, restored into lr
//   -0x10  top_of_stack, restored into r11
//   -0x14  entry_point, restored into r10
//   -0x18  entry_arg0, restored into r9
//
// 0x2c is the 0x24-byte frame popped by `wasmtime_fiber_switch` plus the 8
// bytes above it, the lower word of which is the saved stack pointer slot.
asm_func!(
"wasmtime_fiber_init",
"
adr r3, wasmtime_fiber_start
str r3, [r0, #-0x0c] // => lr
str r0, [r0, #-0x10] // => r11
str r1, [r0, #-0x14] // => r10
str r2, [r0, #-0x18] // => r9
add r3, r0, #-0x2c
str r3, [r0, #-0x08]
bx lr
",
);
asm_func!(
"wasmtime_fiber_start",
// Base frame of every fiber. It is entered through `bx lr` at the end of
// `wasmtime_fiber_switch`, with the values stored by `wasmtime_fiber_init`
// already restored into registers:
//
//   r9 = entry_arg0, r10 = entry_point, r11 = top_of_stack
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
// See the x86_64 file for more commentary on what these CFI directives
// are doing. Like over there note that the relative offsets to
// registers here match the frame layout in `wasmtime_fiber_switch`.
//
// TODO: this is only lightly tested. This gets backtraces in gdb but
// not at runtime. Perhaps the libgcc at runtime was too old? Doesn't
// support something here? Unclear. Will need investigation if someone
// ends up needing this and it still doesn't work.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x7d, 0x00, /* DW_OP_breg13(%sp) + 0 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x24 /* DW_OP_plus_uconst 0x24 */
.cfi_rel_offset lr, -0x04
.cfi_rel_offset r11, -0x08
.cfi_rel_offset r10, -0x0c
.cfi_rel_offset r9, -0x10
.cfi_rel_offset r8, -0x14
.cfi_rel_offset r7, -0x18
.cfi_rel_offset r6, -0x1c
.cfi_rel_offset r5, -0x20
.cfi_rel_offset r4, -0x24
// entry_point(entry_arg0, top_of_stack)
mov r1, r11
mov r0, r9
blx r10
// Unreachable: the entry point is not expected to return. Trap if it
// does, like the versions of this function for the other architectures.
udf #0
.cfi_endproc
",
);

View File

@@ -0,0 +1,120 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// Also at this time this file is heavily based off the x86_64 file, so you'll
// probably want to read that one as well.
// Everything in this file is emitted into the text section.
.text
// Preprocessor helpers wrapping the per-symbol directives used by each
// function below.
// GLOBL: export the symbol from this object file.
#define GLOBL(fnname) .globl fnname
// HIDDEN: give the symbol hidden visibility.
#define HIDDEN(fnname) .hidden fnname
// TYPE: mark the symbol as a function.
#define TYPE(fnname) .type fnname,@function
// FUNCTION: the symbol name itself, used for labels and references.
#define FUNCTION(fnname) fnname
// SIZE: record the symbol size as the distance from its label to here.
#define SIZE(fnname) .size fnname,.-fnname
// fn(top_of_stack(%r2): *mut u8)
//
// Swaps execution between the current stack and the one whose saved stack
// pointer is stored at `top_of_stack - 16`. %r6-%r15 are stored at offset 48
// from the incoming %r15, then 64 more bytes are allocated for %f8-%f15.
// `wasmtime_fiber_init` and the CFI in `wasmtime_fiber_start` depend on this
// exact layout.
HIDDEN(wasmtime_fiber_switch)
GLOBL(wasmtime_fiber_switch)
.p2align 2
TYPE(wasmtime_fiber_switch)
FUNCTION(wasmtime_fiber_switch):
// Save all callee-saved registers on the stack since we're assuming
// they're clobbered as a result of the stack switch.
stmg %r6, %r15, 48(%r15)
aghi %r15, -64
std %f8, 0(%r15)
std %f9, 8(%r15)
std %f10, 16(%r15)
std %f11, 24(%r15)
std %f12, 32(%r15)
std %f13, 40(%r15)
std %f14, 48(%r15)
std %f15, 56(%r15)
// Load our previously saved stack pointer to resume to, and save off our
// current stack pointer on where to come back to eventually.
lg %r1, -16(%r2)
stg %r15, -16(%r2)
// Switch to the new stack and restore all our callee-saved registers after
// the switch and return to our new stack.
// The floating-point registers are reloaded relative to %r1, the saved stack
// pointer of the other side.
ld %f8, 0(%r1)
ld %f9, 8(%r1)
ld %f10, 16(%r1)
ld %f11, 24(%r1)
ld %f12, 32(%r1)
ld %f13, 40(%r1)
ld %f14, 48(%r1)
ld %f15, 56(%r1)
// 112 = 64 (floating-point area) + 48. The reloaded range includes %r15, so
// this instruction is what actually switches stacks.
lmg %r6, %r15, 112(%r1)
br %r14
SIZE(wasmtime_fiber_switch)
// fn(
// top_of_stack(%r2): *mut u8,
// entry_point(%r3): extern fn(*mut u8, *mut u8),
// entry_arg0(%r4): *mut u8,
// )
//
// Writes the initial frame that `wasmtime_fiber_switch` restores on the first
// switch into the fiber. Slots, as offsets from the incoming `top_of_stack`:
//
//   -16   stack pointer to resume at (top_of_stack - 224)
//   -40   top_of_stack - 160, restored into %r15
//   -48   address of `wasmtime_fiber_start`, restored into %r14
//   -96   entry_arg0, restored into %r8
//   -104  entry_point, restored into %r7
//   -112  top_of_stack, restored into %r6
//
// Note that %r2 is decremented twice below, so the last two stores use
// offsets relative to the adjusted value.
HIDDEN(wasmtime_fiber_init)
GLOBL(wasmtime_fiber_init)
.p2align 2
TYPE(wasmtime_fiber_init)
FUNCTION(wasmtime_fiber_init):
larl %r1, FUNCTION(wasmtime_fiber_start)
stg %r1, -48(%r2) // wasmtime_fiber_start - restored into %r14
stg %r2, -112(%r2) // top_of_stack - restored into %r6
stg %r3, -104(%r2) // entry_point - restored into %r7
stg %r4, -96(%r2) // entry_arg0 - restored into %r8
aghi %r2, -160 // 160 bytes register save area
stg %r2, 120(%r2) // bottom of register save area - restored into %r15
// `wasmtime_fiber_switch` has a 64 byte stack.
aghi %r2, -64
// 208 - 224 = -16: this is the saved-stack-pointer slot at top_of_stack - 16.
stg %r2, 208(%r2)
br %r14
SIZE(wasmtime_fiber_init)
// Base frame of every fiber. It is entered through `br %r14` at the end of
// `wasmtime_fiber_switch`, with the values stored by `wasmtime_fiber_init`
// already restored: %r6 = top_of_stack, %r7 = entry_point, %r8 = entry_arg0.
// Unlike the two functions above, this symbol has no GLOBL/HIDDEN directive;
// within this file it is only referenced by `wasmtime_fiber_init`.
.p2align 2
TYPE(wasmtime_fiber_start)
FUNCTION(wasmtime_fiber_start):
.cfi_startproc simple
.cfi_def_cfa_offset 0
// See the x86_64 file for more commentary on what these CFI directives are
// doing. Like over there note that the relative offsets to registers here
// match the frame layout in `wasmtime_fiber_switch`.
// The expression evaluates to `*(%r15 + 0x90) + 0xe0`: %r15 + 0x90 is the
// saved-stack-pointer slot written by `wasmtime_fiber_switch`, and
// 0xe0 = 64 + 160 undoes the two adjustments below the caller's %r15.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
7, /* the byte length of this expression */ \
0x7f, 0x90, 0x1, /* DW_OP_breg15 0x90 */ \
0x06, /* DW_OP_deref */ \
0x23, 0xe0, 0x1 /* DW_OP_plus_uconst 0xe0 */
.cfi_rel_offset 6, -112
.cfi_rel_offset 7, -104
.cfi_rel_offset 8, -96
.cfi_rel_offset 9, -88
.cfi_rel_offset 10, -80
.cfi_rel_offset 11, -72
.cfi_rel_offset 12, -64
.cfi_rel_offset 13, -56
.cfi_rel_offset 14, -48
.cfi_rel_offset 15, -40
// Load our two arguments prepared by `wasmtime_fiber_init`.
lgr %r2, %r8 // entry_arg0
lgr %r3, %r6 // top_of_stack
// ... and then we call the function! Note that this is a function call so
// our frame stays on the stack to backtrace through.
basr %r14, %r7 // entry_point
// .. technically we shouldn't get here, so just trap.
.word 0x0000
.cfi_endproc
SIZE(wasmtime_fiber_start)
// Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits

View File

@@ -0,0 +1,107 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// This file is modeled after x86_64.rs and comments are not copied over. For
// reference be sure to review the other file. Note that the pointer size is
// different so the reserved space at the top of the stack is 8 bytes, not 16
// bytes. Still two pointers though.
// fn(top_of_stack: *mut u8)
//
// Swaps execution between the current stack and the one whose saved stack
// pointer is stored at `top_of_stack - 0x8`. The argument is read from the
// stack (`0x4[esp]`) before anything is pushed, so that offset is relative to
// the stack pointer on entry. Four registers (ebp, ebx, esi, edi) are pushed
// here and popped from the new stack; together with the return address that
// is the 0x14 bytes assumed by `wasmtime_fiber_init` and by the CFI in
// `wasmtime_fiber_start`.
asm_func!(
"wasmtime_fiber_switch",
"
// Load our stack-to-use
mov eax, 0x4[esp]
mov ecx, -0x8[eax]
// Save callee-saved registers
push ebp
push ebx
push esi
push edi
// Save our current stack and jump to the stack-to-use
mov -0x8[eax], esp
mov esp, ecx
// Restore callee-saved registers
pop edi
pop esi
pop ebx
pop ebp
ret
",
);
// fn(
// top_of_stack: *mut u8,
// entry_point: extern fn(*mut u8, *mut u8),
// entry_arg0: *mut u8,
// )
//
// Writes the initial frame that `wasmtime_fiber_switch` pops on the first
// switch into the fiber. All three arguments are read from the stack
// (`4[esp]`, `8[esp]`, `12[esp]`). Slots, as offsets from `top_of_stack`:
//
//   -0x08  stack pointer to resume at (top_of_stack - 0x24)
//   -0x0c  top_of_stack, 2nd argument for the entry point
//   -0x10  entry_arg0, 1st argument for the entry point
//   -0x14  address of `wasmtime_fiber_start`, used as return address
//   -0x18  entry_point, restored into ebp
//
// The slots for ebx, esi and edi (-0x1c to -0x24) are not written here.
asm_func!(
"wasmtime_fiber_init",
"
mov eax, 4[esp]
// move top_of_stack to the 2nd argument
mov -0x0c[eax], eax
// move entry_arg0 to the 1st argument
mov ecx, 12[esp]
mov -0x10[eax], ecx
// Move our start function to the return address which the `ret` in
// `wasmtime_fiber_switch` will return to.
lea ecx, wasmtime_fiber_start
mov -0x14[eax], ecx
// And move `entry_point` to get loaded into `%ebp` through the context
// switch. This'll get jumped to in `wasmtime_fiber_start`.
mov ecx, 8[esp]
mov -0x18[eax], ecx
// Our stack from top-to-bottom looks like:
//
// * 8 bytes of reserved space per unix.rs (two-pointers space)
// * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards)
// * 4 bytes of return address
// * 16 bytes of saved registers
//
// Note that after the return address the stack is conveniently 16-byte
// aligned as required, so we just leave the arguments on the stack in
// `wasmtime_fiber_start` and immediately do the call.
lea ecx, -0x24[eax]
mov -0x08[eax], ecx
ret
",
);
asm_func!(
"wasmtime_fiber_start",
// Base frame of every fiber. It is entered through the `ret` at the end of
// `wasmtime_fiber_switch`, with ebp holding the entry point and the entry
// point's two arguments already sitting at the top of the stack, both
// arranged by `wasmtime_fiber_init`.
//
// The CFA expression is `*(esp + 8) + 0x14`: esp + 8 is the slot holding the
// stack pointer saved by `wasmtime_fiber_switch`, and 0x14 skips the four
// registers pushed there plus the return address.
"
.cfi_startproc simple
.cfi_def_cfa_offset 0
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
5, /* the byte length of this expression */ \
0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \
0x06, /* DW_OP_deref */ \
0x23, 0x14 /* DW_OP_plus_uconst 0x14 */
.cfi_rel_offset eip, -4
.cfi_rel_offset ebp, -8
.cfi_rel_offset ebx, -12
.cfi_rel_offset esi, -16
.cfi_rel_offset edi, -20
// Our arguments and stack alignment are all prepped by
// `wasmtime_fiber_init`.
call ebp
ud2
.cfi_endproc
",
);

View File

@@ -0,0 +1,157 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
// fn(top_of_stack(rdi): *mut u8)
//
// Swaps execution between the current stack and the one whose saved stack
// pointer is stored at `top_of_stack - 0x10`. Six registers are pushed here
// and popped, in reverse order, from the new stack; together with the return
// address that is the 0x38 bytes assumed by `wasmtime_fiber_init` and by the
// CFI in `wasmtime_fiber_start`.
asm_func!(
"wasmtime_fiber_switch",
"
// We're switching to arbitrary code somewhere else, so pessimistically
// assume that all callee-save register are clobbered. This means we need
// to save/restore all of them.
//
// Note that this order for saving is important since we use CFI directives
// below to point to where all the saved registers are.
push rbp
push rbx
push r12
push r13
push r14
push r15
// Load pointer that we're going to resume at and store where we're going
// to get resumed from. This is in accordance with the diagram at the top
// of unix.rs.
mov rax, -0x10[rdi]
mov -0x10[rdi], rsp
// Swap stacks and restore all our callee-saved registers
mov rsp, rax
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
ret
",
);
// fn(
// top_of_stack(rdi): *mut u8,
// entry_point(rsi): extern fn(*mut u8, *mut u8),
// entry_arg0(rdx): *mut u8,
// )
//
// Slots written below, as offsets from `top_of_stack` (rdi); they are the ones
// `wasmtime_fiber_switch` pops on the first switch into the fiber:
//
//   -0x10  stack pointer to resume at (top_of_stack - 0x48)
//   -0x18  address of `wasmtime_fiber_start`, used as return address
//   -0x20  top_of_stack, restored into rbp
//   -0x28  entry_point (rsi), restored into rbx
//   -0x30  entry_arg0 (rdx), restored into r12
//
// The slots for r13, r14 and r15 (-0x38 to -0x48) are not written here.
#[rustfmt::skip]
asm_func!(
"wasmtime_fiber_init",
"
// Here we're going to set up a stack frame as expected by
// `wasmtime_fiber_switch`. The values we store here will get restored into
// registers by that function and the `wasmtime_fiber_start` function will
// take over and understands which values are in which registers.
//
// The first 16 bytes of stack are reserved for metadata, so we start
// storing values beneath that.
lea rax, ", asm_sym!("wasmtime_fiber_start"), "[rip]
mov -0x18[rdi], rax
mov -0x20[rdi], rdi // loaded into rbp during switch
mov -0x28[rdi], rsi // loaded into rbx during switch
mov -0x30[rdi], rdx // loaded into r12 during switch
// And then we specify the stack pointer resumption should begin at. Our
// `wasmtime_fiber_switch` function consumes 6 registers plus a return
// pointer, each 8 bytes wide, and the top 16 bytes are reserved, so that's:
//
// (6 + 1) * 8 + 16 = 0x48
lea rax, -0x48[rdi]
mov -0x10[rdi], rax
ret
",
);
// This is a pretty special function that has no real signature. Its use is to
// be the "base" function of all fibers. This entrypoint is used in
// `wasmtime_fiber_init` to bootstrap the execution of a new fiber.
//
// We also use this function as a persistent frame on the stack to emit dwarf
// information to unwind into the caller. This allows us to unwind from the
// fiber's stack back to the main stack that the fiber was called from. We use
// special dwarf directives here to do so since this is a pretty nonstandard
// function.
//
// If you're curious a decent introduction to CFI things and unwinding is at
// https://www.imperialviolet.org/2017/01/18/cfi.html
asm_func!(
"wasmtime_fiber_start",
// Register contract on entry, as arranged by `wasmtime_fiber_init` and
// restored by `wasmtime_fiber_switch`:
//
//   r12 = entry_arg0, rbp = top_of_stack, rbx = entry_point
//
// The body calls `entry_point(entry_arg0, top_of_stack)` and traps with `ud2`
// should that call ever return.
"
// Use the `simple` directive on the startproc here which indicates that
// some default settings for the platform are omitted, since this
// function is so nonstandard
.cfi_startproc simple
.cfi_def_cfa_offset 0
// This is where things get special, we're specifying a custom dwarf
// expression for how to calculate the CFA. The goal here is that we
// need to load the parent's stack pointer just before the call it made
// into `wasmtime_fiber_switch`. Note that the CFA value changes over
// time as well because a fiber may be resumed multiple times from
// different points on the original stack. This means that our custom
// CFA directive involves `DW_OP_deref`, which loads data from memory.
//
// The expression we're encoding here is that the CFA, the stack pointer
// of whatever called into `wasmtime_fiber_start`, is:
//
// *$rsp + 0x38
//
// $rsp is the stack pointer of `wasmtime_fiber_start` at the time the
// next instruction after the `.cfi_escape` is executed. Our $rsp at the
// start of this function is 16 bytes below the top of the stack (0xAff0
// in the diagram in unix.rs). The $rsp to resume at is stored at that
// location, so we dereference the stack pointer to load it.
//
// After dereferencing, though, we have the $rsp value for
// `wasmtime_fiber_switch` itself. That's a weird function which sort of
// and sort of doesn't exist on the stack. We want to point to the
// caller of `wasmtime_fiber_switch`, so to do that we need to skip the
// stack space reserved by `wasmtime_fiber_switch`, which is the 6 saved
// registers plus the return address of the caller's `call` instruction.
// Hence we offset another 0x38 bytes.
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
4, /* the byte length of this expression */ \
0x57, /* DW_OP_reg7 (rsp) */ \
0x06, /* DW_OP_deref */ \
0x23, 0x38 /* DW_OP_plus_uconst 0x38 */
// And now after we've indicated where our CFA is for our parent
// function, we can define that where all of the saved registers are
// located. This uses standard `.cfi` directives which indicate that
// these registers are all stored relative to the CFA. Note that this
// order is kept in sync with the above register spills in
// `wasmtime_fiber_switch`.
.cfi_rel_offset rip, -8
.cfi_rel_offset rbp, -16
.cfi_rel_offset rbx, -24
.cfi_rel_offset r12, -32
.cfi_rel_offset r13, -40
.cfi_rel_offset r14, -48
.cfi_rel_offset r15, -56
// The body of this function is pretty similar. All our parameters are
// already loaded into registers by the switch function. The
// `wasmtime_fiber_init` routine arranged the various values to be
// materialized into the registers used here. Our job is to then move
// the values into the ABI-defined registers and call the entry-point.
// Note that `call` is used here to leave this frame on the stack so we
// can use the dwarf info here for unwinding. The trailing `ud2` is just
// for safety.
mov rdi, r12
mov rsi, rbp
call rbx
ud2
.cfi_endproc
",
);