Use global_asm! instead of external assembly files (#4306)
* Use `global_asm!` instead of external assembly files This commit moves the external assembly files of the `wasmtime-fiber` crate into `global_asm!` blocks defined in Rust. The motivation for doing this is not very strong at this time, but the points in favor of this are: * One less tool needed to cross-compile Wasmtime. A linker is still needed but perhaps one day that will improve as well. * A "modern" assembler, built-in to LLVM, is used instead of whatever appears on the system. The first point hasn't really cropped up that much and typically getting an assembler is just as hard as getting a linker nowadays. The second point though has us using `hint #xx` in aarch64 assembly instead of the actual instructions for assembler compatibility, and I believe that's no longer necessary because the LLVM assembler supports the modern instruction names. The translation of the x86/x86_64 assembly has been done to Intel syntax as well as opposed to the old AT&T syntax since that's Rust's default. Additionally s390x still remains in an external assembler file because `global_asm!` is still unstable in Rust on that platform. * Simplify alignment specification * Temporarily disable fail-fast * Add `.cfi_def_cfa_offset 0` to fix CI * Turn off fail-fast * Review comments
This commit is contained in:
175
crates/fiber/src/unix/aarch64.rs
Normal file
175
crates/fiber/src/unix/aarch64.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
// A WORD OF CAUTION
|
||||
//
|
||||
// This entire file basically needs to be kept in sync with itself. It's not
|
||||
// really possible to modify just one bit of this file without understanding
|
||||
// all the other bits. Documentation tries to reference various bits here and
|
||||
// there but try to make sure to read over everything before tweaking things!
|
||||
//
|
||||
// Also at this time this file is heavily based off the x86_64 file, so you'll
|
||||
// probably want to read that one as well.
|
||||
//
|
||||
// Finally, control flow integrity hardening has been applied to the code using
|
||||
// the Pointer Authentication (PAuth) and Branch Target Identification (BTI)
|
||||
// technologies from the Arm instruction set architecture:
|
||||
// * All callable functions start with either the `BTI c` or `PACIASP`/`PACIBSP`
|
||||
// instructions
|
||||
// * Return addresses are signed and authenticated using the stack pointer
|
||||
// value as a modifier (similarly to the salt in a HMAC operation); the
|
||||
// `DW_CFA_AARCH64_negate_ra_state` DWARF operation (aliased with the
|
||||
// `.cfi_window_save` assembler directive) informs an unwinder about this
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "macos")] {
|
||||
macro_rules! cfi_window_save { () => (); }
|
||||
macro_rules! pacia1716 { () => (); }
|
||||
macro_rules! paciasp { () => (); }
|
||||
macro_rules! autiasp { () => (); }
|
||||
} else {
|
||||
macro_rules! cfi_window_save { () => (".cfi_window_save\n"); }
|
||||
macro_rules! pacia1716 { () => ("pacia1716\n"); }
|
||||
macro_rules! paciasp { () => ("paciasp\n"); }
|
||||
macro_rules! autiasp { () => ("autiasp\n"); }
|
||||
}
|
||||
}
|
||||
|
||||
// fn(top_of_stack(%x0): *mut u8)
|
||||
asm_func!(
|
||||
"wasmtime_fiber_switch",
|
||||
"
|
||||
.cfi_startproc
|
||||
",
|
||||
paciasp!(),
|
||||
cfi_window_save!(),
|
||||
"
|
||||
// Save all callee-saved registers on the stack since we're
|
||||
// assuming they're clobbered as a result of the stack switch.
|
||||
stp x29, x30, [sp, -16]!
|
||||
stp x20, x19, [sp, -16]!
|
||||
stp x22, x21, [sp, -16]!
|
||||
stp x24, x23, [sp, -16]!
|
||||
stp x26, x25, [sp, -16]!
|
||||
stp x28, x27, [sp, -16]!
|
||||
stp d9, d8, [sp, -16]!
|
||||
stp d11, d10, [sp, -16]!
|
||||
stp d13, d12, [sp, -16]!
|
||||
stp d15, d14, [sp, -16]!
|
||||
|
||||
// Load our previously saved stack pointer to resume to, and save
|
||||
// off our current stack pointer on where to come back to
|
||||
// eventually.
|
||||
ldr x8, [x0, -0x10]
|
||||
mov x9, sp
|
||||
str x9, [x0, -0x10]
|
||||
|
||||
// Switch to the new stack and restore all our callee-saved
|
||||
// registers after the switch and return to our new stack.
|
||||
mov sp, x8
|
||||
ldp d15, d14, [sp], 16
|
||||
ldp d13, d12, [sp], 16
|
||||
ldp d11, d10, [sp], 16
|
||||
ldp d9, d8, [sp], 16
|
||||
ldp x28, x27, [sp], 16
|
||||
ldp x26, x25, [sp], 16
|
||||
ldp x24, x23, [sp], 16
|
||||
ldp x22, x21, [sp], 16
|
||||
ldp x20, x19, [sp], 16
|
||||
ldp x29, x30, [sp], 16
|
||||
",
|
||||
autiasp!(),
|
||||
cfi_window_save!(),
|
||||
"
|
||||
ret
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
|
||||
// fn(
|
||||
// top_of_stack(%x0): *mut u8,
|
||||
// entry_point(%x1): extern fn(*mut u8, *mut u8),
|
||||
// entry_arg0(%x2): *mut u8,
|
||||
// )
|
||||
// We set up the newly initialized fiber, so that it resumes execution
|
||||
// from wasmtime_fiber_start(). As a result, we need a signed address
|
||||
// of this function, so there are 2 requirements:
|
||||
// * The fiber stack pointer value that is used by the signing operation
|
||||
// must match the value when the pointer is authenticated inside
|
||||
// wasmtime_fiber_switch(), otherwise the latter would fault
|
||||
// * We would like to use an instruction that is executed as a no-op by
|
||||
// processors that do not support PAuth, so that the code is
|
||||
// backward-compatible and there is no duplication; `PACIA1716` is a
|
||||
// suitable one, which has the following operand register
|
||||
// conventions:
|
||||
// * X17 contains the pointer value to sign
|
||||
// * X16 contains the modifier value
|
||||
//
|
||||
// TODO: Use the PACGA instruction to authenticate the saved register
|
||||
// state, which avoids creating signed pointers to
|
||||
// wasmtime_fiber_start(), and provides wider coverage.
|
||||
#[rustfmt::skip]
|
||||
asm_func!(
|
||||
"wasmtime_fiber_init",
|
||||
"
|
||||
.cfi_startproc
|
||||
hint #34 // bti c
|
||||
sub x16, x0, #16
|
||||
adr x17, ", asm_sym!("wasmtime_fiber_start"), "
|
||||
",
|
||||
pacia1716!(),
|
||||
"
|
||||
str x17, [x16, -0x8] // x17 => lr
|
||||
str x0, [x16, -0x18] // x0 => x19
|
||||
stp x2, x1, [x0, -0x38] // x1 => x20, x2 => x21
|
||||
|
||||
// `wasmtime_fiber_switch` has an 0xa0 byte stack, and we add 0x10 more for
|
||||
// the original reserved 16 bytes.
|
||||
add x8, x0, -0xb0
|
||||
str x8, [x0, -0x10]
|
||||
ret
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
|
||||
// See the x86_64 file for more commentary on what these CFI directives are
|
||||
// doing. Like over there note that the relative offsets to registers here
|
||||
// match the frame layout in `wasmtime_fiber_switch`.
|
||||
asm_func!(
|
||||
"wasmtime_fiber_start",
|
||||
"
|
||||
.cfi_startproc simple
|
||||
.cfi_def_cfa_offset 0
|
||||
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
|
||||
5, /* the byte length of this expression */ \
|
||||
0x6f, /* DW_OP_reg31(%sp) */ \
|
||||
0x06, /* DW_OP_deref */ \
|
||||
0x23, 0xa0, 0x1 /* DW_OP_plus_uconst 0xa0 */
|
||||
.cfi_rel_offset x29, -0x10
|
||||
.cfi_rel_offset x30, -0x08
|
||||
",
|
||||
cfi_window_save!(),
|
||||
"
|
||||
.cfi_rel_offset x19, -0x18
|
||||
.cfi_rel_offset x20, -0x20
|
||||
.cfi_rel_offset x21, -0x28
|
||||
.cfi_rel_offset x22, -0x30
|
||||
.cfi_rel_offset x23, -0x38
|
||||
.cfi_rel_offset x24, -0x40
|
||||
.cfi_rel_offset x25, -0x48
|
||||
.cfi_rel_offset x26, -0x50
|
||||
.cfi_rel_offset x27, -0x58
|
||||
|
||||
// Move the two arguments prepared by `wasmtime_fiber_init` into the
// argument registers: x0 receives entry_arg0 (restored into x21) and x1
// receives top_of_stack (restored into x19). The entry point is in x20.
|
||||
mov x0, x21
|
||||
mov x1, x19
|
||||
|
||||
// ... and then we call the function! Note that this is a function call
|
||||
// so our frame stays on the stack to backtrace through.
|
||||
blr x20
|
||||
// Unreachable, here for safety. This should help catch unexpected
|
||||
// behaviors. Use a noticeable payload so one can grep for it in the
|
||||
// codebase.
|
||||
brk 0xf1b3
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
83
crates/fiber/src/unix/arm.rs
Normal file
83
crates/fiber/src/unix/arm.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
// A WORD OF CAUTION
|
||||
//
|
||||
// This entire file basically needs to be kept in sync with itself. It's not
|
||||
// really possible to modify just one bit of this file without understanding
|
||||
// all the other bits. Documentation tries to reference various bits here and
|
||||
// there but try to make sure to read over everything before tweaking things!
|
||||
//
|
||||
// Also at this time this file is heavily based off the x86_64 file, so you'll
|
||||
// probably want to read that one as well.
|
||||
|
||||
// fn(top_of_stack(%r0): *mut u8)
|
||||
asm_func!(
|
||||
"wasmtime_fiber_switch",
|
||||
"
|
||||
// Save callee-saved registers
|
||||
push {{r4-r11,lr}}
|
||||
|
||||
// Swap stacks, recording our current stack pointer
|
||||
ldr r4, [r0, #-0x08]
|
||||
str sp, [r0, #-0x08]
|
||||
mov sp, r4
|
||||
|
||||
// Restore and return
|
||||
pop {{r4-r11,lr}}
|
||||
bx lr
|
||||
",
|
||||
);
|
||||
|
||||
// fn(
|
||||
// top_of_stack(%r0): *mut u8,
|
||||
// entry_point(%r1): extern fn(*mut u8, *mut u8),
|
||||
// entry_arg0(%r2): *mut u8,
|
||||
// )
|
||||
asm_func!(
|
||||
"wasmtime_fiber_init",
|
||||
"
|
||||
adr r3, wasmtime_fiber_start
|
||||
str r3, [r0, #-0x0c] // => lr
|
||||
str r0, [r0, #-0x10] // => r11
|
||||
str r1, [r0, #-0x14] // => r10
|
||||
str r2, [r0, #-0x18] // => r9
|
||||
|
||||
add r3, r0, #-0x2c
|
||||
str r3, [r0, #-0x08]
|
||||
bx lr
|
||||
",
|
||||
);
|
||||
|
||||
asm_func!(
|
||||
"wasmtime_fiber_start",
|
||||
"
|
||||
.cfi_startproc simple
|
||||
.cfi_def_cfa_offset 0
|
||||
// See the x86_64 file for more commentary on what these CFI directives
|
||||
// are doing. Like over there note that the relative offsets to
|
||||
// registers here match the frame layout in `wasmtime_fiber_switch`.
|
||||
//
|
||||
// TODO: this is only lightly tested. This gets backtraces in gdb but
|
||||
// not at runtime. Perhaps the libgcc at runtime was too old? Doesn't
|
||||
// support something here? Unclear. Will need investigation if someone
|
||||
// ends up needing this and it still doesn't work.
|
||||
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
|
||||
5, /* the byte length of this expression */ \
|
||||
0x7d, 0x00, /* DW_OP_breg13(%sp) + 0 */ \
|
||||
0x06, /* DW_OP_deref */ \
|
||||
0x23, 0x24 /* DW_OP_plus_uconst 0x24 */
|
||||
|
||||
.cfi_rel_offset lr, -0x04
|
||||
.cfi_rel_offset r11, -0x08
|
||||
.cfi_rel_offset r10, -0x0c
|
||||
.cfi_rel_offset r9, -0x10
|
||||
.cfi_rel_offset r8, -0x14
|
||||
.cfi_rel_offset r7, -0x18
|
||||
.cfi_rel_offset r6, -0x1c
|
||||
.cfi_rel_offset r5, -0x20
|
||||
.cfi_rel_offset r4, -0x24
|
||||
|
||||
mov r1, r11
|
||||
mov r0, r9
|
||||
blx r10
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
120
crates/fiber/src/unix/s390x.S
Normal file
120
crates/fiber/src/unix/s390x.S
Normal file
@@ -0,0 +1,120 @@
|
||||
// A WORD OF CAUTION
|
||||
//
|
||||
// This entire file basically needs to be kept in sync with itself. It's not
|
||||
// really possible to modify just one bit of this file without understanding
|
||||
// all the other bits. Documentation tries to reference various bits here and
|
||||
// there but try to make sure to read over everything before tweaking things!
|
||||
//
|
||||
// Also at this time this file is heavily based off the x86_64 file, so you'll
|
||||
// probably want to read that one as well.
|
||||
|
||||
.text
|
||||
|
||||
#define GLOBL(fnname) .globl fnname
|
||||
#define HIDDEN(fnname) .hidden fnname
|
||||
#define TYPE(fnname) .type fnname,@function
|
||||
#define FUNCTION(fnname) fnname
|
||||
#define SIZE(fnname) .size fnname,.-fnname
|
||||
|
||||
// fn(top_of_stack(%r2): *mut u8)
|
||||
HIDDEN(wasmtime_fiber_switch)
|
||||
GLOBL(wasmtime_fiber_switch)
|
||||
.p2align 2
|
||||
TYPE(wasmtime_fiber_switch)
|
||||
FUNCTION(wasmtime_fiber_switch):
|
||||
// Save all callee-saved registers on the stack since we're assuming
|
||||
// they're clobbered as a result of the stack switch.
|
||||
stmg %r6, %r15, 48(%r15)
|
||||
aghi %r15, -64
|
||||
std %f8, 0(%r15)
|
||||
std %f9, 8(%r15)
|
||||
std %f10, 16(%r15)
|
||||
std %f11, 24(%r15)
|
||||
std %f12, 32(%r15)
|
||||
std %f13, 40(%r15)
|
||||
std %f14, 48(%r15)
|
||||
std %f15, 56(%r15)
|
||||
|
||||
// Load our previously saved stack pointer to resume to, and save off our
|
||||
// current stack pointer on where to come back to eventually.
|
||||
lg %r1, -16(%r2)
|
||||
stg %r15, -16(%r2)
|
||||
|
||||
// Switch to the new stack and restore all our callee-saved registers after
|
||||
// the switch and return to our new stack.
|
||||
ld %f8, 0(%r1)
|
||||
ld %f9, 8(%r1)
|
||||
ld %f10, 16(%r1)
|
||||
ld %f11, 24(%r1)
|
||||
ld %f12, 32(%r1)
|
||||
ld %f13, 40(%r1)
|
||||
ld %f14, 48(%r1)
|
||||
ld %f15, 56(%r1)
|
||||
lmg %r6, %r15, 112(%r1)
|
||||
br %r14
|
||||
SIZE(wasmtime_fiber_switch)
|
||||
|
||||
// fn(
|
||||
// top_of_stack(%r2): *mut u8,
// entry_point(%r3): extern fn(*mut u8, *mut u8),
// entry_arg0(%r4): *mut u8,
|
||||
// )
|
||||
HIDDEN(wasmtime_fiber_init)
|
||||
GLOBL(wasmtime_fiber_init)
|
||||
.p2align 2
|
||||
TYPE(wasmtime_fiber_init)
|
||||
FUNCTION(wasmtime_fiber_init):
|
||||
larl %r1, FUNCTION(wasmtime_fiber_start)
|
||||
stg %r1, -48(%r2) // wasmtime_fiber_start - restored into %r14
|
||||
stg %r2, -112(%r2) // top_of_stack - restored into %r6
|
||||
stg %r3, -104(%r2) // entry_point - restored into %r7
|
||||
stg %r4, -96(%r2) // entry_arg0 - restored into %r8
|
||||
aghi %r2, -160 // 160 bytes register save area
|
||||
stg %r2, 120(%r2) // bottom of register save area - restored into %r15
|
||||
|
||||
// `wasmtime_fiber_switch` has a 64 byte stack.
|
||||
aghi %r2, -64
|
||||
stg %r2, 208(%r2)
|
||||
br %r14
|
||||
SIZE(wasmtime_fiber_init)
|
||||
|
||||
.p2align 2
|
||||
TYPE(wasmtime_fiber_start)
|
||||
FUNCTION(wasmtime_fiber_start):
|
||||
.cfi_startproc simple
|
||||
.cfi_def_cfa_offset 0
|
||||
|
||||
// See the x86_64 file for more commentary on what these CFI directives are
|
||||
// doing. Like over there note that the relative offsets to registers here
|
||||
// match the frame layout in `wasmtime_fiber_switch`.
|
||||
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
|
||||
7, /* the byte length of this expression */ \
|
||||
0x7f, 0x90, 0x1, /* DW_OP_breg15 0x90 */ \
|
||||
0x06, /* DW_OP_deref */ \
|
||||
0x23, 0xe0, 0x1 /* DW_OP_plus_uconst 0xe0 */
|
||||
|
||||
.cfi_rel_offset 6, -112
|
||||
.cfi_rel_offset 7, -104
|
||||
.cfi_rel_offset 8, -96
|
||||
.cfi_rel_offset 9, -88
|
||||
.cfi_rel_offset 10, -80
|
||||
.cfi_rel_offset 11, -72
|
||||
.cfi_rel_offset 12, -64
|
||||
.cfi_rel_offset 13, -56
|
||||
.cfi_rel_offset 14, -48
|
||||
.cfi_rel_offset 15, -40
|
||||
|
||||
// Load our two arguments prepared by `wasmtime_fiber_init`.
|
||||
lgr %r2, %r8 // entry_arg0
|
||||
lgr %r3, %r6 // top_of_stack
|
||||
|
||||
// ... and then we call the function! Note that this is a function call so
|
||||
// our frame stays on the stack to backtrace through.
|
||||
basr %r14, %r7 // entry_point
|
||||
// .. technically we shouldn't get here, so just trap.
|
||||
.word 0x0000
|
||||
.cfi_endproc
|
||||
SIZE(wasmtime_fiber_start)
|
||||
|
||||
// Mark that we don't need executable stack.
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
107
crates/fiber/src/unix/x86.rs
Normal file
107
crates/fiber/src/unix/x86.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
// A WORD OF CAUTION
|
||||
//
|
||||
// This entire file basically needs to be kept in sync with itself. It's not
|
||||
// really possible to modify just one bit of this file without understanding
|
||||
// all the other bits. Documentation tries to reference various bits here and
|
||||
// there but try to make sure to read over everything before tweaking things!
|
||||
//
|
||||
// This file is modeled after x86_64.rs and comments are not copied over. For
|
||||
// reference be sure to review the other file. Note that the pointer size is
|
||||
// different so the reserved space at the top of the stack is 8 bytes, not 16
|
||||
// bytes. Still two pointers though.
|
||||
|
||||
// fn(top_of_stack: *mut u8)
|
||||
asm_func!(
|
||||
"wasmtime_fiber_switch",
|
||||
"
|
||||
// Load our stack-to-use
|
||||
mov eax, 0x4[esp]
|
||||
mov ecx, -0x8[eax]
|
||||
|
||||
// Save callee-saved registers
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
||||
// Save our current stack and jump to the stack-to-use
|
||||
mov -0x8[eax], esp
|
||||
mov esp, ecx
|
||||
|
||||
// Restore callee-saved registers
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
",
|
||||
);
|
||||
|
||||
// fn(
|
||||
// top_of_stack: *mut u8,
|
||||
// entry_point: extern fn(*mut u8, *mut u8),
|
||||
// entry_arg0: *mut u8,
|
||||
// )
|
||||
asm_func!(
    "wasmtime_fiber_init",
    "
        // Builds the initial frame that `wasmtime_fiber_switch` pops the
        // first time this fiber is resumed. The three arguments are read
        // from the caller's stack at 4[esp], 8[esp] and 12[esp]; only the
        // scratch registers eax and ecx are written.
        mov eax, 4[esp]             // eax = top_of_stack

        // move top_of_stack to the 2nd argument
        mov -0x0c[eax], eax

        // move entry_arg0 to the 1st argument
        mov ecx, 12[esp]
        mov -0x10[eax], ecx

        // Move our start function to the return address which the `ret` in
        // `wasmtime_fiber_switch` will return to.
        lea ecx, wasmtime_fiber_start
        mov -0x14[eax], ecx

        // And move `entry_point` to get loaded into `ebp` through the context
        // switch. This'll get called in `wasmtime_fiber_start`.
        mov ecx, 8[esp]
        mov -0x18[eax], ecx

        // Our stack from top-to-bottom looks like:
        //
        // * 8 bytes of reserved space per unix.rs (two-pointers space)
        // * 8 bytes of arguments (two arguments wasmtime_fiber_start forwards)
        // * 4 bytes of return address
        // * 16 bytes of saved registers
        //
        // Of the four saved-register slots only the `ebp` one is written
        // above; the `ebx`, `esi` and `edi` slots are popped as-is by the
        // first switch.
        //
        // Note that after the return address the stack is conveniently 16-byte
        // aligned as required, so we just leave the arguments on the stack in
        // `wasmtime_fiber_start` and immediately do the call.
        lea ecx, -0x24[eax]
        mov -0x08[eax], ecx         // resume-sp slot read by the switch
        ret
    ",
);
|
||||
|
||||
asm_func!(
|
||||
"wasmtime_fiber_start",
|
||||
"
|
||||
.cfi_startproc simple
|
||||
.cfi_def_cfa_offset 0
|
||||
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
|
||||
5, /* the byte length of this expression */ \
|
||||
0x74, 0x08, /* DW_OP_breg4 (%esp) + 8 */ \
|
||||
0x06, /* DW_OP_deref */ \
|
||||
0x23, 0x14 /* DW_OP_plus_uconst 0x14 */
|
||||
|
||||
.cfi_rel_offset eip, -4
|
||||
.cfi_rel_offset ebp, -8
|
||||
.cfi_rel_offset ebx, -12
|
||||
.cfi_rel_offset esi, -16
|
||||
.cfi_rel_offset edi, -20
|
||||
|
||||
// Our arguments and stack alignment are all prepped by
|
||||
// `wasmtime_fiber_init`.
|
||||
call ebp
|
||||
ud2
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
157
crates/fiber/src/unix/x86_64.rs
Normal file
157
crates/fiber/src/unix/x86_64.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
// A WORD OF CAUTION
|
||||
//
|
||||
// This entire file basically needs to be kept in sync with itself. It's not
|
||||
// really possible to modify just one bit of this file without understanding
|
||||
// all the other bits. Documentation tries to reference various bits here and
|
||||
// there but try to make sure to read over everything before tweaking things!
|
||||
|
||||
// fn(top_of_stack(rdi): *mut u8)
|
||||
asm_func!(
|
||||
"wasmtime_fiber_switch",
|
||||
"
|
||||
// We're switching to arbitrary code somewhere else, so pessimistically
|
||||
// assume that all callee-saved registers are clobbered. This means we need
|
||||
// to save/restore all of them.
|
||||
//
|
||||
// Note that this order for saving is important since we use CFI directives
|
||||
// below to point to where all the saved registers are.
|
||||
push rbp
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
// Load pointer that we're going to resume at and store where we're going
|
||||
// to get resumed from. This is in accordance with the diagram at the top
|
||||
// of unix.rs.
|
||||
mov rax, -0x10[rdi]
|
||||
mov -0x10[rdi], rsp
|
||||
|
||||
// Swap stacks and restore all our callee-saved registers
|
||||
mov rsp, rax
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
",
|
||||
);
|
||||
|
||||
// fn(
|
||||
// top_of_stack(rdi): *mut u8,
|
||||
// entry_point(rsi): extern fn(*mut u8, *mut u8),
|
||||
// entry_arg0(rdx): *mut u8,
|
||||
// )
|
||||
#[rustfmt::skip]
|
||||
asm_func!(
|
||||
"wasmtime_fiber_init",
|
||||
"
|
||||
// Here we're going to set up a stack frame as expected by
|
||||
// `wasmtime_fiber_switch`. The values we store here will get restored into
|
||||
// registers by that function and the `wasmtime_fiber_start` function will
|
||||
// take over and understands which values are in which registers.
|
||||
//
|
||||
// The first 16 bytes of stack are reserved for metadata, so we start
|
||||
// storing values beneath that.
|
||||
lea rax, ", asm_sym!("wasmtime_fiber_start"), "[rip]
|
||||
mov -0x18[rdi], rax
|
||||
mov -0x20[rdi], rdi // loaded into rbp during switch
|
||||
mov -0x28[rdi], rsi // loaded into rbx during switch
|
||||
mov -0x30[rdi], rdx // loaded into r12 during switch
|
||||
|
||||
// And then we specify the stack pointer that resumption should begin at. Our
|
||||
// `wasmtime_fiber_switch` function consumes 6 registers plus a return
|
||||
// pointer, and the top 16 bytes are reserved, so that's:
|
||||
//
|
||||
// (6 + 1) * 8 + 16 = 0x48
|
||||
lea rax, -0x48[rdi]
|
||||
mov -0x10[rdi], rax
|
||||
ret
|
||||
",
|
||||
);
|
||||
|
||||
// This is a pretty special function that has no real signature. Its use is to
|
||||
// be the "base" function of all fibers. This entrypoint is used in
|
||||
// `wasmtime_fiber_init` to bootstrap the execution of a new fiber.
|
||||
//
|
||||
// We also use this function as a persistent frame on the stack to emit dwarf
|
||||
// information to unwind into the caller. This allows us to unwind from the
|
||||
// fiber's stack back to the main stack that the fiber was called from. We use
|
||||
// special dwarf directives here to do so since this is a pretty nonstandard
|
||||
// function.
|
||||
//
|
||||
// If you're curious a decent introduction to CFI things and unwinding is at
|
||||
// https://www.imperialviolet.org/2017/01/18/cfi.html
|
||||
asm_func!(
|
||||
"wasmtime_fiber_start",
|
||||
"
|
||||
// Use the `simple` directive on the startproc here which indicates that
|
||||
// some default settings for the platform are omitted, since this
|
||||
// function is so nonstandard
|
||||
.cfi_startproc simple
|
||||
.cfi_def_cfa_offset 0
|
||||
|
||||
// This is where things get special, we're specifying a custom dwarf
|
||||
// expression for how to calculate the CFA. The goal here is that we
|
||||
// need to load the parent's stack pointer just before the call it made
|
||||
// into `wasmtime_fiber_switch`. Note that the CFA value changes over
|
||||
// time as well because a fiber may be resumed multiple times from
|
||||
// different points on the original stack. This means that our custom
|
||||
// CFA directive involves `DW_OP_deref`, which loads data from memory.
|
||||
//
|
||||
// The expression we're encoding here is that the CFA, the stack pointer
|
||||
// of whatever called into `wasmtime_fiber_start`, is:
|
||||
//
|
||||
// *$rsp + 0x38
|
||||
//
|
||||
// $rsp is the stack pointer of `wasmtime_fiber_start` at the time the
|
||||
// next instruction after the `.cfi_escape` is executed. Our $rsp at the
|
||||
// start of this function is 16 bytes below the top of the stack (0xAff0
|
||||
// in the diagram in unix.rs). The $rsp to resume at is stored at that
|
||||
// location, so we dereference the stack pointer to load it.
|
||||
//
|
||||
// After dereferencing, though, we have the $rsp value for
|
||||
// `wasmtime_fiber_switch` itself. That's a weird function which sort of does
|
||||
// and sort of doesn't exist on the stack. We want to point to the
|
||||
// caller of `wasmtime_fiber_switch`, so to do that we need to skip the
|
||||
// stack space reserved by `wasmtime_fiber_switch`, which is the 6 saved
|
||||
// registers plus the return address of the caller's `call` instruction.
|
||||
// Hence we offset another 0x38 bytes.
|
||||
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
|
||||
4, /* the byte length of this expression */ \
|
||||
0x57, /* DW_OP_reg7 (rsp) */ \
|
||||
0x06, /* DW_OP_deref */ \
|
||||
0x23, 0x38 /* DW_OP_plus_uconst 0x38 */
|
||||
|
||||
// And now after we've indicated where our CFA is for our parent
|
||||
// function, we can define that where all of the saved registers are
|
||||
// located. This uses standard `.cfi` directives which indicate that
|
||||
// these registers are all stored relative to the CFA. Note that this
|
||||
// order is kept in sync with the above register spills in
|
||||
// `wasmtime_fiber_switch`.
|
||||
.cfi_rel_offset rip, -8
|
||||
.cfi_rel_offset rbp, -16
|
||||
.cfi_rel_offset rbx, -24
|
||||
.cfi_rel_offset r12, -32
|
||||
.cfi_rel_offset r13, -40
|
||||
.cfi_rel_offset r14, -48
|
||||
.cfi_rel_offset r15, -56
|
||||
|
||||
// The body of this function is pretty similar. All our parameters are
|
||||
// already loaded into registers by the switch function. The
|
||||
// `wasmtime_fiber_init` routine arranged the various values to be
|
||||
// materialized into the registers used here. Our job is to then move
|
||||
// the values into the ABI-defined registers and call the entry-point.
|
||||
// Note that `call` is used here to leave this frame on the stack so we
|
||||
// can use the dwarf info here for unwinding. The trailing `ud2` is just
|
||||
// for safety.
|
||||
mov rdi, r12
|
||||
mov rsi, rbp
|
||||
call rbx
|
||||
ud2
|
||||
.cfi_endproc
|
||||
",
|
||||
);
|
||||
Reference in New Issue
Block a user