Use __builtin_setjmp instead of sigsetjmp. (#3360)
* Use `__builtin_setjmp` instead of `sigsetjmp`. Use [`__builtin_setjmp`] instead of `sigsetjmp`, as it is implemented in the compiler, performed inline, and saves much less state. This speeds up calls into wasm by about 8% on my machine. [`__builtin_setjmp`]: https://gcc.gnu.org/onlinedocs/gcc/Nonlocal-Gotos.html * Add a comment confirming that 5 really is the documented size. * Add a comment about callee-saved state and __builtin_setjmp. * On clang on aarch64, use sigsetjmp. * Fix a stray `#endif`.
This commit is contained in:
@@ -2,18 +2,44 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef CFG_TARGET_OS_windows
|
||||
|
||||
#define platform_setjmp(buf) setjmp(buf)
|
||||
#define platform_longjmp(buf, arg) longjmp(buf, arg)
|
||||
typedef jmp_buf platform_jmp_buf;
|
||||
|
||||
#elif defined(__clang__) && defined(__aarch64__)
|
||||
|
||||
// Clang on aarch64 doesn't support `__builtin_setjmp`, so use `sigsetjmp`
|
||||
// from libc.
|
||||
//
|
||||
// Note that `sigsetjmp` and `siglongjmp` are used here where possible to
|
||||
// explicitly pass a 0 argument to `sigsetjmp` that we don't need to preserve
|
||||
// the process signal mask. This should make this call a bit faster b/c it
|
||||
// doesn't need to touch the kernel signal handling routines.
|
||||
#ifdef CFG_TARGET_OS_windows
|
||||
#define platform_setjmp(buf) setjmp(buf)
|
||||
#define platform_longjmp(buf, arg) longjmp(buf, arg)
|
||||
#define platform_jmp_buf jmp_buf
|
||||
#else
|
||||
#define platform_setjmp(buf) sigsetjmp(buf, 0)
|
||||
#define platform_longjmp(buf, arg) siglongjmp(buf, arg)
|
||||
#define platform_jmp_buf sigjmp_buf
|
||||
typedef sigjmp_buf platform_jmp_buf;
|
||||
|
||||
#else
|
||||
|
||||
// GCC and Clang both provide `__builtin_setjmp`/`__builtin_longjmp`, which
|
||||
// differ from plain `setjmp` and `longjmp` in that they're implemented by
|
||||
// the compiler inline rather than in libc, and the compiler can avoid saving
|
||||
// and restoring most of the registers. See the [GCC docs] and [clang docs]
|
||||
// for more information.
|
||||
//
|
||||
// Per the caveat in the GCC docs, this assumes that the host compiler (which
|
||||
// may be compiling for a generic architecture family) knows about all the
|
||||
// register state that Cranelift (which may be specializing for the hardware at
|
||||
// runtime) is assuming is callee-saved.
|
||||
//
|
||||
// [GCC docs]: https://gcc.gnu.org/onlinedocs/gcc/Nonlocal-Gotos.html
|
||||
// [clang docs]: https://llvm.org/docs/ExceptionHandling.html#llvm-eh-sjlj-setjmp
|
||||
#define platform_setjmp(buf) __builtin_setjmp(buf)
|
||||
#define platform_longjmp(buf, arg) __builtin_longjmp(buf, arg)
|
||||
typedef void *platform_jmp_buf[5]; // this is the documented size; see the docs links for details.
|
||||
|
||||
#endif
|
||||
|
||||
int wasmtime_setjmp(
|
||||
|
||||
Reference in New Issue
Block a user