wasmtime/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
Alex Crichton · a3b21031d4 · Add a MachBuffer::defer_trap method (#6011)
* Add a `MachBuffer::defer_trap` method

This commit adds a new method to `MachBuffer` to defer trap opcodes to
the end of a function, in a similar manner to how constants are already
deferred there. This is useful for backends which frequently use
`TrapIf`-style opcodes. Currently such opcodes lower to a conditional
jump which skips the next instruction, a trap, after which execution
continues normally. While there isn't any pressing problem with this
construction, the trap opcode sits in the middle of the instruction
stream as opposed to "off on the side", despite rarely being taken.

With this method in place all the backends (except riscv64, since I
couldn't figure it out easily enough) have a new lowering of their
`TrapIf` opcode. The trap is now deferred, which returns a label, and
the conditional branch jumps to that label to reach the trap. A fixup
is then recorded in `MachBuffer` to get patched later on during
emission, or at the end of the function. As a result, every `TrapIf`
instruction translates to a single branch plus a single trap at the end
of the function.
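
The fixup half of that dance can be pictured like this (again a
sketch; the real `MachBuffer` records byte offsets and branch kinds,
not this simplified form):

    use std::collections::HashMap;

    // A branch emitted before its target label is bound is recorded as
    // a fixup; once the deferred trap is emitted and its label bound to
    // a concrete offset, the branch is patched to point at it.
    struct Fixup {
        inst_index: usize, // which branch needs its target patched
        label: usize,      // the label that branch refers to
    }

    fn patch_branches(
        branch_targets: &mut [Option<usize>],
        bound_labels: &HashMap<usize, usize>, // label -> bound offset
        fixups: &[Fixup],
    ) {
        for fixup in fixups {
            // By the end of the function every deferred-trap label has
            // been bound, so this lookup always succeeds.
            branch_targets[fixup.inst_index] = Some(bound_labels[&fixup.label]);
        }
    }

    fn main() {
        // One branch (index 0) targeting label 0, which ends up bound
        // at offset 0x2c: the sunk trap at the end of the function.
        let mut targets = vec![None];
        let bound = HashMap::from([(0, 0x2c)]);
        patch_branches(&mut targets, &bound, &[Fixup { inst_index: 0, label: 0 }]);
        assert_eq!(targets[0], Some(0x2c));
    }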

I've additionally updated some more lowerings in the x64 backend which
were explicitly emitting traps to instead use `TrapIf` where
applicable, avoiding jumps over traps mid-function. Other backends
didn't appear to have many jump-over-the-next-trap patterns.

Lots of tests have had their expectations updated here, which should
reflect all the traps being sunk to the ends of functions.

* Print trap code on all platforms

* Emit traps before constants

* Preserve source location information for traps

* Fix test expectations

* Attempt to fix s390x

The MachBuffer was registering trap codes with the first byte of the
trap, but the SIGILL handler was expecting them to be registered with
the last byte of the trap. Exploit the fact that SIGILL is always
raised by a 2-byte instruction: always march back 2 bytes for SIGILL,
and march back 1 byte for SIGFPE-generating instructions.
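
In other words (a hypothetical helper, not the actual s390x code, and
note these changes were backed out below):

    // Map the address reported by the signal handler back to the
    // offset the trap code was registered at (the first byte of the
    // trapping instruction).
    fn trap_record_addr(reported_addr: usize, is_sigill: bool) -> usize {
        if is_sigill {
            // SIGILL-raising trap instructions on s390x are always 2
            // bytes wide, so march back 2 bytes.
            reported_addr - 2
        } else {
            // SIGFPE-generating instructions: march back 1 byte.
            reported_addr - 1
        }
    }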

* Back out s390x changes

* Back out more s390x bits

* Review comments
2023-03-20 21:24:47 +00:00


test compile precise-output
set unwind_info=false
target aarch64
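
;; Stack-limit checks on aarch64: leaf functions which use no stack get
;; no check; functions which call or allocate stack compare sp against
;; the limit (a function argument or a value loaded through vmctx) and
;; branch to a stk_ovf trap that is sunk to the end of the function.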

function %foo() {
block0:
    return
}

; VCode:
; block0:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ret

function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
    return
}

; VCode:
; block0:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ret

function %stack_limit_gv_leaf_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2

block0(v0: i64):
    return
}

; VCode:
; block0:
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ret

function %stack_limit_call_zero(i64 stack_limit) {
    fn0 = %foo()

block0(v0: i64):
    call fn0()
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
; blr x2
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; cmp sp, x0
; b.lo #0x2c
; block1: ; offset 0x10
; ldr x2, #0x18
; b #0x20
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %stack_limit_gv_call_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    fn0 = %foo()

block0(v0: i64):
    call fn0()
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
; blr x2
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; cmp sp, x16
; b.lo #0x34
; block1: ; offset 0x18
; ldr x2, #0x20
; b #0x28
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 168

block0(v0: i64):
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; add x16, x0, #176
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; sub sp, sp, #176
; block0:
; add sp, sp, #176
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; add x16, x0, #0xb0
; cmp sp, x16
; b.lo #0x24
; sub sp, sp, #0xb0
; block1: ; offset 0x18
; add sp, sp, #0xb0
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %huge_stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 400000

block0(v0: i64):
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.lo #trap=stk_ovf
; movz w17, #6784
; movk w17, w17, #6, LSL #16
; add x16, x0, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; movz w16, #6784
; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; cmp sp, x0
; b.lo #0x44
; mov w17, #0x1a80
; movk w17, #6, lsl #16
; add x16, x0, x17, uxtx
; cmp sp, x16
; b.lo #0x48
; mov w16, #0x1a80
; movk w16, #6, lsl #16
; sub sp, sp, x16
; block1: ; offset 0x30
; mov w16, #0x1a80
; movk w16, #6, lsl #16
; add sp, sp, x16
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %limit_preamble(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 20

block0(v0: i64):
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; add x16, x16, #0x20
; cmp sp, x16
; b.lo #0x2c
; sub sp, sp, #0x20
; block1: ; offset 0x20
; add sp, sp, #0x20
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %limit_preamble_huge(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 400000

block0(v0: i64):
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; movz w17, #6784
; movk w17, w17, #6, LSL #16
; add x16, x16, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; movz w16, #6784
; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; cmp sp, x16
; b.lo #0x4c
; mov w17, #0x1a80
; movk w17, #6, lsl #16
; add x16, x16, x17, uxtx
; cmp sp, x16
; b.lo #0x50
; mov w16, #0x1a80
; movk w16, #6, lsl #16
; sub sp, sp, x16
; block1: ; offset 0x38
; mov w16, #0x1a80
; movk w16, #6, lsl #16
; add sp, sp, x16
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf

function %limit_preamble_huge_offset(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+400000
    stack_limit = gv1
    ss0 = explicit_slot 20

block0(v0: i64):
    return
}

; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #6784 ; movk w16, w16, #6, LSL #16 ; ldr x16, [x0, x16, SXTX]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; mov w16, #0x1a80
; movk w16, #6, lsl #16
; ldr x16, [x0, x16, sxtx]
; add x16, x16, #0x20
; cmp sp, x16
; b.lo #0x30
; sub sp, sp, #0x20
; block1: ; offset 0x24
; add sp, sp, #0x20
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf