Files
wasmtime/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
Jamey Sharp 9cb987c678 Don't limit ExternalName::TestName length (#4764)
In order to keep the `ExternalName` enum small, the `TestcaseName`
struct was limited to 17 bytes: a 1 byte length and a 16 byte buffer.
Due to alignment, that made `ExternalName` 20 bytes.

That fixed-size buffer means that the names of functions in Cranelift
filetests are truncated to fit, which limits our ability to give tests
meaningful names. And I think meaningful names are important in tests.

This patch replaces the inline `TestcaseName` buffer with a
heap-allocated slice. We don't care about performance for test names, so
an indirection out to the heap is fine in that case. But we do care
somewhat about the size of `ExternalName` when it's used during
compiles.

On 64-bit systems, `Box<[u8]>` is 16 bytes, so `TestcaseName` gets one
byte smaller. Unfortunately, its alignment is 8 bytes, so `ExternalName`
grows from 20 to 24 bytes.

According to `valgrind --tool=dhat`, this change has very little effect
on compiler performance. Building wasmtime with `--no-default-features
--release`, and compiling the pulldown-cmark benchmark from Sightglass,
I measured these differences between `main` and this patch:

- total number of allocations didn't change (`ExternalName::TestCase` is
  not used in normal compiles)
- 592 more bytes allocated over the process lifetime, out of 171.5MiB
- 320 more bytes allocated at peak heap size, out of 12MiB
- 0.24% more instructions executed
- 16,987 more bytes written
- 12,120 _fewer_ bytes read
2022-08-23 21:17:30 -07:00

190 lines
3.5 KiB
Plaintext

;; Cranelift filetest for aarch64 stack-limit checking, run in
;; `precise-output` mode: each `function` below is compiled for the target and
;; the emitted code is compared line-for-line against the `;`-prefixed
;; expectation comments that follow the function. Do NOT hand-edit those
;; expectation lines; regenerate them with the bless mechanism
;; (CRANELIFT_TEST_BLESS=1) instead — TODO(review): confirm the bless env var
;; matches the current test-runner version.
;;
;; Function groups and what they demonstrate (per the expectations below):
;;   - %foo, %stack_limit_leaf_zero, %stack_limit_gv_leaf_zero: leaf functions
;;     with no stack use — no prologue and no limit check is emitted at all
;;     (just `ret`), whether the limit comes from an argument or a global
;;     value chain.
;;   - %stack_limit_call_zero, %stack_limit_gv_call_zero: non-leaf functions —
;;     the limit (from the `stack_limit` argument, or loaded through the
;;     vmctx global-value chain gv0->gv1->gv2) is checked against sp with
;;     `subs xzr, sp, ...` / `b.hs 8 ; udf` before the call.
;;   - %stack_limit, %huge_stack_limit: explicit stack slots; a small frame
;;     (176 bytes) fits an immediate `add`, while a huge frame (400000 bytes)
;;     needs a movz/movk-built constant for both the check and the sp
;;     adjustment.
;;   - %limit_preamble, %limit_preamble_huge, %limit_preamble_huge_offset:
;;     same small/huge frame cases but with the limit loaded via vmctx,
;;     including a global-value load at a huge offset (gv0+400000) that
;;     requires materializing the offset with movz/movk.
test compile precise-output
set unwind_info=false
target aarch64
function %foo() {
block0:
return
}
; block0:
; ret
function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
return
}
; block0:
; ret
function %stack_limit_gv_leaf_zero(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
block0(v0: i64):
return
}
; block0:
; ret
function %stack_limit_call_zero(i64 stack_limit) {
fn0 = %foo()
block0(v0: i64):
call fn0()
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf
; block0:
; ldr x2, 8 ; b 12 ; data TestCase(%foo) + 0
; blr x2
; ldp fp, lr, [sp], #16
; ret
function %stack_limit_gv_call_zero(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
fn0 = %foo()
block0(v0: i64):
call fn0()
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; block0:
; ldr x2, 8 ; b 12 ; data TestCase(%foo) + 0
; blr x2
; ldp fp, lr, [sp], #16
; ret
function %stack_limit(i64 stack_limit) {
ss0 = explicit_slot 168
block0(v0: i64):
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; add x16, x0, #176
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #176
; block0:
; add sp, sp, #176
; ldp fp, lr, [sp], #16
; ret
function %huge_stack_limit(i64 stack_limit) {
ss0 = explicit_slot 400000
block0(v0: i64):
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; add x16, x0, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
function %limit_preamble(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
ss0 = explicit_slot 20
block0(v0: i64):
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret
function %limit_preamble_huge(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
ss0 = explicit_slot 400000
block0(v0: i64):
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; add x16, x16, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
function %limit_preamble_huge_offset(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+400000
stack_limit = gv1
ss0 = explicit_slot 20
block0(v0: i64):
return
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret