In order to keep the `ExternalName` enum small, the `TestcaseName` struct was limited to 17 bytes: a 1 byte length and a 16 byte buffer. Due to alignment, that made `ExternalName` 20 bytes. That fixed-size buffer means that the names of functions in Cranelift filetests are truncated to fit, which limits our ability to give tests meaningful names. And I think meaningful names are important in tests. This patch replaces the inline `TestcaseName` buffer with a heap-allocated slice. We don't care about performance for test names, so an indirection out to the heap is fine in that case. But we do care somewhat about the size of `ExternalName` when it's used during compiles. On 64-bit systems, `Box<[u8]>` is 16 bytes, so `TestcaseName` gets one byte smaller. Unfortunately, its alignment is 8 bytes, so `ExternalName` grows from 20 to 24 bytes. According to `valgrind --tool=dhat`, this change has very little effect on compiler performance. Building wasmtime with `--no-default-features --release`, and compiling the pulldown-cmark benchmark from Sightglass, I measured these differences between `main` and this patch: - total number of allocations didn't change (`ExternalName::TestCase` is not used in normal compiles) - 592 more bytes allocated over the process lifetime, out of 171.5MiB - 320 more bytes allocated at peak heap size, out of 12MiB - 0.24% more instructions executed - 16,987 more bytes written - 12,120 _fewer_ bytes read
test compile precise-output
set unwind_info=false
target aarch64

function %foo() {
block0:
    return
}

; block0:
; ret

function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
    return
}

; block0:
; ret

function %stack_limit_gv_leaf_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
block0(v0: i64):
    return
}

; block0:
; ret

function %stack_limit_call_zero(i64 stack_limit) {
    fn0 = %foo()
block0(v0: i64):
    call fn0()
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf
; block0:
; ldr x2, 8 ; b 12 ; data TestCase(%foo) + 0
; blr x2
; ldp fp, lr, [sp], #16
; ret

function %stack_limit_gv_call_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    fn0 = %foo()
block0(v0: i64):
    call fn0()
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; block0:
; ldr x2, 8 ; b 12 ; data TestCase(%foo) + 0
; blr x2
; ldp fp, lr, [sp], #16
; ret

function %stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 168
block0(v0: i64):
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; add x16, x0, #176
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #176
; block0:
; add sp, sp, #176
; ldp fp, lr, [sp], #16
; ret

function %huge_stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 400000
block0(v0: i64):
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; add x16, x0, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret

function %limit_preamble(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 20
block0(v0: i64):
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret

function %limit_preamble_huge(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 400000
block0(v0: i64):
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; add x16, x16, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret

function %limit_preamble_huge_offset(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+400000
    stack_limit = gv1
    ss0 = explicit_slot 20
block0(v0: i64):
    return
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; sub sp, sp, #32
; block0:
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret