In order to keep the `ExternalName` enum small, the `TestcaseName` struct was limited to 17 bytes: a 1-byte length and a 16-byte buffer. Due to alignment, that made `ExternalName` 20 bytes. That fixed-size buffer means that the names of functions in Cranelift filetests are truncated to fit, which limits our ability to give tests meaningful names. And I think meaningful names are important in tests.

This patch replaces the inline `TestcaseName` buffer with a heap-allocated slice. We don't care about performance for test names, so an indirection out to the heap is fine in that case. But we do care somewhat about the size of `ExternalName` when it's used during compiles. On 64-bit systems, `Box<[u8]>` is 16 bytes, so `TestcaseName` gets one byte smaller. Unfortunately, its alignment is 8 bytes, so `ExternalName` grows from 20 to 24 bytes.

According to `valgrind --tool=dhat`, this change has very little effect on compiler performance. Building wasmtime with `--no-default-features --release`, and compiling the pulldown-cmark benchmark from Sightglass, I measured these differences between `main` and this patch:

- the total number of allocations didn't change (`ExternalName::TestCase` is not used in normal compiles)
- 592 more bytes allocated over the process lifetime, out of 171.5 MiB
- 320 more bytes allocated at peak heap size, out of 12 MiB
- 0.24% more instructions executed
- 16,987 more bytes written
- 12,120 _fewer_ bytes read
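To make the size arithmetic concrete, here is a minimal sketch with stand-in definitions (these are not the actual Cranelift types, whose `ExternalName` has more variants) showing how the old inline buffer and the new boxed slice compare under Rust's default layout:

```rust
// Stand-in types for illustration only; the real Cranelift definitions differ.

/// Before: a 1-byte length plus a 16-byte buffer = 17 bytes, align 1.
#[allow(dead_code)]
struct TestcaseNameOld {
    length: u8,
    ascii: [u8; 16],
}

/// After: a boxed slice is a (pointer, length) pair, so 16 bytes on
/// 64-bit targets, but with 8-byte alignment.
#[allow(dead_code)]
struct TestcaseNameNew(Box<[u8]>);

/// Hypothetical two-variant enums, just to show the alignment effect
/// on the containing enum.
#[allow(dead_code)]
enum ExternalNameOld {
    User { namespace: u32, index: u32 },
    TestCase(TestcaseNameOld),
}

#[allow(dead_code)]
enum ExternalNameNew {
    User { namespace: u32, index: u32 },
    TestCase(TestcaseNameNew),
}

fn main() {
    use std::mem::{align_of, size_of};
    // Values observed with current rustc on a typical 64-bit target;
    // repr(Rust) layout is not formally guaranteed.
    println!("{}", size_of::<TestcaseNameOld>()); // 17
    println!("{}", size_of::<TestcaseNameNew>()); // 16
    println!("{}", align_of::<TestcaseNameNew>()); // 8
    println!("{}", size_of::<ExternalNameOld>()); // 20: tag + 17-byte payload, rounded to align 4
    println!("{}", size_of::<ExternalNameNew>()); // 24: 16-byte payload at 8-byte alignment
}
```

The extra four bytes in the enum come entirely from the pointer's 8-byte alignment, not from the payload itself, which is why `TestcaseName` can shrink while `ExternalName` grows.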
The attached filetest exercises explicit stack slots (simple stores and loads, offset and unaligned accesses, multiple slots, and a 1 MiB slot) on the interpreter and the x86_64, s390x, and aarch64 backends:

```
test interpret
test run
target x86_64
target s390x
target aarch64

function %stack_simple(i64) -> i64 {
    ss0 = explicit_slot 8

block0(v0: i64):
    stack_store.i64 v0, ss0
    v1 = stack_load.i64 ss0
    return v1
}
; run: %stack_simple(0) == 0
; run: %stack_simple(1) == 1
; run: %stack_simple(-1) == -1

function %stack_offset(i64) -> i64 {
    ss0 = explicit_slot 16

block0(v0: i64):
    stack_store.i64 v0, ss0+8
    v1 = stack_load.i64 ss0+8
    return v1
}
; run: %stack_offset(0) == 0
; run: %stack_offset(1) == 1
; run: %stack_offset(-1) == -1

function %offset_unaligned(i64) -> i64 {
    ss0 = explicit_slot 11

block0(v0: i64):
    stack_store.i64 v0, ss0+3
    v1 = stack_load.i64 ss0+3
    return v1
}
; run: %offset_unaligned(0) == 0
; run: %offset_unaligned(1) == 1
; run: %offset_unaligned(-1) == -1

function %multi_slot_stack(i64, i64) -> i64 {
    ss0 = explicit_slot 8
    ss1 = explicit_slot 8

block0(v0: i64, v1: i64):
    stack_store.i64 v0, ss0
    stack_store.i64 v1, ss1
    v2 = stack_load.i64 ss0
    v3 = stack_load.i64 ss1
    v4 = iadd.i64 v2, v3
    return v4
}
; run: %multi_slot_stack(0, 1) == 1
; run: %multi_slot_stack(1, 2) == 3

function %multi_slot_out_of_bounds_writes(i8, i64) -> i8, i64 {
    ss0 = explicit_slot 1
    ss1 = explicit_slot 8

block0(v0: i8, v1: i64):
    stack_store.i8 v0, ss0
    stack_store.i64 v1, ss1
    v2 = stack_load.i8 ss0
    v3 = stack_load.i64 ss1
    return v2, v3
}
; run: %multi_slot_out_of_bounds_writes(10, 1) == [10, 1]
; run: %multi_slot_out_of_bounds_writes(0, 2) == [0, 2]

function %multi_slot_offset_writes(i8, i64) -> i8, i64 {
    ss0 = explicit_slot 8
    ss1 = explicit_slot 8

block0(v0: i8, v1: i64):
    stack_store.i8 v0, ss0
    stack_store.i64 v1, ss1
    v2 = stack_load.i8 ss0
    v3 = stack_load.i64 ss1
    return v2, v3
}
; run: %multi_slot_offset_writes(0, 1) == [0, 1]
; run: %multi_slot_offset_writes(1, 2) == [1, 2]

function %huge_slots(i64) -> i64 {
    ss0 = explicit_slot 1048576 ; 1 MiB slot

block0(v0: i64):
    stack_store.i64 v0, ss0+1048568 ; store at 1 MiB - 8 bytes
    v1 = stack_load.i64 ss0+1048568
    return v1
}
; run: %huge_slots(0) == 0
; run: %huge_slots(1) == 1
; run: %huge_slots(-1) == -1
```