wasmtime/cranelift/filetests/filetests/isa/x64/load-op.clif
Commit b4426be072 (Chris Fallin): machinst lowering: update inst color when scanning across branch to allow more load-op merging.
A branch is considered side-effecting and so updates the instruction
color (which is our way of computing how far instructions can sink).
However, in the lowering loop, we did not update the current instruction
color when scanning backward across branches, which are side-effecting.
As a result, the color was stale and fewer load-op merges were permitted
than were actually possible.

Note that this would not have resulted in any correctness issues, as the
stale color is too high: it only rejected merges that were actually legal,
and never permitted a merge that should have been disallowed.

Fixes #2562.
2021-01-11 11:20:44 -08:00
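
For context on the mechanism the commit message describes, below is a minimal,
self-contained sketch of the coloring scheme, written in Rust. The names
(Color, Inst, assign_colors, backward_scan) are invented for this note and do
not correspond to the actual Cranelift lowering code; the sketch only shows why
the current color must be refreshed when the backward scan crosses a branch,
and why a stale (too-high) color can only reject merges, never allow an
illegal one.

// Illustrative sketch only: the names below are made up for this note and
// are not the actual Cranelift lowering API.

#[derive(Clone, Copy, PartialEq, Eq)]
struct Color(u32);

#[derive(Clone, Copy)]
struct Inst {
    // Stores, calls, traps, and (per the fix above) branches all count.
    has_side_effect: bool,
}

// Forward pass: every instruction is tagged with the current color, and each
// side-effecting instruction bumps the color for everything after it.
// Returns the per-instruction colors and the color at the end of the block.
fn assign_colors(insts: &[Inst]) -> (Vec<Color>, Color) {
    let mut cur = 0u32;
    let colors: Vec<Color> = insts
        .iter()
        .map(|inst| {
            let c = Color(cur);
            if inst.has_side_effect {
                cur += 1;
            }
            c
        })
        .collect();
    (colors, Color(cur))
}

// Backward lowering scan: cur_color tracks the color of the instruction
// currently being lowered. A load defined at def_color may be merged into it
// only if def_color == cur_color, i.e. no side effect sits between the
// definition and the use.
fn backward_scan(insts: &[Inst], block_end_color: Color, def_color: Color) {
    let mut cur_color = block_end_color;
    for (i, inst) in insts.iter().enumerate().rev() {
        if inst.has_side_effect {
            // Branches must take this path too. Skipping the update for
            // branches is exactly the staleness described above: cur_color
            // stays too high, and legal merges earlier in the block are
            // rejected.
            cur_color = Color(cur_color.0 - 1);
        }
        println!("inst {i}: load-op merge allowed = {}", def_color == cur_color);
    }
}

fn main() {
    // load (pure), iadd (pure), store (side effect), branch (side effect).
    let insts = [
        Inst { has_side_effect: false },
        Inst { has_side_effect: false },
        Inst { has_side_effect: true },
        Inst { has_side_effect: true },
    ];
    let (colors, end_color) = assign_colors(&insts);
    // Ask, at each point of the backward scan, whether the load at inst 0
    // could be merged into the instruction being lowered.
    backward_scan(&insts, end_color, colors[0]);
}

Running the sketch on a load / iadd / store / branch sequence reports that the
load could be merged into the iadd and the store but not into the branch,
which is the behavior the tests below check at the machine-code level.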

test compile
target x86_64
feature "experimental_x64"

function %add_from_mem_u32_1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
    v2 = load.i32 v0
    v3 = iadd.i32 v2, v1
    ; check: addl 0(%rdi), %esi
    return v3
}

function %add_from_mem_u32_2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
    v2 = load.i32 v0
    v3 = iadd.i32 v1, v2
    ; check: addl 0(%rdi), %esi
    return v3
}

function %add_from_mem_u64_1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = load.i64 v0
    v3 = iadd.i64 v2, v1
    ; check: addq 0(%rdi), %rsi
    return v3
}

function %add_from_mem_u64_2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = load.i64 v0
    v3 = iadd.i64 v1, v2
    ; check: addq 0(%rdi), %rsi
    return v3
}

; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits
; and would load 32 bits from memory, which may go beyond the end of the heap.
function %add_from_mem_not_narrow(i64, i8) -> i8 {
block0(v0: i64, v1: i8):
    v2 = load.i8 v0
    v3 = iadd.i8 v2, v1
    ; check: movzbq 0(%rdi), %rdi
    ; nextln: addl %esi, %edi
    return v3
}

function %no_merge_if_lookback_use(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = load.i64 v0
    v3 = iadd.i64 v2, v0
    store.i64 v3, v1
    v4 = load.i64 v3
    return v4
    ; check: movq 0(%rdi), %rax
    ; nextln: movq %rax, %rcx
    ; nextln: addq %rdi, %rcx
    ; nextln: movq %rcx, 0(%rsi)
    ; nextln: movq 0(%rax,%rdi,1), %rsi
    ; nextln: movq %rsi, %rax
}

function %merge_scalar_to_vector(i64) -> i32x4 {
block0(v0: i64):
    v1 = load.i32 v0
    v2 = scalar_to_vector.i32x4 v1
    ; check: movss 0(%rdi), %xmm0
    jump block1

block1:
    return v2
}
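
A side note on the %add_from_mem_not_narrow case above: the rule its comment
describes can be thought of as a width check on the sinkable load. The Rust
sketch below uses a hypothetical predicate (can_merge_load is an invented
name, not the actual Cranelift check): merging is allowed only when the loaded
width is at least as wide as the access the ALU instruction would perform, so
the merged instruction never reads memory beyond what the original load
touched.

// Hypothetical width check; names are illustrative, not Cranelift's API.
fn can_merge_load(load_bits: u8, op_bits: u8) -> bool {
    // The merged memory operand would read `op_bits` from memory, so the
    // original load must have covered at least that many bits.
    load_bits >= op_bits
}

fn main() {
    // An 8-bit load cannot be folded into a 32-bit `addl` (it would read 32
    // bits and possibly run past the end of the heap), but a 32-bit load can.
    assert!(!can_merge_load(8, 32));
    assert!(can_merge_load(32, 32));
}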