A branch is considered side-effecting and so updates the instruction color (our way of computing how far instructions can sink). However, the lowering loop did not update the current instruction color when scanning backward across branches, even though they are side-effecting. As a result, the color was stale and fewer load-op merges were permitted than were actually possible. Note that this was never a correctness issue: the stale color is too high, so it only rejects merges, and no merge that should have been disallowed was ever permitted. Fixes #2562.
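
For context, here is a minimal sketch of the fixed backward scan, using hypothetical names rather than Cranelift's actual types; the point is only that a branch must step the color just like any other side-effecting instruction.

```rust
// Hypothetical sketch, not Cranelift's real API. Colors are assigned by a
// forward pass that increments at every side-effecting instruction; a load
// may sink into (merge with) a consumer only if the color at the consumer
// matches the load's color.
struct Inst {
    is_branch: bool,
    has_other_side_effect: bool,
}

impl Inst {
    // Branches count as side-effecting for coloring purposes.
    fn is_side_effecting(&self) -> bool {
        self.is_branch || self.has_other_side_effect
    }
}

fn lower_block_backward(insts: &[Inst], color_at_block_end: u32) {
    let mut cur_color = color_at_block_end;
    for inst in insts.iter().rev() {
        lower_one(inst, cur_color);
        // Crossing a side-effecting instruction on the backward scan moves
        // us into an earlier color region. The bug: branches were not
        // handled here, so `cur_color` stayed stale (too high) and legal
        // load-op merges were conservatively rejected.
        if inst.is_side_effecting() {
            cur_color = cur_color.saturating_sub(1);
        }
    }
}

fn lower_one(_inst: &Inst, cur_color: u32) {
    // A load whose color equals `cur_color` has no side effect between its
    // definition and this use, so it is safe to merge here.
    let _ = cur_color;
}
```

The test file below exercises the merge in both operand orders, across operand widths, and (in the final test) across a branch.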
test compile
target x86_64
feature "experimental_x64"

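; The four tests below check that a load feeding either operand of a 32- or
; 64-bit add is merged into a single memory-operand add instruction.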
function %add_from_mem_u32_1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = load.i32 v0
  v3 = iadd.i32 v2, v1
  ; check: addl 0(%rdi), %esi
  return v3
}

function %add_from_mem_u32_2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = load.i32 v0
  v3 = iadd.i32 v1, v2
  ; check: addl 0(%rdi), %esi
  return v3
}

function %add_from_mem_u64_1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v2, v1
  ; check: addq 0(%rdi), %rsi
  return v3
}

function %add_from_mem_u64_2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v1, v2
  ; check: addq 0(%rdi), %rsi
  return v3
}

; Test narrow loads: an 8-bit load should not merge, because the `addl` is
; 32 bits wide and would read 32 bits from memory, which may go beyond the
; end of the heap.
function %add_from_mem_not_narrow(i64, i8) -> i8 {
block0(v0: i64, v1: i8):
  v2 = load.i8 v0
  v3 = iadd.i8 v2, v1
  ; check: movzbq 0(%rdi), %rdi
  ; nextln: addl %esi, %edi
  return v3
}

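; The load of v2 must not merge into the iadd below: address lowering for the
; final load also uses v2 (a look-back use), so v2 has more than one use.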
function %no_merge_if_lookback_use(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v2, v0
  store.i64 v3, v1
  v4 = load.i64 v3
  return v4
  ; check: movq 0(%rdi), %rax
  ; nextln: movq %rax, %rcx
  ; nextln: addq %rdi, %rcx
  ; nextln: movq %rcx, 0(%rsi)
  ; nextln: movq 0(%rax,%rdi,1), %rsi
  ; nextln: movq %rsi, %rax
}

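; Regression test for the stale-color bug fixed here: the load should still
; merge into the scalar_to_vector even though the backward lowering scan
; crosses the jump.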
function %merge_scalar_to_vector(i64) -> i32x4 {
block0(v0: i64):
  v1 = load.i32 v0
  v2 = scalar_to_vector.i32x4 v1
  ; check: movss 0(%rdi), %xmm0

  jump block1

block1:
  return v2
}