x64 lowering fix: i32.popcnt should not merge load and make it 64-bit.
As a subtle consequence of the recent load-op fusion, popcnt of a value that came from a load.i32 was compiling into a 64-bit load. This is a result of the way in which x86 infers the width of loads: it is a consequence of the instruction containing the memory reference, not the memory reference itself. So the `input_to_reg_mem()` helper (convert an instruction input into a register or memory reference) was providing the appropriate memory reference for the result of a load.i32, but never encoded the assumption that it would only be used in a 32-bit instruction. It turns out that popcnt.i32 uses a 64-bit instruction to load this RM op, hence widening a 32-bit to 64-bit load (which is problematic when the offset is (memory_length - 4)). Separately, popcnt was using the RM operand twice, resulting in two loads if we merged a load. This isn't a correctness bug in practice because only a racy sequence (store interleaving between the loads) would produce incorrect results, but we decided earlier to treat loads as effectful for now, neither reordering nor duplicating them, to deliberately reduce complexity. Because of the second issue, the fix is just to force the operand into a register always, so any source load will not be merged. Discovered via fuzzing with oss-fuzz.
This commit is contained in:
113
cranelift/filetests/filetests/isa/x64/popcnt.clif
Normal file
113
cranelift/filetests/filetests/isa/x64/popcnt.clif
Normal file
@@ -0,0 +1,113 @@
|
||||
test compile
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
; TODO: test with popcnt feature available too, once new backend supports that.
|
||||
|
||||
function %popcnt64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = popcnt v0
|
||||
; check: movq %rdi, %rsi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: movabsq $$8608480567731124087, %rax
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$4, %rsi
|
||||
; nextln: addq %rdi, %rsi
|
||||
; nextln: movabsq $$1085102592571150095, %rdi
|
||||
; nextln: andq %rdi, %rsi
|
||||
; nextln: movabsq $$72340172838076673, %rdi
|
||||
; nextln: imulq %rdi, %rsi
|
||||
; nextln: shrq $$56, %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
return v1
|
||||
}
|
||||
|
||||
function %popcnt64load(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i64 v0
|
||||
v2 = popcnt v1
|
||||
return v2
|
||||
; check: movq 0(%rdi), %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: movabsq $$8608480567731124087, %rax
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$4, %rsi
|
||||
; nextln: addq %rdi, %rsi
|
||||
; nextln: movabsq $$1085102592571150095, %rdi
|
||||
; nextln: andq %rdi, %rsi
|
||||
; nextln: movabsq $$72340172838076673, %rdi
|
||||
; nextln: imulq %rdi, %rsi
|
||||
; nextln: shrq $$56, %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
}
|
||||
|
||||
function %popcnt32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
; check: movq %rdi, %rsi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$4, %esi
|
||||
; nextln: addl %edi, %esi
|
||||
; nextln: andl $$252645135, %esi
|
||||
; nextln: imull $$16843009, %esi
|
||||
; nextln: shrl $$24, %esi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
}
|
||||
|
||||
function %popcnt32load(i64) -> i32 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i32 v0
|
||||
v2 = popcnt v1
|
||||
return v2
|
||||
; check: movl 0(%rdi), %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$4, %esi
|
||||
; nextln: addl %edi, %esi
|
||||
; nextln: andl $$252645135, %esi
|
||||
; nextln: imull $$16843009, %esi
|
||||
; nextln: shrl $$24, %esi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
}
|
||||
Reference in New Issue
Block a user