Fix spillslot reload of narrow values: zero-extend, don't sign-extend.
Previously, the x64 backend's ABI code would generate a sign-extending load when loading a less-than-64-bit integer from a spillslot. This is incorrect: e.g., for i32s > 0x80000000, this would result in all high bits set. This interacts poorly with another optimization. Normally, the invariant is that the high bits of a register holding a value of a certain type, beyond that type's bits, are undefined. However, as an optimization, we recognize and use the fact that on x86-64, 32-bit instructions zero the upper 32 bits. This allows us to elide a 32-to-64-bit zero-extend op (turning it into just a move, which can then sometimes disappear entirely due to register coalescing). If a spill and reload happen between the production of a 32-bit value from an instruction known to zero the upper bits and its use, then we will rely on zero upper bits that might actually be set by a sign-extend. This will result in incorrect execution. As a fix, we stick to a simple invariant: we always spill and reload a full 64 bits when handling integer registers on x64. This ensures that no bits are mangled.
This commit is contained in:
@@ -317,19 +317,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
}
|
||||
|
||||
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
|
||||
let ext_kind = match ty {
|
||||
// For integer-typed values, we always load a full 64 bits (and we always spill a full 64
|
||||
// bits as well -- see `Inst::store()`).
|
||||
let ty = match ty {
|
||||
types::B1
|
||||
| types::B8
|
||||
| types::I8
|
||||
| types::B16
|
||||
| types::I16
|
||||
| types::B32
|
||||
| types::I32 => ExtKind::SignExtend,
|
||||
types::B64 | types::I64 | types::R64 | types::F32 | types::F64 => ExtKind::None,
|
||||
_ if ty.bytes() == 16 => ExtKind::None,
|
||||
_ => panic!("load_stack({})", ty),
|
||||
| types::I32 => types::I64,
|
||||
_ => ty,
|
||||
};
|
||||
Inst::load(ty, mem, into_reg, ext_kind)
|
||||
Inst::load(ty, mem, into_reg, ExtKind::None)
|
||||
}
|
||||
|
||||
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
|
||||
|
||||
@@ -1033,6 +1033,7 @@ impl fmt::Display for Avx512Opcode {
|
||||
/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
|
||||
/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
|
||||
/// values can be extended.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum ExtKind {
|
||||
None,
|
||||
|
||||
@@ -102,7 +102,7 @@ block0(
|
||||
; nextln: movq %r8, %r11
|
||||
; nextln: movq %r9, %r12
|
||||
; nextln: movq 16(%rbp), %r13
|
||||
; nextln: movslq 24(%rbp), %r14
|
||||
; nextln: movq 24(%rbp), %r14
|
||||
; nextln: movss 32(%rbp), %xmm8
|
||||
; nextln: movsd 40(%rbp), %xmm9
|
||||
; nextln: subq $$144, %rsp
|
||||
@@ -236,7 +236,7 @@ block0:
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: lea 0(%rsp), %rdi
|
||||
; nextln: call *%rsi
|
||||
; nextln: movslq 0(%rsp), %rsi
|
||||
; nextln: movq 0(%rsp), %rsi
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: virtual_sp_offset_adjust -16
|
||||
; nextln: movq %rsi, %rdx
|
||||
@@ -282,7 +282,7 @@ block0:
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: lea 0(%rsp), %rdi
|
||||
; nextln: call *%rsi
|
||||
; nextln: movslq 0(%rsp), %rsi
|
||||
; nextln: movq 0(%rsp), %rsi
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: virtual_sp_offset_adjust -16
|
||||
; nextln: movq %rdx, 0(%r12)
|
||||
|
||||
38
cranelift/filetests/filetests/isa/x64/spill-reload.clif
Normal file
38
cranelift/filetests/filetests/isa/x64/spill-reload.clif
Normal file
@@ -0,0 +1,38 @@
|
||||
test run
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 {
|
||||
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32):
|
||||
v20 = iadd.i32 v0, v1
|
||||
v21 = iadd.i32 v2, v3
|
||||
v22 = iadd.i32 v4, v5
|
||||
v23 = iadd.i32 v6, v7
|
||||
v24 = iadd.i32 v8, v9
|
||||
v25 = iadd.i32 v10, v11
|
||||
v26 = iadd.i32 v12, v13
|
||||
v27 = iadd.i32 v14, v15
|
||||
v28 = iadd.i32 v16, v17
|
||||
v29 = iadd.i32 v18, v19
|
||||
|
||||
v30 = iadd.i32 v20, v21
|
||||
v31 = iadd.i32 v22, v23
|
||||
v32 = iadd.i32 v24, v25
|
||||
v33 = iadd.i32 v26, v27
|
||||
v34 = iadd.i32 v28, v29
|
||||
|
||||
v35 = iadd.i32 v30, v31
|
||||
v36 = iadd.i32 v32, v33
|
||||
v37 = iadd.i32 v35, v34
|
||||
v38 = iadd.i32 v36, v37
|
||||
;; v38 should be zero (due to wrapping).
|
||||
|
||||
v39 = iconst.i64 1
|
||||
v40 = uextend.i64 v0 ;; should be reloaded from a spillslot
|
||||
v41 = uextend.i64 v38
|
||||
v42 = iadd.i64 v39, v40
|
||||
v43 = iadd.i64 v42, v41
|
||||
return v43
|
||||
}
|
||||
|
||||
; run: %f(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000) == 0x80000001
|
||||
Reference in New Issue
Block a user