Fix spillslot reload of narrow values: zero-extend, don't sign-extend.

Previously, the x64 backend's ABI code would generate a sign-extending
load when loading a less-than-64-bit integer from a spillslot. This is
incorrect: e.g., for i32s > 0x80000000, this would result in all high
bits set.

This interacts poorly with another optimization. Normally, the invariant
is that the high bits of a register holding a value of a certain type,
beyond that type's bits, are undefined. However, as an optimization, we
recognize and use the fact that on x86-64, 32-bit instructions zero the
upper 32 bits. This allows us to elide a 32-to-64-bit zero-extend op
(turning it into just a move, which can then sometimes disappear
entirely due to register coalescing).

If a spill and reload happen between the production of a 32-bit value
from an instruction known to zero the upper bits and its use, then we
will rely on zero upper bits that might actually be set by a
sign-extend. This will result in incorrect execution.

As a fix, we stick to a simple invariant: we always spill and reload a
full 64 bits when handling integer registers on x64. This ensures that
no bits are mangled.
This commit is contained in:
Chris Fallin
2021-04-21 14:27:30 -07:00
parent 3b3b126fe2
commit a1c9b06cea
4 changed files with 48 additions and 9 deletions

View File

@@ -317,19 +317,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
let ext_kind = match ty {
// For integer-typed values, we always load a full 64 bits (and we always spill a full 64
// bits as well -- see `Inst::store()`).
let ty = match ty {
types::B1
| types::B8
| types::I8
| types::B16
| types::I16
| types::B32
| types::I32 => ExtKind::SignExtend,
types::B64 | types::I64 | types::R64 | types::F32 | types::F64 => ExtKind::None,
_ if ty.bytes() == 16 => ExtKind::None,
_ => panic!("load_stack({})", ty),
| types::I32 => types::I64,
_ => ty,
};
Inst::load(ty, mem, into_reg, ext_kind)
Inst::load(ty, mem, into_reg, ExtKind::None)
}
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {

View File

@@ -1033,6 +1033,7 @@ impl fmt::Display for Avx512Opcode {
/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
/// values can be extended.
#[allow(dead_code)]
#[derive(Clone, PartialEq)]
pub enum ExtKind {
None,

View File

@@ -102,7 +102,7 @@ block0(
; nextln: movq %r8, %r11
; nextln: movq %r9, %r12
; nextln: movq 16(%rbp), %r13
; nextln: movslq 24(%rbp), %r14
; nextln: movq 24(%rbp), %r14
; nextln: movss 32(%rbp), %xmm8
; nextln: movsd 40(%rbp), %xmm9
; nextln: subq $$144, %rsp
@@ -236,7 +236,7 @@ block0:
; nextln: virtual_sp_offset_adjust 16
; nextln: lea 0(%rsp), %rdi
; nextln: call *%rsi
; nextln: movslq 0(%rsp), %rsi
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: virtual_sp_offset_adjust -16
; nextln: movq %rsi, %rdx
@@ -282,7 +282,7 @@ block0:
; nextln: virtual_sp_offset_adjust 16
; nextln: lea 0(%rsp), %rdi
; nextln: call *%rsi
; nextln: movslq 0(%rsp), %rsi
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: virtual_sp_offset_adjust -16
; nextln: movq %rdx, 0(%r12)

View File

@@ -0,0 +1,38 @@
test run
target x86_64
feature "experimental_x64"
function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 {
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32):
v20 = iadd.i32 v0, v1
v21 = iadd.i32 v2, v3
v22 = iadd.i32 v4, v5
v23 = iadd.i32 v6, v7
v24 = iadd.i32 v8, v9
v25 = iadd.i32 v10, v11
v26 = iadd.i32 v12, v13
v27 = iadd.i32 v14, v15
v28 = iadd.i32 v16, v17
v29 = iadd.i32 v18, v19
v30 = iadd.i32 v20, v21
v31 = iadd.i32 v22, v23
v32 = iadd.i32 v24, v25
v33 = iadd.i32 v26, v27
v34 = iadd.i32 v28, v29
v35 = iadd.i32 v30, v31
v36 = iadd.i32 v32, v33
v37 = iadd.i32 v35, v34
v38 = iadd.i32 v36, v37
;; v38 should be zero (due to wrapping).
v39 = iconst.i64 1
v40 = uextend.i64 v0 ;; should be reloaded from a spillslot
v41 = uextend.i64 v38
v42 = iadd.i64 v39, v40
v43 = iadd.i64 v42, v41
return v43
}
; run: %f(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000) == 0x80000001