Fix spillslot size bug in SIMD by removing type-dependent spillslot allocation.

This patch makes spillslot allocation, spilling, and reloading all based
on register class only. As a result, when a 32- or 64-bit value lives in
a 128-bit XMM register on x86-64 or a 128-bit vector register on
aarch64, it now gets a full 128-bit spillslot and full-width
spills/reloads.
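
A minimal sketch of the new policy (illustrative names, not the exact
Cranelift/regalloc.rs API): the slot size is a function of the register
class alone, so it is always large enough for the widest value the
class can hold.

    /// Illustrative subset of regalloc.rs register classes.
    enum RegClass {
        I64,  // integer / GPR
        V128, // FP and vector: XMM on x86-64, Vn on aarch64
    }

    /// Spillslot size in 8-byte units, keyed on class alone: sized for
    /// the widest value of the class, never for a (possibly wrong) type.
    fn spillslot_size(rc: RegClass) -> u32 {
        match rc {
            RegClass::I64 => 1,  // a full 64-bit slot for any int value
            RegClass::V128 => 2, // a full 128-bit slot for any FP/vector value
        }
    }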

Why make this change, if it results in less efficient stack-frame usage?
Simply put, it is safer: whenever spillslot allocation or
spilling/reloading depends on a type, there is a risk that we get the
wrong type and make the spillslot or the store/load too small. This was
one contributing factor to CVE-2021-32629, and it is now the source of a
fuzzbug in SIMD code that puns an arbitrary user-controlled vector
constant over another stackslot. (If the overwritten slot held a
pointer, that could result in RCE. Fortunately, SIMD is not yet enabled
by default in any release.)

In particular, we have not been careful about moves between values of
different types, for example with `raw_bitcast` or with certain SIMD
operations. Such moves tell regalloc.rs that the vregs involved form an
equivalence class, and when a spillslot is allocated or a spill/reload
is generated, regalloc.rs hands us some arbitrary vreg from that class.
Since regalloc.rs does not track actual types, and since we have not
been careful about these moves, we cannot trust this "arbitrary vreg in
the equivalence class" to carry accurate type information.
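
To make the hazard concrete, here is a hedged sketch (hypothetical
names and types, not actual Cranelift code) of why a type-derived width
is unsafe while a class-derived width is not:

    /// Two value types that share the FP/vector register class.
    #[derive(Clone, Copy)]
    enum Type {
        F64,   // 8 bytes in memory
        I8X16, // 16 bytes in memory
    }

    // Old, unsafe: the width comes from whichever vreg of the
    // equivalence class we happen to be handed. If a 128-bit vector
    // shares a class with an f64-typed vreg (joined by a `raw_bitcast`
    // move, say), we may allocate an 8-byte slot for 16 live bytes; the
    // 16-byte store then puns its upper half over the neighboring
    // stackslot.
    fn spill_bytes_from_type(ty: Type) -> u32 {
        match ty {
            Type::F64 => 8,
            Type::I8X16 => 16,
        }
    }

    // New, safe: the FP/vector class always spills/reloads 16 bytes,
    // an upper bound on anything the register can hold.
    fn spill_bytes_for_fp_class() -> u32 {
        16
    }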

The fix for CVE-2021-32629 addressed this for integer registers by
always spilling/reloading a full 64 bits; this patch can be seen as the
analogous change for FP/vector registers.
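
Concretely, the arithmetic behind the updated precise-output filetests
below (numbers read off the x86-64 test; the breakdown is my own):

    // xmm6..xmm15 are clobber-saved: 10 regs * 16 bytes = 160 bytes.
    const CLOBBER_AREA: u32 = 10 * 16;
    // Old frame: clobber area plus four 8-byte f64 spillslots.
    const OLD_FRAME: u32 = CLOBBER_AREA + 4 * 8; // = 192
    // New frame: the same four slots at the 16-byte class size.
    const NEW_FRAME: u32 = CLOBBER_AREA + 4 * 16; // = 224

The aarch64 tests grow the same way: three 8-byte slots (24 bytes,
padded to 32 for SP alignment) become three 16-byte slots, i.e. 48
bytes of `sub sp, sp, #...`.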
commit 833ebeed76
parent d2d0396f40
Author: Chris Fallin
Date:   2021-12-30 12:43:00 -08:00

10 changed files with 100 additions and 138 deletions


@@ -133,28 +133,28 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: sub sp, sp, #32
+; nextln: sub sp, sp, #48
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: str s0, [sp]
+; nextln: str q0, [sp]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: str d0, [sp, #8]
+; nextln: str q0, [sp, #16]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: str d0, [sp, #16]
+; nextln: str q0, [sp, #32]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: ldr s0, [sp]
+; nextln: ldr q0, [sp]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: ldr d0, [sp, #8]
+; nextln: ldr q0, [sp, #16]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: ldr d0, [sp, #16]
+; nextln: ldr q0, [sp, #32]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: add sp, sp, #32
+; nextln: add sp, sp, #48
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -223,28 +223,28 @@ block0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: sub sp, sp, #32
+; nextln: sub sp, sp, #48
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: str s0, [sp]
-; nextln: ldr x0, 8 ; b 12 ; data
-; nextln: blr x0
-; nextln: str d0, [sp, #8]
+; nextln: str q0, [sp]
+; nextln: ldr x0, 8 ; b 12 ; data
+; nextln: blr x0
+; nextln: str q0, [sp, #16]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: ldr s0, [sp]
+; nextln: str q0, [sp, #32]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: ldr d0, [sp, #8]
+; nextln: ldr q0, [sp]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
+; nextln: ldr q0, [sp, #16]
 ; nextln: ldr x0, 8 ; b 12 ; data
 ; nextln: blr x0
-; nextln: add sp, sp, #32
+; nextln: ldr q0, [sp, #32]
+; nextln: ldr x0, 8 ; b 12 ; data
+; nextln: blr x0
+; nextln: add sp, sp, #48
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret


@@ -238,34 +238,34 @@ block0(v0: i64):
 ; nextln: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
 ; nextln: movq %rsp, %rbp
 ; nextln: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
-; nextln: subq $$192, %rsp
-; nextln: movdqu %xmm6, 32(%rsp)
+; nextln: subq $$224, %rsp
+; nextln: movdqu %xmm6, 64(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 0, reg: r6V }
-; nextln: movdqu %xmm7, 48(%rsp)
+; nextln: movdqu %xmm7, 80(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 16, reg: r7V }
-; nextln: movdqu %xmm8, 64(%rsp)
+; nextln: movdqu %xmm8, 96(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 32, reg: r8V }
-; nextln: movdqu %xmm9, 80(%rsp)
+; nextln: movdqu %xmm9, 112(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 48, reg: r9V }
-; nextln: movdqu %xmm10, 96(%rsp)
+; nextln: movdqu %xmm10, 128(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 64, reg: r10V }
-; nextln: movdqu %xmm11, 112(%rsp)
+; nextln: movdqu %xmm11, 144(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 80, reg: r11V }
-; nextln: movdqu %xmm12, 128(%rsp)
+; nextln: movdqu %xmm12, 160(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 96, reg: r12V }
-; nextln: movdqu %xmm13, 144(%rsp)
+; nextln: movdqu %xmm13, 176(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 112, reg: r13V }
-; nextln: movdqu %xmm14, 160(%rsp)
+; nextln: movdqu %xmm14, 192(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 128, reg: r14V }
-; nextln: movdqu %xmm15, 176(%rsp)
+; nextln: movdqu %xmm15, 208(%rsp)
 ; nextln: unwind SaveReg { clobber_offset: 144, reg: r15V }
 ; nextln: movsd 0(%rcx), %xmm4
 ; nextln: movsd 8(%rcx), %xmm1
 ; nextln: movsd 16(%rcx), %xmm0
-; nextln: movsd %xmm0, rsp(16 + virtual offset)
+; nextln: movdqu %xmm0, rsp(32 + virtual offset)
 ; nextln: movsd 24(%rcx), %xmm3
 ; nextln: movsd 32(%rcx), %xmm0
-; nextln: movsd %xmm0, rsp(24 + virtual offset)
+; nextln: movdqu %xmm0, rsp(48 + virtual offset)
 ; nextln: movsd 40(%rcx), %xmm5
 ; nextln: movsd 48(%rcx), %xmm6
 ; nextln: movsd 56(%rcx), %xmm7
@@ -278,24 +278,24 @@ block0(v0: i64):
 ; nextln: movsd 112(%rcx), %xmm14
 ; nextln: movsd 120(%rcx), %xmm15
 ; nextln: movsd 128(%rcx), %xmm0
-; nextln: movsd %xmm0, rsp(0 + virtual offset)
+; nextln: movdqu %xmm0, rsp(0 + virtual offset)
 ; nextln: movsd 136(%rcx), %xmm0
 ; nextln: movsd 144(%rcx), %xmm2
-; nextln: movsd %xmm2, rsp(8 + virtual offset)
+; nextln: movdqu %xmm2, rsp(16 + virtual offset)
 ; nextln: movsd 152(%rcx), %xmm2
 ; nextln: addsd %xmm1, %xmm4
-; nextln: movsd rsp(16 + virtual offset), %xmm1
+; nextln: movdqu rsp(32 + virtual offset), %xmm1
 ; nextln: addsd %xmm3, %xmm1
-; nextln: movsd rsp(24 + virtual offset), %xmm3
+; nextln: movdqu rsp(48 + virtual offset), %xmm3
 ; nextln: addsd %xmm5, %xmm3
 ; nextln: addsd %xmm7, %xmm6
 ; nextln: addsd %xmm9, %xmm8
 ; nextln: addsd %xmm11, %xmm10
 ; nextln: addsd %xmm13, %xmm12
 ; nextln: addsd %xmm15, %xmm14
-; nextln: movsd rsp(0 + virtual offset), %xmm5
+; nextln: movdqu rsp(0 + virtual offset), %xmm5
 ; nextln: addsd %xmm0, %xmm5
-; nextln: movsd rsp(8 + virtual offset), %xmm0
+; nextln: movdqu rsp(16 + virtual offset), %xmm0
 ; nextln: addsd %xmm2, %xmm0
 ; nextln: addsd %xmm1, %xmm4
 ; nextln: addsd %xmm6, %xmm3
@@ -307,17 +307,17 @@ block0(v0: i64):
 ; nextln: addsd %xmm8, %xmm4
 ; nextln: addsd %xmm5, %xmm4
 ; nextln: movaps %xmm4, %xmm0
-; nextln: movdqu 32(%rsp), %xmm6
-; nextln: movdqu 48(%rsp), %xmm7
-; nextln: movdqu 64(%rsp), %xmm8
-; nextln: movdqu 80(%rsp), %xmm9
-; nextln: movdqu 96(%rsp), %xmm10
-; nextln: movdqu 112(%rsp), %xmm11
-; nextln: movdqu 128(%rsp), %xmm12
-; nextln: movdqu 144(%rsp), %xmm13
-; nextln: movdqu 160(%rsp), %xmm14
-; nextln: movdqu 176(%rsp), %xmm15
-; nextln: addq $$192, %rsp
+; nextln: movdqu 64(%rsp), %xmm6
+; nextln: movdqu 80(%rsp), %xmm7
+; nextln: movdqu 96(%rsp), %xmm8
+; nextln: movdqu 112(%rsp), %xmm9
+; nextln: movdqu 128(%rsp), %xmm10
+; nextln: movdqu 144(%rsp), %xmm11
+; nextln: movdqu 160(%rsp), %xmm12
+; nextln: movdqu 176(%rsp), %xmm13
+; nextln: movdqu 192(%rsp), %xmm14
+; nextln: movdqu 208(%rsp), %xmm15
+; nextln: addq $$224, %rsp
 ; nextln: movq %rbp, %rsp
 ; nextln: popq %rbp
 ; nextln: ret