x64: improve tests for heap_addr (#4841)

* x64: improve tests for `heap_addr`

  This change adds Cranelift `compile` tests for the various cases for `heap_addr`. The idea behind this is to more clearly show what the penalties are for dynamically- vs statically-allocated memory as well as for turning Spectre mitigations on and off.

* Add test case: "right" size memory with Spectre enabled
2022-09-01 13:59:55 -07:00
parent bca4dae8b0
commit ac2d4c4818
2 changed files with 156 additions and 1 deletions
--- a/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif
@@ -0,0 +1,85 @@
+test compile precise-output
+set enable_heap_access_spectre_mitigation=false
+target x86_64
+
+;; Calculate a heap address on a dynamically-allocated memory with Spectre
+;; mitigations disabled. This is a 7-instruction sequence with loads, ignoring
+;; intermediate `mov`s.
+function %f(i32, i64 vmctx) -> i64 {
+    gv0 = vmctx  ;; the VM context pointer (the `i64 vmctx` argument)
+    gv1 = load.i64 notrap aligned gv0+0  ;; heap base, read from vmctx+0
+    gv2 = load.i64 notrap aligned gv0+8  ;; heap bound, read from vmctx+8
+    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32  ;; dynamic: the bound is a loaded value, not a constant
+
+block0(v0: i32, v1: i64):
+    v2 = heap_addr.i64 heap0, v0, 0x8000  ;; v0 is the 32-bit index; 0x8000 (32768) is added to it before the compare against the bound
+    return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    %edi, %eax
+;   movq    8(%rsi), %r11
+;   movq    %rax, %rdi
+;   addq    %rdi, $32768, %rdi
+;   jnb ; ud2 heap_oob ;
+;   cmpq    %r11, %rdi
+;   jbe     label1; j label2
+; block1:
+;   addq    %rax, 0(%rsi), %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; block2:
+;   ud2 heap_oob
+
+;; For a static memory with no Spectre mitigations, we observe a shorter
+;; sequence: the bounds check on the offset (`cmp + jbe + j`) and the address
+;; calculation (`add`)--4 instructions.
+function %f(i64 vmctx, i32) -> i64 system_v {
+    gv0 = vmctx  ;; the VM context pointer (the `i64 vmctx` argument)
+    gv1 = load.i64 notrap aligned gv0+0  ;; heap base, read from vmctx+0
+    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32  ;; static: the bound is the constant 0x1000 (4096)
+
+block0(v0: i64, v1: i32):
+    v10 = heap_addr.i64 heap0, v1, 0  ;; v1 is the 32-bit index, compared against the constant bound
+    return v10
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    %esi, %eax
+;   cmpq    $4096, %rax
+;   jbe     label1; j label2
+; block1:
+;   addq    %rax, 0(%rdi), %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; block2:
+;   ud2 heap_oob
+
+;; For a static memory with no Spectre mitigations and the "right" size (4GB
+;; memory, 2GB guard regions), Cranelift emits no bounds checking, simply
+;; `add`--a single instruction.
+function %f(i64 vmctx, i32) -> i64 system_v {
+    gv0 = vmctx  ;; the VM context pointer (the `i64 vmctx` argument)
+    gv1 = load.i64 notrap aligned gv0+0  ;; heap base, read from vmctx+0
+    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32  ;; 4GB bound with a 2GB guard region
+
+block0(v0: i64, v1: i32):
+    v10 = heap_addr.i64 heap0, v1, 0  ;; a 32-bit index is always below the 4GB bound, so no compare is expected here
+    return v10
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    %esi, %eax
+;   addq    %rax, 0(%rdi), %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
--- a/cranelift/filetests/filetests/isa/x64/heap.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap.clif
@@ -1,6 +1,23 @@
 test compile precise-output
 target x86_64

+;; Calculate a heap address on a dynamically-allocated memory. Because the
+;; Spectre mitigations are on by default (i.e.,
+;; `set enable_heap_access_spectre_mitigation=true`), this code not only does
+;; the dynamic bounds check (`add + jnb + cmp + jbe + j`) but also re-compares
+;; the address to the upper bound (`add + xor + cmp + cmov`)--Cranelift's
+;; Spectre mitigation. With loads and ignoring intermediate `mov`s, this amounts
+;; to a 10-instruction sequence.
+;;
+;; And it uses quite a few registers; see this breakdown of what each register
+;; generally contains:
+;; - %rax holds the passed-in heap offset (argument #1) and ends up holding the
+;;   final address
+;; - %rcx also holds the passed-in heap offset; checked for overflow when added
+;;   to the `0x8000` immediate
+;; - %rsi holds the VM context pointer (argument #2)
+;; - %rdi holds the heap limit (computed from argument #2)
+;; - %rdx holds the null pointer
 function %f(i32, i64 vmctx) -> i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
@@ -8,7 +25,6 @@ function %f(i32, i64 vmctx) -> i64 {
    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32

 block0(v0: i32, v1: i64):
-
    v2 = heap_addr.i64 heap0, v0, 0x8000
    return v2
 }
@@ -34,3 +50,57 @@ block0(v0: i32, v1: i64):
 ; block2:
 ;   ud2 heap_oob

+;; The heap address calculation for this statically-allocated memory checks that
+;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes
+;; the same Spectre mitigation as above. This results in a 7-instruction
+;; sequence (ignoring `mov`s).
+function %f(i64 vmctx, i32) -> i64 system_v {
+    gv0 = vmctx  ;; the VM context pointer (the `i64 vmctx` argument)
+    gv1 = load.i64 notrap aligned gv0+0  ;; heap base, read from vmctx+0
+    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32  ;; static: the bound is the constant 0x1000 (4096)
+
+block0(v0: i64, v1: i32):
+    v10 = heap_addr.i64 heap0, v1, 0  ;; v1 is the 32-bit index; bounds-checked, then re-compared for the Spectre `cmov`
+    return v10
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    %esi, %r11d
+;   cmpq    $4096, %r11
+;   jbe     label1; j label2
+; block1:
+;   movq    %r11, %rax
+;   addq    %rax, 0(%rdi), %rax
+;   xorq    %rsi, %rsi, %rsi
+;   cmpq    $4096, %r11
+;   cmovnbeq %rsi, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; block2:
+;   ud2 heap_oob
+
+;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the
+;; Spectre mitigation is not present. Cranelift relies on the memory permissions
+;; and emits no bounds checking, simply `add`--a single instruction.
+function %f(i64 vmctx, i32) -> i64 system_v {
+    gv0 = vmctx  ;; the VM context pointer (the `i64 vmctx` argument)
+    gv1 = load.i64 notrap aligned gv0+0  ;; heap base, read from vmctx+0
+    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32  ;; 4GB bound with a 2GB guard region
+
+block0(v0: i64, v1: i32):
+    v10 = heap_addr.i64 heap0, v1, 0  ;; a 32-bit index is always below the 4GB bound, so neither a compare nor a `cmov` is expected
+    return v10
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    %esi, %eax
+;   addq    %rax, 0(%rdi), %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+