x64: improve tests for heap_addr (#4841)
* x64: improve tests for `heap_addr`

This change adds Cranelift `compile` tests for the various cases of `heap_addr`. The idea is to show more clearly what the penalties are for dynamically- vs. statically-allocated memory, as well as for turning Spectre mitigations on and off.

* Add test case: "right"-size memory with Spectre enabled
cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif (new file, 85 additions)
@@ -0,0 +1,85 @@
test compile precise-output
set enable_heap_access_spectre_mitigation=false
target x86_64

;; Calculate a heap address on a dynamically-allocated memory with Spectre
;; mitigations disabled. This is a 7-instruction sequence with loads, ignoring
;; intermediate `mov`s.
function %f(i32, i64 vmctx) -> i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    gv2 = load.i64 notrap aligned gv0+8
    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32

block0(v0: i32, v1: i64):
    v2 = heap_addr.i64 heap0, v0, 0x8000
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %edi, %eax
; movq 8(%rsi), %r11
; movq %rax, %rdi
; addq %rdi, $32768, %rdi
; jnb ; ud2 heap_oob ;
; cmpq %r11, %rdi
; jbe label1; j label2
; block1:
; addq %rax, 0(%rsi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
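
;; A minimal Rust-like sketch of the sequence above (illustrative names, not
;; Cranelift's actual code): trap if `offset + 0x8000` overflows, trap if the
;; sum exceeds the loaded bound, then add the offset to the heap base.
;;
;;     let end = offset.checked_add(0x8000).unwrap_or_else(|| trap()); // addq + jnb
;;     if end > bound { trap(); }                                      // cmpq + jbe + j
;;     let addr = base.wrapping_add(offset);                           // addq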

;; For a static memory with no Spectre mitigations, we observe a smaller amount
;; of bounds checking: the offset check (`cmp + jbe + j`) and the offset
;; calculation (`add`)--4 instructions.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; cmpq $4096, %rax
; jbe label1; j label2
; block1:
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
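
;; In Rust-like pseudocode (illustrative only), the static case reduces to one
;; compare against the constant bound before the base is added:
;;
;;     if offset > 0x1000 { trap(); }            // cmpq + jbe + j
;;     let addr = base.wrapping_add(offset);     // addq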

;; For a static memory with no Spectre mitigations and the "right" size (4GB
;; memory, 2GB guard regions), Cranelift emits no bounds checking, simply
;; `add`--a single instruction.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
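
;; Why the check can be elided, in Rust-like arithmetic (illustrative): the
;; 32-bit index is zero-extended, so it can never reach past the 4GB bound,
;; and a constant `heap_addr` offset within the 2GB guard size lands in the
;; guard region and faults via memory permissions.
;;
;;     let max_index = u32::MAX as u64;     // 0xFFFF_FFFF
;;     assert!(max_index < 0x1_0000_0000);  // always within the 4GB bound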

@@ -1,6 +1,23 @@
test compile precise-output
target x86_64

;; Calculate a heap address on a dynamically-allocated memory. Because the
;; Spectre mitigations are on by default (i.e.,
;; `set enable_heap_access_spectre_mitigation=true`), this code not only does
;; the dynamic bounds check (`add + jnb + cmp + jbe + j`) but also re-compares
;; the address to the upper bound (`add + xor + cmp + cmov`)--Cranelift's
;; Spectre mitigation. With loads and ignoring intermediate `mov`s, this amounts
;; to a 10-instruction sequence.
;;
;; And it uses quite a few registers; see this breakdown of what each register
;; generally contains:
;; - %rax holds the passed-in heap offset (argument #1) and ends up holding the
;;   final address
;; - %rcx also holds the passed-in heap offset; checked for overflow when added
;;   to the `0x8000` immediate
;; - %rsi holds the VM context pointer (argument #2)
;; - %rdi holds the heap limit (computed from argument #2)
;; - %rdx holds the null pointer
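
;; A rough Rust-like sketch of the whole mitigated sequence (illustrative
;; names, not Cranelift's actual code). The second compare feeds a branchless
;; `cmov` so that a misspeculated access sees a null address instead of the
;; computed one:
;;
;;     let end = offset.checked_add(0x8000).unwrap_or_else(|| trap()); // addq + jnb
;;     if end > bound { trap(); }                                      // cmpq + jbe + j
;;     let mut addr = base.wrapping_add(offset);                       // addq
;;     let null = 0u64;                                                // xorq
;;     if end > bound { addr = null; }                                 // cmpq + cmovnbe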
function %f(i32, i64 vmctx) -> i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
@@ -8,7 +25,6 @@ function %f(i32, i64 vmctx) -> i64 {
    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32

block0(v0: i32, v1: i64):
    v2 = heap_addr.i64 heap0, v0, 0x8000
    return v2
}
@@ -34,3 +50,57 @@ block0(v0: i32, v1: i64):
; block2:
; ud2 heap_oob

;; The heap address calculation for this statically-allocated memory checks that
;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes
;; the same Spectre mitigation as above. This results in a 7-instruction
;; sequence (ignoring `mov`s).
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %r11d
; cmpq $4096, %r11
; jbe label1; j label2
; block1:
; movq %r11, %rax
; addq %rax, 0(%rdi), %rax
; xorq %rsi, %rsi, %rsi
; cmpq $4096, %r11
; cmovnbeq %rsi, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
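
;; Rust-like sketch (illustrative): the constant-bound compare happens twice,
;; once to branch and once to feed the branchless `cmov` that nulls the
;; address on the misspeculated path:
;;
;;     if offset > 0x1000 { trap(); }               // cmpq + jbe + j
;;     let mut addr = base.wrapping_add(offset);    // addq
;;     let null = 0u64;                             // xorq
;;     if offset > 0x1000 { addr = null; }          // cmpq + cmovnbe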

;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the
;; Spectre mitigation is not present. Cranelift relies on the memory permissions
;; and emits no bounds checking, simply `add`--a single instruction.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret