x64: improve tests for heap_addr (#4841)
* x64: improve tests for `heap_addr`

This change adds Cranelift `compile` tests for the various cases of `heap_addr`. The idea is to show more clearly what the penalties are for dynamically- vs. statically-allocated memory, as well as for turning Spectre mitigations on and off.

* Add test case: "right"-size memory with Spectre enabled
cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif (new file, 85 additions)
@@ -0,0 +1,85 @@
test compile precise-output
set enable_heap_access_spectre_mitigation=false
target x86_64

;; Calculate a heap address on a dynamically-allocated memory with Spectre
;; mitigations disabled. This is a 7-instruction sequence with loads, ignoring
;; intermediate `mov`s.
function %f(i32, i64 vmctx) -> i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    gv2 = load.i64 notrap aligned gv0+8
    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32

block0(v0: i32, v1: i64):
    v2 = heap_addr.i64 heap0, v0, 0x8000
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %edi, %eax
; movq 8(%rsi), %r11
; movq %rax, %rdi
; addq %rdi, $32768, %rdi
; jnb ; ud2 heap_oob ;
; cmpq %r11, %rdi
; jbe label1; j label2
; block1:
; addq %rax, 0(%rsi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
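
;; A minimal Rust-like sketch of the sequence above (illustrative names, not
;; Cranelift's actual code): trap if `offset + 0x8000` overflows, trap if the
;; sum exceeds the loaded bound, then add the offset to the heap base.
;;
;;     let end = offset.checked_add(0x8000).unwrap_or_else(|| trap()); // addq + jnb
;;     if end > bound { trap(); }                                      // cmpq + jbe + j
;;     let addr = base.wrapping_add(offset);                           // addq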

;; For a static memory with no Spectre mitigations, we observe a smaller amount
;; of bounds checking: the offset check (`cmp + jbe + j`) and the offset
;; calculation (`add`)--4 instructions.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; cmpq $4096, %rax
; jbe label1; j label2
; block1:
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
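
;; In Rust-like pseudocode (illustrative only), the static case reduces to one
;; compare against the constant bound before the base is added:
;;
;;     if offset > 0x1000 { trap(); }            // cmpq + jbe + j
;;     let addr = base.wrapping_add(offset);     // addq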

;; For a static memory with no Spectre mitigations and the "right" size (4GB
;; memory, 2GB guard regions), Cranelift emits no bounds checking, simply
;; `add`--a single instruction.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
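
;; Why the check can be elided, in Rust-like arithmetic (illustrative): the
;; 32-bit index is zero-extended, so it can never reach past the 4GB bound,
;; and a constant `heap_addr` offset within the 2GB guard size lands in the
;; guard region and faults via memory permissions.
;;
;;     let max_index = u32::MAX as u64;     // 0xFFFF_FFFF
;;     assert!(max_index < 0x1_0000_0000);  // always within the 4GB bound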

@@ -1,6 +1,23 @@
test compile precise-output
target x86_64

;; Calculate a heap address on a dynamically-allocated memory. Because the
;; Spectre mitigations are on by default (i.e.,
;; `set enable_heap_access_spectre_mitigation=true`), this code not only does
;; the dynamic bounds check (`add + jnb + cmp + jbe + j`) but also re-compares
;; the address to the upper bound (`add + xor + cmp + cmov`)--Cranelift's
;; Spectre mitigation. With loads and ignoring intermediate `mov`s, this amounts
;; to a 10-instruction sequence.
;;
;; And it uses quite a few registers; see this breakdown of what each register
;; generally contains:
;; - %rax holds the passed-in heap offset (argument #1) and ends up holding the
;;   final address
;; - %rcx also holds the passed-in heap offset; checked for overflow when added
;;   to the `0x8000` immediate
;; - %rsi holds the VM context pointer (argument #2)
;; - %rdi holds the heap limit (computed from argument #2)
;; - %rdx holds the null pointer
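
;; A rough Rust-like sketch of the whole mitigated sequence (illustrative
;; names, not Cranelift's actual code). The second compare feeds a branchless
;; `cmov` so that a misspeculated access sees a null address instead of the
;; computed one:
;;
;;     let end = offset.checked_add(0x8000).unwrap_or_else(|| trap()); // addq + jnb
;;     if end > bound { trap(); }                                      // cmpq + jbe + j
;;     let mut addr = base.wrapping_add(offset);                       // addq
;;     let null = 0u64;                                                // xorq
;;     if end > bound { addr = null; }                                 // cmpq + cmovnbe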
function %f(i32, i64 vmctx) -> i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
@@ -8,7 +25,6 @@ function %f(i32, i64 vmctx) -> i64 {
    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32

block0(v0: i32, v1: i64):
    v2 = heap_addr.i64 heap0, v0, 0x8000
    return v2
}
@@ -34,3 +50,57 @@ block0(v0: i32, v1: i64):
; block2:
; ud2 heap_oob

;; The heap address calculation for this statically-allocated memory checks that
;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes
;; the same Spectre mitigation as above. This results in a 7-instruction
;; sequence (ignoring `mov`s).
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %r11d
; cmpq $4096, %r11
; jbe label1; j label2
; block1:
; movq %r11, %rax
; addq %rax, 0(%rdi), %rax
; xorq %rsi, %rsi, %rsi
; cmpq $4096, %r11
; cmovnbeq %rsi, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; ud2 heap_oob
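
;; Rust-like sketch (illustrative): the constant-bound compare happens twice,
;; once to branch and once to feed the branchless `cmov` that nulls the
;; address on the misspeculated path:
;;
;;     if offset > 0x1000 { trap(); }               // cmpq + jbe + j
;;     let mut addr = base.wrapping_add(offset);    // addq
;;     let null = 0u64;                             // xorq
;;     if offset > 0x1000 { addr = null; }          // cmpq + cmovnbe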

;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the
;; Spectre mitigation is not present. Cranelift relies on the memory permissions
;; and emits no bounds checking, simply `add`--a single instruction.
function %f(i64 vmctx, i32) -> i64 system_v {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
    v10 = heap_addr.i64 heap0, v1, 0
    return v10
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %eax
; addq %rax, 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret