diff --git a/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif new file mode 100644 index 0000000000..ac5b1a8261 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif @@ -0,0 +1,85 @@ +test compile precise-output +set enable_heap_access_spectre_mitigation=false +target x86_64 + +;; Calculate a heap address on a dynamically-allocated memory with Spectre +;; mitigations disabled. This is a 7-instruction sequence with loads, ignoring +;; intermediate `mov`s. +function %f(i32, i64 vmctx) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + gv2 = load.i64 notrap aligned gv0+8 + heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 0x8000 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %edi, %eax +; movq 8(%rsi), %r11 +; movq %rax, %rdi +; addq %rdi, $32768, %rdi +; jnb ; ud2 heap_oob ; +; cmpq %r11, %rdi +; jbe label1; j label2 +; block1: +; addq %rax, 0(%rsi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob + +;; For a static memory with no Spectre mitigations, we observe a smaller amount +;; of bounds checking: the offset check (`cmp + jbe + j`) and the offset +;; calculation (`add`)--4 instructions. +function %f(i64 vmctx, i32) -> i64 system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v10 = heap_addr.i64 heap0, v1, 0 + return v10 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %esi, %eax +; cmpq $4096, %rax +; jbe label1; j label2 +; block1: +; addq %rax, 0(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob + +;; For a static memory with no Spectre mitigations and the "right" size (4GB +;; memory, 2GB guard regions), Cranelift emits no bounds checking, simply +;; `add`--a single instruction. +function %f(i64 vmctx, i32) -> i64 system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i64, v1: i32): + v10 = heap_addr.i64 heap0, v1, 0 + return v10 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %esi, %eax +; addq %rax, 0(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 44c0ee30b8..4f5290d452 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -1,6 +1,23 @@ test compile precise-output target x86_64 +;; Calculate a heap address on a dynamically-allocated memory. Because the +;; Spectre mitigations are on by default (i.e., +;; `set enable_heap_access_spectre_mitigation=true`), this code not only does +;; the dynamic bounds check (`add + jnb + cmp + jbe + j`) but also re-compares +;; the address to the upper bound (`add + xor + cmp + cmov`)--Cranelift's +;; Spectre mitigation. With loads and ignoring intermediate `mov`s, this amounts +;; to a 10-instruction sequence. +;; +;; And it uses quite a few registers; see this breakdown of what each register +;; generally contains: +;; - %rax holds the passed-in heap offset (argument #1) and ends up holding the +;; final address +;; - %rcx also holds the passed-in heap offset; checked for overflow when added +;; to the `0x8000` immediate +;; - %rsi holds the VM context pointer (argument #2) +;; - %rdi holds the heap limit (computed from argument #2) +;; - %rdx holds the null pointer function %f(i32, i64 vmctx) -> i64 { gv0 = vmctx gv1 = load.i64 notrap aligned gv0+0 @@ -8,7 +25,6 @@ function %f(i32, i64 vmctx) -> i64 { heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 0x8000 return v2 } @@ -34,3 +50,57 @@ block0(v0: i32, v1: i64): ; block2: ; ud2 heap_oob +;; The heap address calculation for this statically-allocated memory checks that +;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes +;; the same Spectre mitigation as above. This results in a 7-instruction +;; sequence (ignoring `mov`s). +function %f(i64 vmctx, i32) -> i64 system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v10 = heap_addr.i64 heap0, v1, 0 + return v10 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %esi, %r11d +; cmpq $4096, %r11 +; jbe label1; j label2 +; block1: +; movq %r11, %rax +; addq %rax, 0(%rdi), %rax +; xorq %rsi, %rsi, %rsi +; cmpq $4096, %r11 +; cmovnbeq %rsi, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob + +;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the +;; Spectre mitigation is not present. Cranelift relies on the memory permissions +;; and emits no bounds checking, simply `add`--a single instruction. +function %f(i64 vmctx, i32) -> i64 system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i64, v1: i32): + v10 = heap_addr.i64 heap0, v1, 0 + return v10 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %esi, %eax +; addq %rax, 0(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret +