Remove heaps from core Cranelift, push them into cranelift-wasm (#5386)

* cranelift-wasm: translate Wasm loads into lower-level CLIF operations
  Rather than using `heap_{load,store,addr}`.
* cranelift: Remove the `heap_{addr,load,store}` instructions
  These are now legalized in the `cranelift-wasm` frontend.
* cranelift: Remove the `ir::Heap` entity from CLIF
* Port basic memory operation tests to .wat filetests
* Remove test for verifying CLIF heaps
* Remove `heap_addr` from replace_branching_instructions_and_cfg_predecessors.clif test
* Remove `heap_addr` from readonly.clif test
* Remove `heap_addr` from `table_addr.clif` test
* Remove `heap_addr` from the simd-fvpromote_low.clif test
* Remove `heap_addr` from simd-fvdemote.clif test
* Remove `heap_addr` from the load-op-store.clif test
* Remove the CLIF heap runtest
* Remove `heap_addr` from the global_value.clif test
* Remove `heap_addr` from fpromote.clif runtests
* Remove `heap_addr` from fdemote.clif runtests
* Remove `heap_addr` from memory.clif parser test
* Remove `heap_addr` from reject_load_readonly.clif test
* Remove `heap_addr` from reject_load_notrap.clif test
* Remove `heap_addr` from load_readonly_notrap.clif test
* Remove `static-heap-without-guard-pages.clif` test
  Will be subsumed when we port `make-heap-load-store-tests.sh` to generating `.wat` tests.
* Remove `static-heap-with-guard-pages.clif` test
  Will be subsumed when we port `make-heap-load-store-tests.sh` over to `.wat` tests.
* Remove more heap tests
  These will be subsumed by porting `make-heap-load-store-tests.sh` over to `.wat` tests.
* Remove `heap_addr` from `simple-alias.clif` test
* Remove `heap_addr` from partial-redundancy.clif test
* Remove `heap_addr` from multiple-blocks.clif test
* Remove `heap_addr` from fence.clif test
* Remove `heap_addr` from extends.clif test
* Remove runtests that rely on heaps
  Heaps are not a thing in CLIF or the interpreter anymore
* Add generated load/store `.wat` tests
* Enable memory-related wasm features in `.wat` tests
* Remove CLIF heap from fcmp-mem-bug.clif test
* Add a mode for compiling `.wat` all the way to assembly in filetests
* Also generate WAT to assembly tests in `make-load-store-tests.sh`
* cargo fmt
* Reinstate `f{de,pro}mote.clif` tests without the heap bits
* Remove undefined doc link
* Remove outdated SVG and dot file from docs
* Add docs about `None` returns for base address computation helpers
* Factor out `env.heap_access_spectre_mitigation()` to a local
* Expand docs for `FuncEnvironment::heaps` trait method
* Restore f{de,pro}mote+load clif runtests with stack memory
2022-12-14 16:26:45 -08:00
parent e03d65cca7
commit c0b587ac5f
198 changed files with 2494 additions and 4232 deletions
--- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
@@ -1,88 +0,0 @@
-test compile precise-output
-set unwind_info=false
-set enable_heap_access_spectre_mitigation=true
-target aarch64
-
-function %dynamic_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   mov w8, w1
-;   ldr x9, [x0]
-;   mov x9, x9
-;   add x10, x0, x1, UXTW
-;   movz x7, #0
-;   subs xzr, x8, x9
-;   csel x0, x7, x10, hi
-;   csdb
-;   ret
-
-function %static_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   mov w6, w1
-;   add x7, x0, x1, UXTW
-;   movz x5, #0
-;   subs xzr, x6, #65536
-;   csel x0, x5, x7, hi
-;   csdb
-;   ret
-
-function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   mov w10, w1
-;   movz x9, #24
-;   adds x11, x10, x9
-;   b.lo 8 ; udf
-;   ldr x12, [x0]
-;   add x13, x0, x1, UXTW
-;   add x13, x13, #16
-;   movz x10, #0
-;   subs xzr, x11, x12
-;   csel x0, x10, x13, hi
-;   csdb
-;   ret
-
-function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   mov w8, w1
-;   add x9, x0, x1, UXTW
-;   add x9, x9, #16
-;   movz x6, #65512
-;   movz x10, #0
-;   subs xzr, x8, x6
-;   csel x0, x10, x9, hi
-;   csdb
-;   ret
-
--- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
@@ -1,86 +0,0 @@
-test compile precise-output
-set unwind_info=false
-target riscv64
-
-function %dynamic_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   uext.w a6,a1
-;   ld a7,0(a0)
-;   addi t3,a7,0
-;   add a7,a0,a6
-;   ugt a5,a6,t3##ty=i64
-;   li t3,0
-;   selectif_spectre_guard a0,t3,a7##test=a5
-;   ret
-
-function %static_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   uext.w a6,a1
-;   add a5,a0,a6
-;   lui a3,16
-;   ugt a6,a6,a3##ty=i64
-;   li a7,0
-;   selectif_spectre_guard a0,a7,a5##test=a6
-;   ret
-
-function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   uext.w t4,a1
-;   li a7,24
-;   add t0,t4,a7
-;   ult t1,t0,t4##ty=i64
-;   trap_if t1,heap_oob
-;   ld t1,0(a0)
-;   add t2,a0,t4
-;   addi t2,t2,16
-;   ugt t4,t0,t1##ty=i64
-;   li t1,0
-;   selectif_spectre_guard a0,t1,t2##test=t4
-;   ret
-
-function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   uext.w a7,a1
-;   add t3,a0,a7
-;   addi t3,t3,16
-;   lui a5,16
-;   addi a5,a5,4072
-;   ugt t4,a7,a5##ty=i64
-;   li t0,0
-;   selectif_spectre_guard a0,t0,t3##test=t4
-;   ret
-
--- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif
+++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif
@@ -1,81 +0,0 @@
-test compile precise-output
-target s390x
-
-function %dynamic_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   llgfr %r4, %r3
-;   lghi %r3, 0
-;   ag %r3, 0(%r2)
-;   agr %r2, %r4
-;   lghi %r5, 0
-;   clgr %r4, %r3
-;   locgrh %r2, %r5
-;   br %r14
-
-function %static_heap_check(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0, 0
-    return v2
-}
-
-; block0:
-;   llgfr %r4, %r3
-;   agr %r2, %r4
-;   lghi %r3, 0
-;   clgfi %r4, 65536
-;   locgrh %r2, %r3
-;   br %r14
-
-function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   llgfr %r5, %r3
-;   lghi %r4, 24
-;   algfr %r4, %r3
-;   jle 6 ; trap
-;   lg %r3, 0(%r2)
-;   agrk %r5, %r2, %r5
-;   aghik %r2, %r5, 16
-;   lghi %r5, 0
-;   clgr %r4, %r3
-;   locgrh %r2, %r5
-;   br %r14
-
-function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-; block0:
-;   llgfr %r5, %r3
-;   agr %r2, %r5
-;   aghi %r2, 16
-;   lghi %r4, 0
-;   clgfi %r5, 65512
-;   locgrh %r2, %r4
-;   br %r14
-
--- a/cranelift/filetests/filetests/isa/x64/fcmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/fcmp-mem-bug.clif
@@ -7,7 +7,6 @@ function u0:11335(i64 vmctx, i64, i32, i32, i32, i32, i32, i32, i32, i32) fast {
    gv2 = load.i64 notrap aligned gv1
    gv3 = vmctx
    gv4 = load.i64 notrap aligned readonly gv3+504
-    heap0 = static gv4, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
    sig0 = (i64 vmctx, i64, i32, i32, i32) -> i32 fast
    sig1 = (i64 vmctx, i64, i32, i32, i32) -> i32 fast
    sig2 = (i64 vmctx, i64, i32, i32, i32, i32, i32, i32, i32, i32) fast
--- a/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif
@@ -1,86 +0,0 @@
-test compile precise-output
-set enable_heap_access_spectre_mitigation=false
-target x86_64
-
-;; Calculate a heap address on a dynamically-allocated memory with Spectre
-;; mitigations disabled. This is a 7-instruction sequence with loads, ignoring
-;; intermediate `mov`s.
-function %f(i32, i64 vmctx) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    gv2 = load.i64 notrap aligned gv0+8
-    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32
-
-block0(v0: i32, v1: i64):
-    v2 = heap_addr.i64 heap0, v0, 0x8000, 0
-    return v2
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %edi, %eax
-;   movq    %rax, %r10
-;   addq    %r10, $32768, %r10
-;   jnb ; ud2 heap_oob ;
-;   movq    8(%rsi), %r11
-;   cmpq    %r11, %r10
-;   jbe     label1; j label2
-; block1:
-;   addq    %rax, 0(%rsi), %rax
-;   addq    %rax, $32768, %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-; block2:
-;   ud2 heap_oob
-
-;; For a static memory with no Spectre mitigations, we observe a smaller amount
-;; of bounds checking: the offset check (`cmp + jbe + j`) and the offset
-;; calculation (`add`)--4 instructions.
-function %f(i64 vmctx, i32) -> i64 system_v {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v10 = heap_addr.i64 heap0, v1, 0, 0
-    return v10
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %eax
-;   cmpq    $4096, %rax
-;   jbe     label1; j label2
-; block1:
-;   addq    %rax, 0(%rdi), %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-; block2:
-;   ud2 heap_oob
-
-;; For a static memory with no Spectre mitigations and the "right" size (4GB
-;; memory, 2GB guard regions), Cranelift emits no bounds checking, simply
-;; `add`--a single instruction.
-function %f(i64 vmctx, i32) -> i64 system_v {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v10 = heap_addr.i64 heap0, v1, 0, 0
-    return v10
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %eax
-;   addq    %rax, 0(%rdi), %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-
--- a/cranelift/filetests/filetests/isa/x64/heap.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap.clif
@@ -1,147 +0,0 @@
-test compile precise-output
-target x86_64
-
-;; Calculate a heap address on a dynamically-allocated memory. Because the
-;; Spectre mitigations are on by default (i.e.,
-;; `set enable_heap_access_spectre_mitigation=true`), this code not only does
-;; the dynamic bounds check (`add + jnb + cmp + jbe + j`) but also re-compares
-;; the address to the upper bound (`add + xor + cmp + cmov`)--Cranelift's
-;; Spectre mitigation. With loads and ignoring intermediate `mov`s, this amounts
-;; to a 10-instruction sequence.
-;;
-;; And it uses quite a few registers; see this breakdown of what each register
-;; generally contains:
-;; - %rax holds the passed-in heap offset (argument #1) and ends up holding the
-;;   final address
-;; - %rcx also holds the passed-in heap offset; checked for overflow when added
-;;   to the `0x8000` immediate
-;; - %rsi holds the VM context pointer (argument #2)
-;; - %rdi holds the heap limit (computed from argument #2)
-;; - %rdx holds the null pointer
-function %f(i32, i64 vmctx) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    gv2 = load.i64 notrap aligned gv0+8
-    heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32
-
-block0(v0: i32, v1: i64):
-    v2 = heap_addr.i64 heap0, v0, 0x8000, 0
-    return v2
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %edi, %eax
-;   movq    %rax, %rdi
-;   addq    %rdi, $32768, %rdi
-;   jnb ; ud2 heap_oob ;
-;   movq    8(%rsi), %rcx
-;   addq    %rax, 0(%rsi), %rax
-;   addq    %rax, $32768, %rax
-;   xorq    %rsi, %rsi, %rsi
-;   cmpq    %rcx, %rdi
-;   cmovnbeq %rsi, %rax, %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-
-;; The heap address calculation for this statically-allocated memory checks that
-;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes
-;; the same Spectre mitigation as above. This results in a 7-instruction
-;; sequence (ignoring `mov`s).
-function %f(i64 vmctx, i32) -> i64 system_v {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v10 = heap_addr.i64 heap0, v1, 0, 0
-    return v10
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %r9d
-;   movq    %r9, %rax
-;   addq    %rax, 0(%rdi), %rax
-;   xorq    %r8, %r8, %r8
-;   cmpq    $4096, %r9
-;   cmovnbeq %r8, %rax, %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-
-;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the
-;; Spectre mitigation is not present. Cranelift relies on the memory permissions
-;; and emits no bounds checking, simply `add`--a single instruction.
-function %f(i64 vmctx, i32) -> i64 system_v {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v10 = heap_addr.i64 heap0, v1, 0, 0
-    return v10
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %eax
-;   addq    %rax, 0(%rdi), %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-
-function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %esi
-;   movq    %rsi, %r11
-;   addq    %r11, $24, %r11
-;   jnb ; ud2 heap_oob ;
-;   movq    %rdi, %rax
-;   addq    %rax, %rsi, %rax
-;   addq    %rax, $16, %rax
-;   xorq    %rsi, %rsi, %rsi
-;   cmpq    0(%rdi), %r11
-;   cmovnbeq %rsi, %rax, %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-
-function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
-    gv0 = vmctx
-    heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
-
-block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 16, 8
-    return v2
-}
-
-;   pushq   %rbp
-;   movq    %rsp, %rbp
-; block0:
-;   movl    %esi, %r10d
-;   movq    %rdi, %rax
-;   addq    %rax, %r10, %rax
-;   addq    %rax, $16, %rax
-;   xorq    %r9, %r9, %r9
-;   cmpq    $65512, %r10
-;   cmovnbeq %r9, %rax, %rax
-;   movq    %rbp, %rsp
-;   popq    %rbp
-;   ret
-