Cranelift: Make heap_addr return calculated base + index + offset (#5231)

* Cranelift: Make `heap_addr` return calculated `base + index + offset` rather than just `base + index`. (Note: I've chosen to use the nomenclature "index" for the dynamic operand and "offset" for the static immediate.) This moves the addition of the `offset` into `heap_addr`, instead of leaving it for the subsequent memory operation, so that we can Spectre-guard the full address, and not allow speculative execution to read the first 4GiB of memory. Before this commit, we were effectively doing: `load(spectre_guard(base + index) + offset)`. Now we are effectively doing: `load(spectre_guard(base + index + offset))`. Finally, this also corrects `heap_addr`'s documented semantics to say that it returns an address that will trap on access if `index + offset + access_size` is out of bounds for the given heap, rather than saying that the `heap_addr` itself will trap. This matches the implemented behavior for static memories, and after https://github.com/bytecodealliance/wasmtime/pull/5190 lands (which is blocked on this commit) will also match the implemented behavior for dynamic memories. * Update `heap_addr` docs. * Factor out `offset + size` to a helper.
2022-11-09 11:53:51 -08:00
parent 33a192556e
commit fc62d4ad65
39 changed files with 563 additions and 284 deletions
--- a/cranelift/filetests/filetests/alias/extends.clif
+++ b/cranelift/filetests/filetests/alias/extends.clif
@@ -9,9 +9,9 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i64, i64, i64 {
    gv0 = vmctx
    gv1 = load.i64 notrap readonly aligned gv0+8
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
- 
+
 block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 12, 0

    ;; Initial load. This will not be reused by anything below, even
    ;; though it does access the same address.
--- a/cranelift/filetests/filetests/alias/fence.clif
+++ b/cranelift/filetests/filetests/alias/fence.clif
@@ -9,9 +9,9 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
    gv0 = vmctx
    gv1 = load.i64 notrap readonly aligned gv0+8
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
- 
+
 block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 12, 0

    v3 = load.i32 v2+8
    v4 = load.i32 vmctx v0+16
@@ -39,7 +39,7 @@ block0(v0: i64, v1: i32):
    v11 = atomic_load.i32 v0

    v12 = load.i32 vmctx v0+16
-    ; check: v12 = load.i32 vmctx v0+16    
+    ; check: v12 = load.i32 vmctx v0+16

    return v3, v4, v5, v6, v7, v8, v9, v10, v11, v12
 }
--- a/cranelift/filetests/filetests/alias/multiple-blocks.clif
+++ b/cranelift/filetests/filetests/alias/multiple-blocks.clif
@@ -11,7 +11,7 @@ function %f0(i64 vmctx, i32) -> i32 {


 block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 12, 0
    v3 = load.i32 v2+8
    brz v2, block1
    jump block2
--- a/cranelift/filetests/filetests/alias/partial-redundancy.clif
+++ b/cranelift/filetests/filetests/alias/partial-redundancy.clif
@@ -16,17 +16,17 @@ block0(v0: i64, v1: i32):
    jump block2

 block1:
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 68, 0
    v3 = load.i32 v2+64
    jump block3(v3)

 block2:
-    v4 = heap_addr.i64 heap0, v1, 0
+    v4 = heap_addr.i64 heap0, v1, 132, 0
    v5 = load.i32 v4+128
    jump block3(v5)

 block3(v6: i32):
-    v7 = heap_addr.i64 heap0, v1, 0
+    v7 = heap_addr.i64 heap0, v1, 68, 0
    v8 = load.i32 v7+64
    ;; load should survive:
    ; check: v8 = load.i32 v7+64
--- a/cranelift/filetests/filetests/alias/simple-alias.clif
+++ b/cranelift/filetests/filetests/alias/simple-alias.clif
@@ -13,13 +13,13 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i32 {
    fn0 = %g(i64 vmctx)

 block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 12, 0
    v3 = load.i32 v2+8
    ;; This should reuse the load above.
-    v4 = heap_addr.i64 heap0, v1, 0
+    v4 = heap_addr.i64 heap0, v1, 12, 0
    v5 = load.i32 v4+8
    ; check: v5 -> v3
-    
+
    call fn0(v0)

    ;; The second load is redundant wrt the first, but the call above
@@ -27,7 +27,7 @@ block0(v0: i64, v1: i32):
    v6 = load.i32 v4+8
    v7 = load.i32 v4+8
    ; check: v7 -> v6
-    
+
    return v3, v5, v6, v7
 }

@@ -42,13 +42,13 @@ function %f1(i64 vmctx, i32) -> i32 {
    fn0 = %g(i64 vmctx)

 block0(v0: i64, v1: i32):
-    v2 = heap_addr.i64 heap0, v1, 0
+    v2 = heap_addr.i64 heap0, v1, 12, 0
    store.i32 v1, v2+8

    ;; This load should pick up the store above.
-    v3 = heap_addr.i64 heap0, v1, 0
+    v3 = heap_addr.i64 heap0, v1, 12, 0
    v4 = load.i32 v3+8
    ; check: v4 -> v1
-    
+
    return v4
 }