Cranelift: Make heap_addr return calculated base + index + offset (#5231)

* Cranelift: Make `heap_addr` return calculated `base + index + offset`, rather than returning just the `base + index`. (Note: I've chosen to use the nomenclature "index" for the dynamic operand and "offset" for the static immediate.) This moves the addition of the `offset` into `heap_addr`, instead of leaving it for the subsequent memory operation, so that we can Spectre-guard the full address, and not allow speculative execution to read the first 4GiB of memory. Before this commit, we were effectively doing `load(spectre_guard(base + index) + offset)`. Now we are effectively doing `load(spectre_guard(base + index + offset))`. Finally, this also corrects `heap_addr`'s documented semantics to say that it returns an address that will trap on access if `index + offset + access_size` is out of bounds for the given heap, rather than saying that the `heap_addr` itself will trap. This matches the implemented behavior for static memories, and after https://github.com/bytecodealliance/wasmtime/pull/5190 lands (which is blocked on this commit) will also match the implemented behavior for dynamic memories. * Update `heap_addr` docs * Factor out `offset + size` to a helper
2022-11-09 11:53:51 -08:00
parent 33a192556e
commit fc62d4ad65
39 changed files with 563 additions and 284 deletions
--- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif
+++ b/cranelift/filetests/filetests/runtests/conversions-load-store.clif
@@ -11,7 +11,7 @@ function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 {
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64

 block0(v0: i64, v1: i64, v2: f32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.f32 v2, v3
    v4 = load.f32 v3
    v5 = fpromote.f64 v4
@@ -31,7 +31,7 @@ function %fdemote_test(i64 vmctx, i64, f64) -> f32 {
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64

 block0(v0: i64, v1: i64, v2: f64):
-    v3 = heap_addr.i64 heap0, v1, 8
+    v3 = heap_addr.i64 heap0, v1, 0, 8
    store.f64 v2, v3
    v4 = load.f64 v3
    v5 = fdemote.f32 v4
@@ -51,7 +51,7 @@ function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 {
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: f64x2):
-    v3 = heap_addr.i64 heap0, v1, 16
+    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f64x2 v2, v3
    v4 = load.f64x2 v3
    v5 = fvdemote v4
@@ -72,7 +72,7 @@ function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 {
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: f32x4):
-    v3 = heap_addr.i64 heap0, v1, 16
+    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f32x4 v2, v3
    v4 = load.f32x4 v3
    v5 = fvpromote_low v4
--- a/cranelift/filetests/filetests/runtests/global_value.clif
+++ b/cranelift/filetests/filetests/runtests/global_value.clif
@@ -12,7 +12,7 @@ function %store_load(i64 vmctx, i64, i32) -> i32 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 0
+    v3 = heap_addr.i64 heap0, v1, 0, 0
    store.i32 v2, v3

    v4 = global_value.i64 gv1
--- a/cranelift/filetests/filetests/runtests/heap.clif
+++ b/cranelift/filetests/filetests/runtests/heap.clif
@@ -11,7 +11,7 @@ function %static_heap_i64(i64 vmctx, i64, i32) -> i32 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -29,7 +29,7 @@ function %static_heap_i32(i64 vmctx, i32, i32) -> i32 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i32

 block0(v0: i64, v1: i32, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -47,7 +47,7 @@ function %heap_no_min(i64 vmctx, i32, i32) -> i32 {
    heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0, index_type i32

 block0(v0: i64, v1: i32, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -66,7 +66,7 @@ function %dynamic_i64(i64 vmctx, i64, i32) -> i32 {
    heap0 = dynamic gv1, bound gv2, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -85,7 +85,7 @@ function %dynamic_i32(i64 vmctx, i32, i32) -> i32 {
    heap0 = dynamic gv1, bound gv2, offset_guard 0, index_type i32

 block0(v0: i64, v1: i32, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -110,11 +110,11 @@ block0(v0: i64, v1: i32, v2: i32):
    v4 = iconst.i32 0

    ; Store lhs in heap0
-    v5 = heap_addr.i64 heap0, v3, 4
+    v5 = heap_addr.i64 heap0, v3, 0, 4
    store.i32 v1, v5

    ; Store rhs in heap1
-    v6 = heap_addr.i64 heap1, v4, 4
+    v6 = heap_addr.i64 heap1, v4, 0, 4
    store.i32 v2, v6


@@ -146,11 +146,11 @@ block0(v0: i64, v1: i32, v2: i32):
    v4 = iconst.i64 0

    ; Store lhs in heap0
-    v5 = heap_addr.i64 heap0, v3, 4
+    v5 = heap_addr.i64 heap0, v3, 0, 4
    store.i32 v1, v5

    ; Store rhs in heap1
-    v6 = heap_addr.i64 heap1, v4, 4
+    v6 = heap_addr.i64 heap1, v4, 0, 4
    store.i32 v2, v6


@@ -172,7 +172,7 @@ function %unaligned_access(i64 vmctx, i64, i32) -> i32 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
@@ -196,7 +196,7 @@ function %iadd_imm(i64 vmctx, i32) -> i32 {

 block0(v0: i64, v1: i32):
    v2 = iconst.i64 0
-    v3 = heap_addr.i64 heap0, v2, 4
+    v3 = heap_addr.i64 heap0, v2, 0, 4
    store.i32 v1, v3
    v4 = load.i32 v3
    return v4
@@ -211,7 +211,7 @@ function %heap_limit_i64(i64 vmctx, i64, i32) -> i32 {
    heap0 = static gv1, min 0, bound 0x8, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.i32 v2, v3
    v4 = load.i32 v3
    return v4
--- a/cranelift/filetests/filetests/runtests/load-op-store.clif
+++ b/cranelift/filetests/filetests/runtests/load-op-store.clif
@@ -2,7 +2,7 @@ test run
 target x86_64
 target s390x
 target aarch64
-target riscv64 
+target riscv64


 function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 {
@@ -11,7 +11,7 @@ function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i64):
-    v3 = heap_addr.i64 heap0, v1, 8
+    v3 = heap_addr.i64 heap0, v1, 0, 8
    v4 = iconst.i64 42
    store.i64 v4, v3
    v5 = load.i64 v3
@@ -30,7 +30,7 @@ function %load_op_store_iadd_i32(i64 vmctx, i64, i32) -> i32 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i32):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    v4 = iconst.i32 42
    store.i32 v4, v3
    v5 = load.i32 v3
@@ -49,7 +49,7 @@ function %load_op_store_iadd_i8(i64 vmctx, i64, i8) -> i8 {
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i8):
-    v3 = heap_addr.i64 heap0, v1, 4
+    v3 = heap_addr.i64 heap0, v1, 0, 4
    v4 = iconst.i8 42
    store.i8 v4, v3
    v5 = load.i8 v3
@@ -68,7 +68,7 @@ function %load_op_store_iadd_isub_iand_ior_ixor_i64(i64 vmctx, i64, i64) -> i64
    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64

 block0(v0: i64, v1: i64, v2: i64):
-    v3 = heap_addr.i64 heap0, v1, 8
+    v3 = heap_addr.i64 heap0, v1, 0, 8
    store.i64 v2, v3
    v4 = load.i64 v3
    v5 = iconst.i64 1
--- a/cranelift/filetests/filetests/runtests/table_addr.clif
+++ b/cranelift/filetests/filetests/runtests/table_addr.clif
@@ -128,7 +128,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64):
    ; v1 - heap offset (bytes)
    ; v2 - table offset (elements)
    ; v3 - store/load value
-    v4 = heap_addr.i64 heap0, v1, 0
+    v4 = heap_addr.i64 heap0, v1, 0, 0
    v5 = table_addr.i64 table0, v2, +2

    ; Store via heap, load via table