Add heap_addr custom legalization.

The expansion of a heap_addr instruction depends on the type of heap and its configuration, so this is handled by custom code. Add a couple examples of heap access code to the language reference manual.
2017-08-24 14:04:35 -07:00
parent 3b71a27632
commit aae946128b
9 changed files with 250 additions and 10 deletions
--- a/cranelift/docs/cton_lexer.py
+++ b/cranelift/docs/cton_lexer.py
@@ -31,16 +31,13 @@ class CretonneLexer(RegexLexer):
                bygroups(Comment.Single, Comment.Special, Comment.Single)),
            # Plain comments.
            (r';.*?$', Comment.Single),
-            # Strings are in double quotes, support \xx escapes only.
-            (r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
-            # A naked function name following 'function' is also a string.
-            (r'\b(function)([ \t]+)(\w+)\b',
-                bygroups(Keyword, Whitespace, String.Symbol)),
+            # Strings are prefixed by % or # with hex.
+            (r'%\w+|#[0-9a-fA-F]*', String),
            # Numbers.
-            (r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
-            (r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP]\d+)?', Number.Hex),
-            (r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
-            (r'[-+]?\d+', Number.Integer),
+            (r'[-+]?0[xX][0-9a-fA-F_]+', Number.Hex),
+            (r'[-+]?0[xX][0-9a-fA-F_]*\.[0-9a-fA-F_]*([pP]\d+)?', Number.Hex),
+            (r'[-+]?([0-9_]+\.[0-9_]+([eE]\d+)?|s?NaN|Inf)', Number.Float),
+            (r'[-+]?[0-9_]+', Number.Integer),
            # Known attributes.
            (keywords('uext', 'sext'), Name.Attribute),
            # Well known value types.
@@ -48,7 +45,7 @@ class CretonneLexer(RegexLexer):
            # v<nn> = value
            # ss<nn> = stack slot
            # jt<nn> = jump table
-            (r'(v|ss|jt)\d+', Name.Variable),
+            (r'(v|ss|gv|jt|fn|sig|heap)\d+', Name.Variable),
            # ebb<nn> = extended basic block
            (r'(ebb)\d+', Name.Label),
            # Match instruction names in context.
--- a/cranelift/docs/heapex-dyn.cton
+++ b/cranelift/docs/heapex-dyn.cton
@@ -0,0 +1,15 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    gv1 = vmctx+72
+    heap0 = dynamic gv0, min 0x1000, bound gv1, guard 0
+
+ebb0(v0: i32):
+    v1 = heap_addr.i64 heap0, v0, 20
+    v2 = load.f32 v1+16
+    v3 = heap_addr.i64 heap0, v0, 24
+    v4 = load.f32 v3+20
+    v5 = fadd v2, v4
+    return v5
+}
--- a/cranelift/docs/heapex-sm32.cton
+++ b/cranelift/docs/heapex-sm32.cton
@@ -0,0 +1,14 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000
+
+ebb0(v0: i32):
+    v1 = heap_addr.i32 heap0, v0, 1
+    v2 = load.f32 v1+16
+    v3 = load.f32 v1+20
+    v4 = fadd v2, v3
+    return v4
+}
+
--- a/cranelift/docs/heapex-sm64.cton
+++ b/cranelift/docs/heapex-sm64.cton
@@ -0,0 +1,13 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
+
+ebb0(v0: i32):
+    v1 = heap_addr.i64 heap0, v0, 1
+    v2 = load.f32 v1+16
+    v3 = load.f32 v1+20
+    v4 = fadd v2, v3
+    return v4
+}
--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -636,6 +636,37 @@ is resized. The bound of a dynamic heap is stored in a global variable.
    :arg BoundGV: Global variable containing the current heap bound in bytes.
    :arg GuardBytes: Size of the guard pages in bytes.

+Heap examples
+~~~~~~~~~~~~~
+
+The SpiderMonkey VM prefers to use static heaps with a 4 GB bound and 2 GB of
+guard pages when running WebAssembly code on 64-bit CPUs. The combination of a
+4 GB static bound and 1-byte bounds checks means that no code needs to be
+generated for bounds checks at all:
+
+.. literalinclude:: heapex-sm64.cton
+    :language: cton
+    :lines: 2-
+
+A static heap can also be used for 32-bit code when the WebAssembly module
+declares a small upper bound on its memory. A 1 MB static bound with a single 4
+KB guard page still has opportunities for sharing bounds checking code:
+
+.. literalinclude:: heapex-sm32.cton
+    :language: cton
+    :lines: 2-
+
+If the upper bound on the heap size is too large, a dynamic heap is required
+instead.
+
+Finally, a runtime environment that simply allocates a heap with
+:c:func:`malloc()` may not have any guard pages at all. In that case, full
+bounds checking is required for each access:
+
+.. literalinclude:: heapex-dyn.cton
+    :language: cton
+    :lines: 2-
+

 Operations
 ==========
--- a/cranelift/filetests/isa/intel/legalize-memory.cton
+++ b/cranelift/filetests/isa/intel/legalize-memory.cton
@@ -27,3 +27,26 @@ ebb1(v1: i64):
    return v2
    ; check: return $v2
 }
+
+; SpiderMonkey VM-style static 4+2 GB heap.
+; This eliminates bounds checks completely for offsets < 2GB.
+function %staticheap_sm64(i32, i64 vmctx) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
+
+ebb0(v0: i32, v999: i64):
+    ; check: $ebb0(
+    v1 = heap_addr.i64 heap0, v0, 1
+    ; Boundscheck should be eliminated.
+    ; Checks here are assuming that no peephole opts fold the load offsets.
+    ; nextln: $(xoff=$V) = uextend.i64 $v0
+    ; nextln: $(haddr=$V) = iadd_imm $v999, 64
+    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $v1 = iadd $hbase, $xoff
+    v2 = load.f32 v1+16
+    ; nextln: $v2 = load.f32 $v1+16
+    v3 = load.f32 v1+20
+    ; nextln: $v3 = load.f32 $v1+20
+    v4 = fadd v2, v3
+    return v4
+}
--- a/lib/cretonne/meta/base/legalize.py
+++ b/lib/cretonne/meta/base/legalize.py
@@ -46,6 +46,7 @@ expand = XFormGroup('expand', """

 # Custom expansions for memory objects.
 expand.custom_legalize(insts.global_addr, 'expand_global_addr')
+expand.custom_legalize(insts.heap_addr, 'expand_heap_addr')

 x = Var('x')
 y = Var('y')
--- a/lib/cretonne/src/legalizer/heap.rs
+++ b/lib/cretonne/src/legalizer/heap.rs
@@ -0,0 +1,144 @@
+//! Legalization of heaps.
+//!
+//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
+//! instruction into code that depends on the kind of heap referenced.
+
+use cursor::{Cursor, FuncCursor};
+use flowgraph::ControlFlowGraph;
+use ir::{self, InstBuilder, MemFlags};
+use ir::condcodes::IntCC;
+
+/// Legalize a `heap_addr` instruction by expanding it into the code sequence required by the
+/// style of the heap it refers to.
+///
+/// Panics if `inst` is not a `heap_addr` instruction.
+pub fn expand_heap_addr(inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph) {
+    // Pull the heap reference, the offset operand and the access size out of the instruction.
+    let (heap_ref, index, access_size) = if let ir::InstructionData::HeapAddr {
+               opcode,
+               heap,
+               arg,
+               imm,
+           } = func.dfg[inst] {
+        assert_eq!(opcode, ir::Opcode::HeapAddr);
+        (heap, arg, imm.into())
+    } else {
+        panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None))
+    };
+
+    // Static heaps carry a constant bound; dynamic heaps name a global variable holding it.
+    match func.heaps[heap_ref].style {
+        ir::HeapStyle::Static { bound } => {
+            static_addr(inst, heap_ref, index, access_size, bound.into(), func)
+        }
+        ir::HeapStyle::Dynamic { bound_gv } => {
+            dynamic_addr(inst, heap_ref, index, access_size, bound_gv, func)
+        }
+    }
+}
+
+/// Expand a `heap_addr` for a dynamic heap, whose current bound is loaded through `bound_gv`.
+fn dynamic_addr(inst: ir::Inst,
+                heap: ir::Heap,
+                offset: ir::Value,
+                size: u32,
+                bound_gv: ir::GlobalVar,
+                func: &mut ir::Function) {
+    let size = size as i64; // Widened so it can be negated and compared with `min_size` below.
+    let offset_ty = func.dfg.value_type(offset);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let min_size = func.heaps[heap].min_size.into();
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Start with the bounds check. Trap if `offset + size > bound`.
+    let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
+    let bound = pos.ins().load(offset_ty, MemFlags::new(), bound_addr, 0);
+
+    let oob;
+    if size == 1 {
+        // `offset > bound - 1` is the same as `offset >= bound`, which needs no adjusted bound.
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
+    } else if size <= min_size {
+        // We know that bound >= min_size, so here we can compare `offset > bound - size` without
+        // wrapping.
+        let adj_bound = pos.ins().iadd_imm(bound, -size);
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
+    } else {
+        // `bound - size` could wrap here, so adjust the offset instead and check it for overflow.
+        let size_val = pos.ins().iconst(offset_ty, size);
+        let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
+        pos.ins().trapnz(overflow); // `offset + size` wrapped around.
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
+    }
+    pos.ins().trapnz(oob); // Trap when the access is out of bounds.
+
+    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Expand a `heap_addr` for a static heap.
+///
+/// A static heap has a constant `bound`, so the bounds check `offset + size > bound` is emitted
+/// as a comparison of `offset` against the constant `limit = bound - size`:
+///
+/// - If `size > bound`, no offset can be in bounds. An unconditional trap is emitted and `inst`
+///   is replaced by `iconst 0`, presumably so that its result value remains defined.
+/// - If `offset` is an `i32` and `limit >= 0xffff_ffff`, no 32-bit offset can exceed the limit,
+///   so the check is omitted entirely.
+/// - Otherwise a compare-and-trap sequence is emitted before the address computation.
+fn static_addr(inst: ir::Inst,
+               heap: ir::Heap,
+               offset: ir::Value,
+               size: u32,
+               bound: i64,
+               func: &mut ir::Function) {
+    let size = size as i64;
+    let offset_ty = func.dfg.value_type(offset);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Start with the bounds check. Trap if `offset + size > bound`.
+    if size > bound {
+        // This will simply always trap since `offset >= 0`.
+        // NOTE(review): if `trap` is a terminator, the `iconst` placed after it in the same EBB
+        // may be rejected by the verifier -- confirm.
+        pos.ins().trap();
+        pos.func.dfg.replace(inst).iconst(addr_ty, 0);
+        return;
+    }
+
+    // Check `offset > limit` which is now known non-negative.
+    let limit = bound - size;
+
+    // We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
+    // more.
+    if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
+        let oob = match limit.checked_add(1) {
+            // `offset > limit` is the same as `offset >= limit + 1`. Prefer the latter form when
+            // limit is odd because an even number is likely to be a convenient constant on ARM
+            // and other RISC architectures.
+            Some(even_limit) if limit & 1 == 1 => {
+                pos.ins()
+                    .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, even_limit)
+            }
+            // The limit is already even, or `limit + 1` does not fit in an `i64`.
+            _ => {
+                pos.ins()
+                    .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
+            }
+        };
+        pos.ins().trapnz(oob);
+    }
+
+    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Emit code for the base address computation of a `heap_addr` instruction.
+///
+/// Zero-extends `offset` to `addr_ty` if the types differ, then replaces `inst` with an `iadd`.
+fn offset_addr(inst: ir::Inst,
+               heap: ir::Heap,
+               addr_ty: ir::Type,
+               mut offset: ir::Value,
+               offset_ty: ir::Type,
+               func: &mut ir::Function) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Convert `offset` to `addr_ty`.
+    if offset_ty != addr_ty {
+        offset = pos.ins().uextend(addr_ty, offset);
+    }
+
+    // Add the heap base address, which is loaded through the heap's base global variable.
+    match pos.func.heaps[heap].base {
+        ir::HeapBase::ReservedReg => unimplemented!(), // Reserved-register bases: not implemented.
+        ir::HeapBase::GlobalVar(base_gv) => {
+            let base_addr = pos.ins().global_addr(addr_ty, base_gv);
+            let base = pos.ins().load(addr_ty, MemFlags::new(), base_addr, 0);
+            pos.func.dfg.replace(inst).iadd(base, offset);
+        }
+    }
+}
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -22,9 +22,11 @@ use bitset::BitSet;

 mod boundary;
 mod globalvar;
+mod heap;
 mod split;

 use self::globalvar::expand_global_addr;
+use self::heap::expand_heap_addr;

 /// Legalize `func` for `isa`.
 ///