Add heap_addr custom legalization.

The expansion of a heap_addr instruction depends on the type of heap and its configuration, so this is handled by custom code. Add a couple examples of heap access code to the language reference manual.
2017-08-24 14:04:35 -07:00
parent 3b71a27632
commit aae946128b
9 changed files with 250 additions and 10 deletions
--- a/cranelift/docs/cton_lexer.py
+++ b/cranelift/docs/cton_lexer.py
@@ -31,16 +31,13 @@ class CretonneLexer(RegexLexer):
                bygroups(Comment.Single, Comment.Special, Comment.Single)),
            # Plain comments.
            (r';.*?$', Comment.Single),
-            # Strings are in double quotes, support \xx escapes only.
+            # Strings are prefixed by % or # with hex.
-            (r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
+            (r'%\w+|#[0-9a-fA-F]*', String),
            # A naked function name following 'function' is also a string.
            (r'\b(function)([ \t]+)(\w+)\b',
                bygroups(Keyword, Whitespace, String.Symbol)),
            # Numbers.
-            (r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
+            (r'[-+]?0[xX][0-9a-fA-F_]+', Number.Hex),
-            (r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP]\d+)?', Number.Hex),
+            (r'[-+]?0[xX][0-9a-fA-F_]*\.[0-9a-fA-F_]*([pP]\d+)?', Number.Hex),
-            (r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
+            (r'[-+]?([0-9_]+\.[0-9_]+([eE]\d+)?|s?NaN|Inf)', Number.Float),
-            (r'[-+]?\d+', Number.Integer),
+            (r'[-+]?[0-9_]+', Number.Integer),
            # Known attributes.
            (keywords('uext', 'sext'), Name.Attribute),
            # Well known value types.
@@ -48,7 +45,7 @@ class CretonneLexer(RegexLexer):
            # v<nn> = value
            # ss<nn> = stack slot
            # jt<nn> = jump table
-            (r'(v|ss|jt)\d+', Name.Variable),
+            (r'(v|ss|gv|jt|fn|sig|heap)\d+', Name.Variable),
            # ebb<nn> = extended basic block
            (r'(ebb)\d+', Name.Label),
            # Match instruction names in context.
--- a/cranelift/docs/heapex-dyn.cton
+++ b/cranelift/docs/heapex-dyn.cton
@@ -0,0 +1,15 @@
 test verifier
 ; Add the two f32 values loaded at byte offsets 16 and 20 from heap offset v0.
 ; heap0 is a dynamic heap declared with a zero-byte guard, so each access
 ; below carries its own heap_addr covering the full extent of its load.
 function %add_members(i32) -> f32 spiderwasm {
    gv0 = vmctx+64
    gv1 = vmctx+72
    heap0 = dynamic gv0, min 0x1000, bound gv1, guard 0
 ebb0(v0: i32):
    ; Check 20 bytes starting at v0, which covers the 4-byte load at +16.
    v1 = heap_addr.i64 heap0, v0, 20
    v2 = load.f32 v1+16
    ; Check 24 bytes starting at v0, which covers the 4-byte load at +20.
    v3 = heap_addr.i64 heap0, v0, 24
    v4 = load.f32 v3+20
    v5 = fadd v2, v4
    return v5
 }
--- a/cranelift/docs/heapex-sm32.cton
+++ b/cranelift/docs/heapex-sm32.cton
@@ -0,0 +1,14 @@
 test verifier
 ; Add the two f32 values loaded at byte offsets 16 and 20 from heap offset v0.
 ; heap0 is a static heap with a 1 MB bound (0x10_0000) and a 4 KB guard
 ; (0x1000).
 function %add_members(i32) -> f32 spiderwasm {
    gv0 = vmctx+64
    heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000
 ebb0(v0: i32):
    ; A single 1-byte heap_addr is shared by both loads below.
    v1 = heap_addr.i32 heap0, v0, 1
    v2 = load.f32 v1+16
    v3 = load.f32 v1+20
    v4 = fadd v2, v3
    return v4
 }
--- a/cranelift/docs/heapex-sm64.cton
+++ b/cranelift/docs/heapex-sm64.cton
@@ -0,0 +1,13 @@
 test verifier
 ; Add the two f32 values loaded at byte offsets 16 and 20 from heap offset v0.
 ; heap0 is a static heap with a 4 GB bound (0x1_0000_0000) and a 2 GB guard
 ; (0x8000_0000).
 function %add_members(i32) -> f32 spiderwasm {
    gv0 = vmctx+64
    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
 ebb0(v0: i32):
    ; A single 1-byte heap_addr on the 32-bit offset is shared by both loads.
    v1 = heap_addr.i64 heap0, v0, 1
    v2 = load.f32 v1+16
    v3 = load.f32 v1+20
    v4 = fadd v2, v3
    return v4
 }
--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -636,6 +636,37 @@ is resized. The bound of a dynamic heap is stored in a global variable.
    :arg BoundGV: Global variable containing the current heap bound in bytes.
    :arg GuardBytes: Size of the guard pages in bytes.
 Heap examples
 ~~~~~~~~~~~~~
 The SpiderMonkey VM prefers to use fixed heaps with a 4 GB bound and 2 GB of
 guard pages when running WebAssembly code on 64-bit CPUs. The combination of a
 4 GB fixed bound and 1-byte bounds checks means that no code needs to be
 generated for bounds checks at all:
 .. literalinclude:: heapex-sm64.cton
    :language: cton
    :lines: 2-
 A static heap can also be used for 32-bit code when the WebAssembly module
 declares a small upper bound on its memory. A 1 MB static bound with a single 4
 KB guard page still has opportunities for sharing bounds checking code:
 .. literalinclude:: heapex-sm32.cton
    :language: cton
    :lines: 2-
 If the upper bound on the heap size is too large, a dynamic heap is required
 instead.
 Finally, a runtime environment that simply allocates a heap with
 :c:func:`malloc()` may not have any guard pages at all. In that case, full
 bounds checking is required for each access:
 .. literalinclude:: heapex-dyn.cton
    :language: cton
    :lines: 2-
 Operations
 ==========
--- a/cranelift/filetests/isa/intel/legalize-memory.cton
+++ b/cranelift/filetests/isa/intel/legalize-memory.cton
@@ -27,3 +27,26 @@ ebb1(v1: i64):
    return v2
    ; check: return $v2
 }
 ; SpiderMonkey VM-style static 4+2 GB heap.
 ; This eliminates bounds checks completely for offsets < 2GB.
 ; v999 is the vmctx argument and the heap base pointer is read from vmctx+64.
 function %staticheap_sm64(i32, i64 vmctx) -> f32 spiderwasm {
    gv0 = vmctx+64
    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
 ebb0(v0: i32, v999: i64):
    ; check: $ebb0(
    v1 = heap_addr.i64 heap0, v0, 1
    ; Boundscheck should be eliminated.
    ; Checks here are assuming that no peephole opts fold the load offsets.
    ; nextln: $(xoff=$V) = uextend.i64 $v0
    ; nextln: $(haddr=$V) = iadd_imm $v999, 64
    ; nextln: $(hbase=$V) = load.i64 $haddr
    ; nextln: $v1 = iadd $hbase, $xoff
    v2 = load.f32 v1+16
    ; nextln: $v2 = load.f32 $v1+16
    v3 = load.f32 v1+20
    ; nextln: $v3 = load.f32 $v1+20
    v4 = fadd v2, v3
    return v4
 }
--- a/lib/cretonne/meta/base/legalize.py
+++ b/lib/cretonne/meta/base/legalize.py
@@ -46,6 +46,7 @@ expand = XFormGroup('expand', """
 # Custom expansions for memory objects.
 expand.custom_legalize(insts.global_addr, 'expand_global_addr')
 expand.custom_legalize(insts.heap_addr, 'expand_heap_addr')
 x = Var('x')
 y = Var('y')
--- a/lib/cretonne/src/legalizer/heap.rs
+++ b/lib/cretonne/src/legalizer/heap.rs
@@ -0,0 +1,144 @@
 //! Legalization of heaps.
 //!
 //! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
 //! instruction into code that depends on the kind of heap referenced.
 use cursor::{Cursor, FuncCursor};
 use flowgraph::ControlFlowGraph;
 use ir::{self, InstBuilder, MemFlags};
 use ir::condcodes::IntCC;
 /// Legalize a single `heap_addr` instruction.
 ///
 /// The heap reference, the offset operand, and the access-size immediate are unpacked from
 /// `inst`, and the expansion is delegated to the routine matching the style of the referenced
 /// heap (static or dynamic). The control flow graph argument is not used.
 ///
 /// # Panics
 ///
 /// Panics if `inst` is not in the `HeapAddr` instruction format.
 pub fn expand_heap_addr(inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph) {
    // Pull the operands out of the instruction data.
    let (heap, offset, size) = if let ir::InstructionData::HeapAddr {
               opcode,
               heap,
               arg,
               imm,
           } = func.dfg[inst] {
        assert_eq!(opcode, ir::Opcode::HeapAddr);
        (heap, arg, imm.into())
    } else {
        panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None))
    };
    // The heap declaration decides which bounds-checking strategy applies.
    match func.heaps[heap].style {
        ir::HeapStyle::Static { bound } => {
            static_addr(inst, heap, offset, size, bound.into(), func)
        }
        ir::HeapStyle::Dynamic { bound_gv } => {
            dynamic_addr(inst, heap, offset, size, bound_gv, func)
        }
    }
 }
 /// Expand a `heap_addr` whose heap has a dynamic bound.
 ///
 /// The current bound is loaded from the `bound_gv` global variable and the code traps when
 /// `offset + size > bound`. The comparison takes one of three shapes:
 ///
 /// - `size == 1`: a single `offset >= bound` compare.
 /// - `size <= min_size`: `offset > bound - size`; the subtraction cannot wrap because the bound
 ///   is never below the heap's minimum size.
 /// - otherwise: `offset + size` is computed with a carry-out that traps on overflow, and the
 ///   sum is compared against the bound.
 ///
 /// After the check, `offset_addr` emits the actual address computation.
 fn dynamic_addr(inst: ir::Inst,
                 heap: ir::Heap,
                 offset: ir::Value,
                 size: u32,
                 bound_gv: ir::GlobalVar,
                 func: &mut ir::Function) {
    let access_size = i64::from(size);
    let min_size: i64 = func.heaps[heap].min_size.into();
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let offset_ty = func.dfg.value_type(offset);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    // Fetch the current heap bound; it is loaded with the same type as the offset so the two
    // can be compared directly.
    let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
    let bound = pos.ins().load(offset_ty, MemFlags::new(), bound_addr, 0);
    // Compute the out-of-bounds flag for `offset + size > bound`.
    let oob = if access_size == 1 {
        // `offset > bound - 1` is the same as `offset >= bound`.
        pos.ins()
            .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound)
    } else if access_size <= min_size {
        // Since `bound >= min_size >= size`, `bound - size` does not wrap.
        let adj_bound = pos.ins().iadd_imm(bound, -access_size);
        pos.ins()
            .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound)
    } else {
        // `offset + size` may wrap around, so trap on the carry before comparing.
        let size_val = pos.ins().iconst(offset_ty, access_size);
        let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
        pos.ins().trapnz(overflow);
        pos.ins()
            .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound)
    };
    pos.ins().trapnz(oob);
    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
 }
 /// Expand a `heap_addr` for a static heap.
 ///
 /// An access is in bounds when `offset + size <= bound`, where `bound` is the compile-time
 /// constant from the heap declaration. With `limit = bound - size`, that is the same as
 /// `offset <= limit`, so the generated code traps when `offset > limit`:
 ///
 /// - If `size > bound`, no offset can be in bounds: an unconditional trap is emitted and the
 ///   instruction is replaced by a zero constant so its result stays defined.
 /// - If the offset is an `i32` and `limit >= 0xffff_ffff`, no 32-bit offset can exceed the
 ///   limit and the check is omitted entirely.
 /// - Otherwise a compare-and-trap is emitted.
 ///
 /// After the check, `offset_addr` emits the actual address computation.
 fn static_addr(inst: ir::Inst,
                heap: ir::Heap,
                offset: ir::Value,
                size: u32,
                bound: i64,
                func: &mut ir::Function) {
    let size = i64::from(size);
    let offset_ty = func.dfg.value_type(offset);
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let mut pos = FuncCursor::new(func).at_inst(inst);
    // Start with the bounds check. Trap if `offset + size > bound`.
    if size > bound {
        // This will simply always trap since `offset >= 0`.
        // NOTE(review): the replacement `iconst` and the rest of the EBB follow this
        // unconditional trap; confirm that later passes accept that layout.
        pos.ins().trap();
        pos.func.dfg.replace(inst).iconst(addr_ty, 0);
        return;
    }
    // Check `offset > limit` which is now known non-negative.
    let limit = bound - size;
    // We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
    // more.
    if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
        let oob = match limit.checked_add(1) {
            // `offset > limit` is the same as `offset >= limit + 1`. Prefer that form when
            // `limit` is odd because the even constant `limit + 1` is likely to be a convenient
            // immediate on ARM and other RISC architectures.
            Some(even_limit) if limit & 1 == 1 => {
                pos.ins()
                    .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, even_limit)
            }
            // Even limit, or `limit + 1` would overflow: compare against `limit` directly.
            _ => {
                pos.ins()
                    .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
            }
        };
        pos.ins().trapnz(oob);
    }
    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
 }
 /// Emit the address computation that replaces a `heap_addr` instruction.
 ///
 /// Any bounds check must already have been inserted by the caller. The offset is zero-extended
 /// to `addr_ty` when its type differs, the heap base pointer is loaded from the address of the
 /// heap's base global variable, and `inst` is replaced in place by an `iadd` of the base and
 /// the offset.
 ///
 /// # Panics
 ///
 /// Heaps based on a reserved register are not supported yet and hit `unimplemented!()`.
 fn offset_addr(inst: ir::Inst,
                heap: ir::Heap,
                addr_ty: ir::Type,
                offset: ir::Value,
                offset_ty: ir::Type,
                func: &mut ir::Function) {
    let mut pos = FuncCursor::new(func).at_inst(inst);
    // Bring the offset up to the address type before adding it to the base.
    let wide_offset = if offset_ty == addr_ty {
        offset
    } else {
        pos.ins().uextend(addr_ty, offset)
    };
    // Add the heap base address.
    match pos.func.heaps[heap].base {
        ir::HeapBase::GlobalVar(base_gv) => {
            let base_ptr = pos.ins().global_addr(addr_ty, base_gv);
            let heap_base = pos.ins().load(addr_ty, MemFlags::new(), base_ptr, 0);
            pos.func.dfg.replace(inst).iadd(heap_base, wide_offset);
        }
        ir::HeapBase::ReservedReg => unimplemented!(),
    }
 }
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -22,9 +22,11 @@ use bitset::BitSet;
 mod boundary;
 mod globalvar;
 mod heap;
 mod split;
 use self::globalvar::expand_global_addr;
 use self::heap::expand_heap_addr;
 /// Legalize `func` for `isa`.
 ///