diff --git a/cranelift/docs/cton_lexer.py b/cranelift/docs/cton_lexer.py
index 1024765cbe..bd1a47758f 100644
--- a/cranelift/docs/cton_lexer.py
+++ b/cranelift/docs/cton_lexer.py
@@ -31,16 +31,13 @@ class CretonneLexer(RegexLexer):
              bygroups(Comment.Single, Comment.Special, Comment.Single)),
             # Plain comments.
             (r';.*?$', Comment.Single),
-            # Strings are in double quotes, support \xx escapes only.
-            (r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
-            # A naked function name following 'function' is also a string.
-            (r'\b(function)([ \t]+)(\w+)\b',
-             bygroups(Keyword, Whitespace, String.Symbol)),
+            # Names are prefixed by % (symbolic) or # (hexadecimal).
+            (r'%\w+|#[0-9a-fA-F]*', String),
             # Numbers.
-            (r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
-            (r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP]\d+)?', Number.Hex),
-            (r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
-            (r'[-+]?\d+', Number.Integer),
+            (r'[-+]?0[xX][0-9a-fA-F_]+', Number.Hex),
+            (r'[-+]?0[xX][0-9a-fA-F_]*\.[0-9a-fA-F_]*([pP]\d+)?', Number.Hex),
+            (r'[-+]?([0-9_]+\.[0-9_]+([eE]\d+)?|s?NaN|Inf)', Number.Float),
+            (r'[-+]?[0-9_]+', Number.Integer),
             # Known attributes.
             (keywords('uext', 'sext'), Name.Attribute),
             # Well known value types.
@@ -48,7 +45,7 @@ class CretonneLexer(RegexLexer):
             # v = value
             # ss = stack slot
             # jt = jump table
-            (r'(v|ss|jt)\d+', Name.Variable),
+            # gv = global variable, fn = function, sig = signature, heap = heap
+            (r'(v|ss|gv|jt|fn|sig|heap)\d+', Name.Variable),
             # ebb = extended basic block
             (r'(ebb)\d+', Name.Label),
             # Match instruction names in context.
diff --git a/cranelift/docs/heapex-dyn.cton b/cranelift/docs/heapex-dyn.cton
new file mode 100644
index 0000000000..2be801da86
--- /dev/null
+++ b/cranelift/docs/heapex-dyn.cton
@@ -0,0 +1,15 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    gv1 = vmctx+72
+    heap0 = dynamic gv0, min 0x1000, bound gv1, guard 0
+
+ebb0(v0: i32):
+    v1 = heap_addr.i64 heap0, v0, 20
+    v2 = load.f32 v1+16
+    v3 = heap_addr.i64 heap0, v0, 24
+    v4 = load.f32 v3+20
+    v5 = fadd v2, v4
+    return v5
+}
diff --git a/cranelift/docs/heapex-sm32.cton b/cranelift/docs/heapex-sm32.cton
new file mode 100644
index 0000000000..51b0c4cbcb
--- /dev/null
+++ b/cranelift/docs/heapex-sm32.cton
@@ -0,0 +1,14 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000
+
+ebb0(v0: i32):
+    v1 = heap_addr.i32 heap0, v0, 1
+    v2 = load.f32 v1+16
+    v3 = load.f32 v1+20
+    v4 = fadd v2, v3
+    return v4
+}
+
diff --git a/cranelift/docs/heapex-sm64.cton b/cranelift/docs/heapex-sm64.cton
new file mode 100644
index 0000000000..e29fa9caf5
--- /dev/null
+++ b/cranelift/docs/heapex-sm64.cton
@@ -0,0 +1,13 @@
+test verifier
+
+function %add_members(i32) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
+
+ebb0(v0: i32):
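+    ; With an i32 offset, a 4 GB bound, and a 1-byte access size, the offset can
+    ; never exceed the bound, so the legalizer emits no bounds check at all; the
+    ; 2 GB guard region absorbs the constant load offsets below.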
+    v1 = heap_addr.i64 heap0, v0, 1
+    v2 = load.f32 v1+16
+    v3 = load.f32 v1+20
+    v4 = fadd v2, v3
+    return v4
+}
diff --git a/cranelift/docs/langref.rst b/cranelift/docs/langref.rst
index e31d8574a9..c8c7ede4fd 100644
--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -636,6 +636,37 @@ is resized. The bound of a dynamic heap is stored in a global variable.
 :arg BoundGV: Global variable containing the current heap bound in bytes.
 :arg GuardBytes: Size of the guard pages in bytes.
 
+Heap examples
+~~~~~~~~~~~~~
+
+The SpiderMonkey VM prefers to use static heaps with a 4 GB bound and 2 GB of
+guard pages when running WebAssembly code on 64-bit CPUs. The combination of a
+4 GB bound and 1-byte ``heap_addr`` access sizes means that a 32-bit offset can
+never exceed the bound, so no bounds-check code needs to be generated at all:
+
+.. literalinclude:: heapex-sm64.cton
+    :language: cton
+    :lines: 2-
+
+A static heap can also be used for 32-bit code when the WebAssembly module
+declares a small upper bound on its memory. With a 1 MB static bound and a
+single 4 KB guard page, one bounds check can still be shared by both loads
+below; the guard page covers their small constant offsets:
+
+.. literalinclude:: heapex-sm32.cton
+    :language: cton
+    :lines: 2-
+
+If the upper bound on the heap size is too large, a dynamic heap is required
+instead.
+
+Finally, a runtime environment that simply allocates a heap with
+:c:func:`malloc()` may not have any guard pages at all. In that case, full
+bounds checking is required for each access:
+
+.. literalinclude:: heapex-dyn.cton
+    :language: cton
+    :lines: 2-
+
 Operations
 ==========
diff --git a/cranelift/filetests/isa/intel/legalize-memory.cton b/cranelift/filetests/isa/intel/legalize-memory.cton
index ade217c84d..1aaf044f9f 100644
--- a/cranelift/filetests/isa/intel/legalize-memory.cton
+++ b/cranelift/filetests/isa/intel/legalize-memory.cton
@@ -27,3 +27,26 @@ ebb1(v1: i64):
     return v2
     ; check: return $v2
 }
+
+; SpiderMonkey VM-style static 4+2 GB heap.
+; This eliminates bounds checks completely for offsets < 2 GB.
+function %staticheap_sm64(i32, i64 vmctx) -> f32 spiderwasm {
+    gv0 = vmctx+64
+    heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
+
+ebb0(v0: i32, v999: i64):
+    ; check: $ebb0(
+    v1 = heap_addr.i64 heap0, v0, 1
+    ; The bounds check should be eliminated.
+    ; These checks assume that no peephole opts fold the load offsets.
+    ; nextln: $(xoff=$V) = uextend.i64 $v0
+    ; nextln: $(haddr=$V) = iadd_imm $v999, 64
+    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $v1 = iadd $hbase, $xoff
+    v2 = load.f32 v1+16
+    ; nextln: $v2 = load.f32 $v1+16
+    v3 = load.f32 v1+20
+    ; nextln: $v3 = load.f32 $v1+20
+    v4 = fadd v2, v3
+    return v4
+}
diff --git a/lib/cretonne/meta/base/legalize.py b/lib/cretonne/meta/base/legalize.py
index 8c98468518..5db7e6b7c8 100644
--- a/lib/cretonne/meta/base/legalize.py
+++ b/lib/cretonne/meta/base/legalize.py
@@ -46,6 +46,7 @@ expand = XFormGroup('expand', """
 
 # Custom expansions for memory objects.
 expand.custom_legalize(insts.global_addr, 'expand_global_addr')
+expand.custom_legalize(insts.heap_addr, 'expand_heap_addr')
 
 x = Var('x')
 y = Var('y')
diff --git a/lib/cretonne/src/legalizer/heap.rs b/lib/cretonne/src/legalizer/heap.rs
new file mode 100644
index 0000000000..30cd0917c7
--- /dev/null
+++ b/lib/cretonne/src/legalizer/heap.rs
@@ -0,0 +1,144 @@
+//! Legalization of heaps.
+//!
+//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
+//! instruction into code that depends on the kind of heap referenced.
+
+use cursor::{Cursor, FuncCursor};
+use flowgraph::ControlFlowGraph;
+use ir::{self, InstBuilder, MemFlags};
+use ir::condcodes::IntCC;
+
+/// Expand a `heap_addr` instruction according to the definition of the heap.
+pub fn expand_heap_addr(inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph) {
+    // Unpack the instruction.
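+    // A `heap_addr` carries the heap being accessed, the dynamic offset into it, and the
+    // constant size of the access to bounds-check; the heap's declared style selects the
+    // expansion below.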
+    let (heap, offset, size) = match &func.dfg[inst] {
+        &ir::InstructionData::HeapAddr {
+            opcode,
+            heap,
+            arg,
+            imm,
+        } => {
+            assert_eq!(opcode, ir::Opcode::HeapAddr);
+            (heap, arg, imm.into())
+        }
+        _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    match func.heaps[heap].style {
+        ir::HeapStyle::Dynamic { bound_gv } => {
+            dynamic_addr(inst, heap, offset, size, bound_gv, func)
+        }
+        ir::HeapStyle::Static { bound } => {
+            static_addr(inst, heap, offset, size, bound.into(), func)
+        }
+    }
+}
+
+/// Expand a `heap_addr` for a dynamic heap.
+fn dynamic_addr(inst: ir::Inst,
+                heap: ir::Heap,
+                offset: ir::Value,
+                size: u32,
+                bound_gv: ir::GlobalVar,
+                func: &mut ir::Function) {
+    let size = size as i64;
+    let offset_ty = func.dfg.value_type(offset);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let min_size = func.heaps[heap].min_size.into();
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Start with the bounds check. Trap if `offset + size > bound`.
+    let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
+    let bound = pos.ins().load(offset_ty, MemFlags::new(), bound_addr, 0);
+
+    let oob;
+    if size == 1 {
+        // `offset > bound - 1` is the same as `offset >= bound`.
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
+    } else if size <= min_size {
+        // We know that `bound >= min_size >= size`, so `bound - size` can't underflow and
+        // we can compare `offset > bound - size` directly.
+        let adj_bound = pos.ins().iadd_imm(bound, -size);
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
+    } else {
+        // We need an overflow check for the adjusted offset.
+        let size_val = pos.ins().iconst(offset_ty, size);
+        let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
+        pos.ins().trapnz(overflow);
+        oob = pos.ins()
+            .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
+    }
+    pos.ins().trapnz(oob);
+
+    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Expand a `heap_addr` for a static heap.
+fn static_addr(inst: ir::Inst,
+               heap: ir::Heap,
+               offset: ir::Value,
+               size: u32,
+               bound: i64,
+               func: &mut ir::Function) {
+    let size = size as i64;
+    let offset_ty = func.dfg.value_type(offset);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Start with the bounds check. Trap if `offset + size > bound`.
+    if size > bound {
+        // This will simply always trap since `offset >= 0`.
+        pos.ins().trap();
+        pos.func.dfg.replace(inst).iconst(addr_ty, 0);
+        return;
+    }
+
+    // Check `offset > limit`, where `limit = bound - size` is known to be non-negative.
+    let limit = bound - size;
+
+    // We may be able to omit the check entirely for 32-bit offsets if the heap bound is
+    // 4 GB or more.
+    if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
+        let oob = if limit & 1 == 1 {
+            // Prefer testing `offset >= limit + 1` when limit is odd because an even number
+            // is likely to be a convenient constant on ARM and other RISC architectures.
+            pos.ins()
+                .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit + 1)
+        } else {
+            pos.ins()
+                .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
+        };
+        pos.ins().trapnz(oob);
+    }
+
+    offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Emit code for the base address computation of a `heap_addr` instruction.
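+///
+/// This runs after the bounds check has been emitted: it zero-extends `offset` to the
+/// address type if needed, then adds the heap base address. Only global-variable bases
+/// are handled; a reserved-register base is still `unimplemented!()`.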
+fn offset_addr(inst: ir::Inst,
+               heap: ir::Heap,
+               addr_ty: ir::Type,
+               mut offset: ir::Value,
+               offset_ty: ir::Type,
+               func: &mut ir::Function) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+
+    // Convert `offset` to `addr_ty`.
+    if offset_ty != addr_ty {
+        offset = pos.ins().uextend(addr_ty, offset);
+    }
+
+    // Add the heap base address.
+    match pos.func.heaps[heap].base {
+        ir::HeapBase::ReservedReg => unimplemented!(),
+        ir::HeapBase::GlobalVar(base_gv) => {
+            let base_addr = pos.ins().global_addr(addr_ty, base_gv);
+            let base = pos.ins().load(addr_ty, MemFlags::new(), base_addr, 0);
+            pos.func.dfg.replace(inst).iadd(base, offset);
+        }
+    }
+}
diff --git a/lib/cretonne/src/legalizer/mod.rs b/lib/cretonne/src/legalizer/mod.rs
index 230c7adc7e..65365018d2 100644
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -22,9 +22,11 @@ use bitset::BitSet;
 
 mod boundary;
 mod globalvar;
+mod heap;
 mod split;
 
 use self::globalvar::expand_global_addr;
+use self::heap::expand_heap_addr;
 
 /// Legalize `func` for `isa`.
 ///
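
---

Reviewer note: the three-way case analysis in `dynamic_addr` is easy to sanity-check in
isolation. The sketch below is illustrative only (`model_dynamic_check` is a hypothetical
stand-alone function, not part of this patch); it models the emitted bounds checks in plain
Rust, assuming offsets and bounds fit in `u32`:

    /// Illustrative model of the bounds check emitted by `dynamic_addr`.
    /// Returns true when the access must trap. `offset` and `bound` stand in for
    /// the runtime IR values; `size` and `min_size` come from the heap declaration.
    fn model_dynamic_check(offset: u32, size: u32, bound: u32, min_size: u32) -> bool {
        // A dynamic heap may grow and shrink, but never below `min_size`.
        assert!(bound >= min_size);
        if size == 1 {
            // `offset > bound - 1` is the same as `offset >= bound`.
            offset >= bound
        } else if size <= min_size {
            // `bound - size` cannot underflow because `bound >= min_size >= size`.
            offset > bound - size
        } else {
            // `offset + size` may overflow; this mirrors the `iadd_cout` + `trapnz` pair.
            match offset.checked_add(size) {
                None => true,
                Some(end) => end > bound,
            }
        }
    }

    fn main() {
        assert!(model_dynamic_check(0x1000, 1, 0x1000, 0x1000)); // offset == bound: trap
        assert!(!model_dynamic_check(0xffc, 4, 0x1000, 0x1000)); // last in-bounds access
        assert!(model_dynamic_check(u32::max_value(), 2, 0x1000, 1)); // overflow: trap
    }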