Add heap_addr custom legalization.
The expansion of a heap_addr instruction depends on the type of heap and its configuration, so this is handled by custom code. Add a couple examples of heap access code to the language reference manual.
This commit is contained in:
@@ -31,16 +31,13 @@ class CretonneLexer(RegexLexer):
|
||||
bygroups(Comment.Single, Comment.Special, Comment.Single)),
|
||||
# Plain comments.
|
||||
(r';.*?$', Comment.Single),
|
||||
# Strings are in double quotes, support \xx escapes only.
|
||||
(r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
|
||||
# A naked function name following 'function' is also a string.
|
||||
(r'\b(function)([ \t]+)(\w+)\b',
|
||||
bygroups(Keyword, Whitespace, String.Symbol)),
|
||||
# Strings are prefixed by % or # with hex.
|
||||
(r'%\w+|#[0-9a-fA-F]*', String),
|
||||
# Numbers.
|
||||
(r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
|
||||
(r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP]\d+)?', Number.Hex),
|
||||
(r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
|
||||
(r'[-+]?\d+', Number.Integer),
|
||||
(r'[-+]?0[xX][0-9a-fA-F_]+', Number.Hex),
|
||||
(r'[-+]?0[xX][0-9a-fA-F_]*\.[0-9a-fA-F_]*([pP]\d+)?', Number.Hex),
|
||||
(r'[-+]?([0-9_]+\.[0-9_]+([eE]\d+)?|s?NaN|Inf)', Number.Float),
|
||||
(r'[-+]?[0-9_]+', Number.Integer),
|
||||
# Known attributes.
|
||||
(keywords('uext', 'sext'), Name.Attribute),
|
||||
# Well known value types.
|
||||
@@ -48,7 +45,7 @@ class CretonneLexer(RegexLexer):
|
||||
# v<nn> = value
|
||||
# ss<nn> = stack slot
|
||||
# jt<nn> = jump table
|
||||
(r'(v|ss|jt)\d+', Name.Variable),
|
||||
(r'(v|ss|gv|jt|fn|sig|heap)\d+', Name.Variable),
|
||||
# ebb<nn> = extended basic block
|
||||
(r'(ebb)\d+', Name.Label),
|
||||
# Match instruction names in context.
|
||||
|
||||
15
cranelift/docs/heapex-dyn.cton
Normal file
15
cranelift/docs/heapex-dyn.cton
Normal file
@@ -0,0 +1,15 @@
|
||||
test verifier
|
||||
|
||||
function %add_members(i32) -> f32 spiderwasm {
|
||||
gv0 = vmctx+64
|
||||
gv1 = vmctx+72
|
||||
heap0 = dynamic gv0, min 0x1000, bound gv1, guard 0
|
||||
|
||||
ebb0(v0: i32):
|
||||
v1 = heap_addr.i64 heap0, v0, 20
|
||||
v2 = load.f32 v1+16
|
||||
v3 = heap_addr.i64 heap0, v0, 24
|
||||
v4 = load.f32 v3+20
|
||||
v5 = fadd v2, v4
|
||||
return v5
|
||||
}
|
||||
14
cranelift/docs/heapex-sm32.cton
Normal file
14
cranelift/docs/heapex-sm32.cton
Normal file
@@ -0,0 +1,14 @@
|
||||
test verifier
|
||||
|
||||
function %add_members(i32) -> f32 spiderwasm {
|
||||
gv0 = vmctx+64
|
||||
heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000
|
||||
|
||||
ebb0(v0: i32):
|
||||
v1 = heap_addr.i32 heap0, v0, 1
|
||||
v2 = load.f32 v1+16
|
||||
v3 = load.f32 v1+20
|
||||
v4 = fadd v2, v3
|
||||
return v4
|
||||
}
|
||||
|
||||
13
cranelift/docs/heapex-sm64.cton
Normal file
13
cranelift/docs/heapex-sm64.cton
Normal file
@@ -0,0 +1,13 @@
|
||||
test verifier
|
||||
|
||||
function %add_members(i32) -> f32 spiderwasm {
|
||||
gv0 = vmctx+64
|
||||
heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
|
||||
|
||||
ebb0(v0: i32):
|
||||
v1 = heap_addr.i64 heap0, v0, 1
|
||||
v2 = load.f32 v1+16
|
||||
v3 = load.f32 v1+20
|
||||
v4 = fadd v2, v3
|
||||
return v4
|
||||
}
|
||||
@@ -636,6 +636,37 @@ is resized. The bound of a dynamic heap is stored in a global variable.
|
||||
:arg BoundGV: Global variable containing the current heap bound in bytes.
|
||||
:arg GuardBytes: Size of the guard pages in bytes.
|
||||
|
||||
Heap examples
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
The SpiderMonkey VM prefers to use fixed heaps with a 4 GB bound and 2 GB of
|
||||
guard pages when running WebAssembly code on 64-bit CPUs. The combination of a
|
||||
4 GB fixed bound and 1-byte bounds checks means that no code needs to be
|
||||
generated for bounds checks at all:
|
||||
|
||||
.. literalinclude:: heapex-sm64.cton
|
||||
:language: cton
|
||||
:lines: 2-
|
||||
|
||||
A static heap can also be used for 32-bit code when the WebAssembly module
|
||||
declares a small upper bound on its memory. A 1 MB static bound with a single 4
|
||||
KB guard page still has opportunities for sharing bounds checking code:
|
||||
|
||||
.. literalinclude:: heapex-sm32.cton
|
||||
:language: cton
|
||||
:lines: 2-
|
||||
|
||||
If the upper bound on the heap size is too large, a dynamic heap is required
|
||||
instead.
|
||||
|
||||
Finally, a runtime environment that simply allocates a heap with
|
||||
:c:func:`malloc()` may not have any guard pages at all. In that case, full
|
||||
bounds checking is required for each access:
|
||||
|
||||
.. literalinclude:: heapex-dyn.cton
|
||||
:language: cton
|
||||
:lines: 2-
|
||||
|
||||
|
||||
Operations
|
||||
==========
|
||||
|
||||
@@ -27,3 +27,26 @@ ebb1(v1: i64):
|
||||
return v2
|
||||
; check: return $v2
|
||||
}
|
||||
|
||||
; SpiderMonkey VM-style static 4+2 GB heap.
|
||||
; This eliminates bounds checks completely for offsets < 2GB.
|
||||
function %staticheap_sm64(i32, i64 vmctx) -> f32 spiderwasm {
|
||||
gv0 = vmctx+64
|
||||
heap0 = static gv0, min 0x1000, bound 0x1_0000_0000, guard 0x8000_0000
|
||||
|
||||
ebb0(v0: i32, v999: i64):
|
||||
; check: $ebb0(
|
||||
v1 = heap_addr.i64 heap0, v0, 1
|
||||
; Boundscheck should be eliminated.
|
||||
; Checks here assume that no peephole opts fold the load offsets.
|
||||
; nextln: $(xoff=$V) = uextend.i64 $v0
|
||||
; nextln: $(haddr=$V) = iadd_imm $v999, 64
|
||||
; nextln: $(hbase=$V) = load.i64 $haddr
|
||||
; nextln: $v1 = iadd $hbase, $xoff
|
||||
v2 = load.f32 v1+16
|
||||
; nextln: $v2 = load.f32 $v1+16
|
||||
v3 = load.f32 v1+20
|
||||
; nextln: $v3 = load.f32 $v1+20
|
||||
v4 = fadd v2, v3
|
||||
return v4
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ expand = XFormGroup('expand', """
|
||||
|
||||
# Custom expansions for memory objects.
|
||||
expand.custom_legalize(insts.global_addr, 'expand_global_addr')
|
||||
expand.custom_legalize(insts.heap_addr, 'expand_heap_addr')
|
||||
|
||||
x = Var('x')
|
||||
y = Var('y')
|
||||
|
||||
144
lib/cretonne/src/legalizer/heap.rs
Normal file
144
lib/cretonne/src/legalizer/heap.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
//! Legalization of heaps.
|
||||
//!
|
||||
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
|
||||
//! instruction into code that depends on the kind of heap referenced.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder, MemFlags};
|
||||
use ir::condcodes::IntCC;
|
||||
|
||||
/// Expand a `heap_addr` instruction according to the definition of the heap.
|
||||
pub fn expand_heap_addr(inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph) {
|
||||
// Unpack the instruction.
|
||||
let (heap, offset, size) = match &func.dfg[inst] {
|
||||
&ir::InstructionData::HeapAddr {
|
||||
opcode,
|
||||
heap,
|
||||
arg,
|
||||
imm,
|
||||
} => {
|
||||
assert_eq!(opcode, ir::Opcode::HeapAddr);
|
||||
(heap, arg, imm.into())
|
||||
}
|
||||
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match func.heaps[heap].style {
|
||||
ir::HeapStyle::Dynamic { bound_gv } => {
|
||||
dynamic_addr(inst, heap, offset, size, bound_gv, func)
|
||||
}
|
||||
ir::HeapStyle::Static { bound } => {
|
||||
static_addr(inst, heap, offset, size, bound.into(), func)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a dynamic heap.
|
||||
fn dynamic_addr(inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound_gv: ir::GlobalVar,
|
||||
func: &mut ir::Function) {
|
||||
let size = size as i64;
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let min_size = func.heaps[heap].min_size.into();
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
|
||||
let bound = pos.ins().load(offset_ty, MemFlags::new(), bound_addr, 0);
|
||||
|
||||
let oob;
|
||||
if size == 1 {
|
||||
// `offset > bound - 1` is the same as `offset >= bound`.
|
||||
oob = pos.ins()
|
||||
.icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
|
||||
} else if size <= min_size {
|
||||
// We know that bound >= min_size, so here we can compare `offset > bound - size` without
|
||||
// wrapping.
|
||||
let adj_bound = pos.ins().iadd_imm(bound, -size);
|
||||
oob = pos.ins()
|
||||
.icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
|
||||
} else {
|
||||
// We need an overflow check for the adjusted offset.
|
||||
let size_val = pos.ins().iconst(offset_ty, size);
|
||||
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
|
||||
pos.ins().trapnz(overflow);
|
||||
oob = pos.ins()
|
||||
.icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
|
||||
}
|
||||
pos.ins().trapnz(oob);
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a static heap.
|
||||
fn static_addr(inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound: i64,
|
||||
func: &mut ir::Function) {
|
||||
let size = size as i64;
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
if size > bound {
|
||||
// This will simply always trap since `offset >= 0`.
|
||||
pos.ins().trap();
|
||||
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check `offset > limit` which is now known non-negative.
|
||||
let limit = bound - size;
|
||||
|
||||
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
|
||||
// more.
|
||||
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
|
||||
let oob = if limit & 1 == 1 {
|
||||
// Prefer testing `offset >= limit - 1` when limit is odd because an even number is
|
||||
// likely to be a convenient constant on ARM and other RISC architectures.
|
||||
pos.ins()
|
||||
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit - 1)
|
||||
} else {
|
||||
pos.ins()
|
||||
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
|
||||
};
|
||||
pos.ins().trapnz(oob);
|
||||
}
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Emit code for the base address computation of a `heap_addr` instruction.
|
||||
///
|
||||
///
|
||||
fn offset_addr(inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
addr_ty: ir::Type,
|
||||
mut offset: ir::Value,
|
||||
offset_ty: ir::Type,
|
||||
func: &mut ir::Function) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
|
||||
// Convert `offset` to `addr_ty`.
|
||||
if offset_ty != addr_ty {
|
||||
offset = pos.ins().uextend(addr_ty, offset);
|
||||
}
|
||||
|
||||
// Add the heap base address base
|
||||
match pos.func.heaps[heap].base {
|
||||
ir::HeapBase::ReservedReg => unimplemented!(),
|
||||
ir::HeapBase::GlobalVar(base_gv) => {
|
||||
let base_addr = pos.ins().global_addr(addr_ty, base_gv);
|
||||
let base = pos.ins().load(addr_ty, MemFlags::new(), base_addr, 0);
|
||||
pos.func.dfg.replace(inst).iadd(base, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -22,9 +22,11 @@ use bitset::BitSet;
|
||||
|
||||
mod boundary;
|
||||
mod globalvar;
|
||||
mod heap;
|
||||
mod split;
|
||||
|
||||
use self::globalvar::expand_global_addr;
|
||||
use self::heap::expand_heap_addr;
|
||||
|
||||
/// Legalize `func` for `isa`.
|
||||
///
|
||||
|
||||
Reference in New Issue
Block a user