Cranelift: Make heap_addr return calculated base + index + offset (#5231)

* Cranelift: Make `heap_addr` return calculated `base + index + offset`

Rather than returning just `base + index`.

(Note: I've chosen to use the nomenclature "index" for the dynamic operand and
"offset" for the static immediate.)

This moves the addition of the `offset` into `heap_addr`, instead of leaving it
for the subsequent memory operation, so that we can Spectre-guard the full
address and not allow speculative execution to read the first 4GiB of memory.

Before this commit, we were effectively doing

    load(spectre_guard(base + index) + offset)

Now we are effectively doing

    load(spectre_guard(base + index + offset))

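In terms of the builder calls in the diff below, the common case now looks
roughly like the following sketch (the `emit_checked_load` wrapper, its
parameter list, and the hard-coded `I32` result type are illustrative
assumptions, not wasmtime's actual helper):

    use cranelift_codegen::ir::{self, InstBuilder, MemFlags, Value};
    use cranelift_frontend::FunctionBuilder;

    /// Hypothetical helper sketching the new shape: `heap_addr` receives the
    /// static `offset` and the access size, so the address it returns is
    /// already `base + index + offset` and is Spectre-guarded as a whole.
    fn emit_checked_load(
        builder: &mut FunctionBuilder,
        heap: ir::Heap,
        index: Value,           // dynamic Wasm address operand
        offset: u32,            // static offset immediate from the memarg
        access_size: u8,        // bytes accessed, e.g. 4 for an i32 load
        pointer_type: ir::Type, // native pointer type, e.g. ir::types::I64
        flags: MemFlags,
    ) -> Value {
        // The bounds check now covers `index + offset + access_size`.
        let base = builder
            .ins()
            .heap_addr(pointer_type, heap, index, offset, access_size);
        // `offset` is already folded into `base`, so the load's own offset is
        // zero and the full effective address sits behind the Spectre guard.
        builder.ins().load(ir::types::I32, flags, base, 0)
    }
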
Finally, this also corrects `heap_addr`'s documented semantics to say that it
returns an address that will trap on access if `index + offset + access_size` is
out of bounds for the given heap, rather than saying that the `heap_addr` itself
will trap. This matches the implemented behavior for static memories and, once
https://github.com/bytecodealliance/wasmtime/pull/5190 (which is blocked on this
commit) lands, it will also match the implemented behavior for dynamic memories.

* Update heap_addr docs

* Factor out `offset + size` to a helper
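
The helper in that last bullet just sums the static offset and the access size
without any chance of overflow. A minimal sketch of the idea, with a
hypothetical name and signature since the helper itself isn't shown in the
hunks below:

    /// Hypothetical sketch of an `offset + size` helper: both inputs are
    /// bounded (a u32 offset immediate and a u8 access size), so summing
    /// them in a u64 can never overflow.
    fn offset_plus_size(offset: u32, size: u8) -> u64 {
        u64::from(offset) + u64::from(size)
    }
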
Nick Fitzgerald, 2022-11-09 11:53:51 -08:00 (committed by GitHub)
parent 33a192556e
commit fc62d4ad65
39 changed files with 563 additions and 284 deletions


@@ -91,7 +91,6 @@ use cranelift_codegen::packed_option::ReservedValue;
use cranelift_frontend::{FunctionBuilder, Variable};
use itertools::Itertools;
use smallvec::SmallVec;
use std::cmp;
use std::convert::TryFrom;
use std::vec::Vec;
use wasmparser::{FuncValidator, MemArg, Operator, WasmModuleResources};
@@ -697,33 +696,33 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
translate_load(memarg, ir::Opcode::Load, I8X16, builder, state, environ)?;
}
Operator::V128Load8x8S { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload8x8(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload8x8(flags, base, 0);
state.push1(loaded);
}
Operator::V128Load8x8U { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload8x8(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload8x8(flags, base, 0);
state.push1(loaded);
}
Operator::V128Load16x4S { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload16x4(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload16x4(flags, base, 0);
state.push1(loaded);
}
Operator::V128Load16x4U { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload16x4(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload16x4(flags, base, 0);
state.push1(loaded);
}
Operator::V128Load32x2S { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload32x2(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().sload32x2(flags, base, 0);
state.push1(loaded);
}
Operator::V128Load32x2U { memarg } => {
let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload32x2(flags, base, offset);
let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?;
let loaded = builder.ins().uload32x2(flags, base, 0);
state.push1(loaded);
}
/****************************** Store instructions ***********************************
@@ -1067,8 +1066,13 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let heap = state.get_heap(builder.func, memarg.memory, environ)?;
let timeout = state.pop1(); // 64 (fixed)
let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`)
let (_flags, addr) =
prepare_atomic_addr(memarg, implied_ty.bytes(), builder, state, environ)?;
let (_flags, addr) = prepare_atomic_addr(
memarg,
u8::try_from(implied_ty.bytes()).unwrap(),
builder,
state,
environ,
)?;
assert!(builder.func.dfg.value_type(expected) == implied_ty);
// `fn translate_atomic_wait` can inspect the type of `expected` to figure out what
// code it needs to generate, if it wants.
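
A side note on the `u8::try_from(...).unwrap()` conversions that appear
throughout this diff: the access size is now an 8-bit immediate, and the widest
type loaded or stored here is a 128-bit vector, so the conversion cannot fail.
A standalone illustration (not part of the patch):

    use cranelift_codegen::ir::types;
    use std::convert::TryFrom;

    fn main() {
        // `Type::bytes()` returns a u32, but no access in this file is wider
        // than 16 bytes, so narrowing to the new u8 access size always works.
        assert_eq!(u8::try_from(types::I8X16.bytes()).unwrap(), 16);
        assert_eq!(u8::try_from(types::I64.bytes()).unwrap(), 8);
    }
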
@@ -2171,21 +2175,20 @@ fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
/// This function is a generalized helper for validating that a wasm-supplied
/// heap address is in-bounds.
///
/// This function takes a litany of parameters and requires that the address to
/// be verified is at the top of the stack in `state`. This will generate
/// necessary IR to validate that the heap address is correctly in-bounds, and
/// various parameters are returned describing the valid heap address if
/// execution reaches that point.
/// This function takes a litany of parameters and requires that the *Wasm*
/// address to be verified is at the top of the stack in `state`. This will
/// generate necessary IR to validate that the heap address is correctly
/// in-bounds, and various parameters are returned describing the valid *native*
/// heap address if execution reaches that point.
fn prepare_addr<FE: FuncEnvironment + ?Sized>(
memarg: &MemArg,
access_size: u32,
access_size: u8,
builder: &mut FunctionBuilder,
state: &mut FuncTranslationState,
environ: &mut FE,
) -> WasmResult<(MemFlags, Value, Offset32)> {
) -> WasmResult<(MemFlags, Value)> {
let addr = state.pop1();
let heap = state.get_heap(builder.func, memarg.memory, environ)?;
let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into();
// How exactly the bounds check is performed here and what it's performed
// on is a bit tricky. Generally we want to rely on access violations (e.g.
@@ -2244,10 +2247,9 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
// hit like so:
//
// * For wasm32, wasmtime defaults to 4gb "static" memories with 2gb guard
// regions. This means our `adjusted_offset` is 1 for all offsets <=2gb.
// This hits the optimized case for `heap_addr` on static memories 4gb in
// size in cranelift's legalization of `heap_addr`, eliding the bounds
// check entirely.
// regions. This means that for all offsets <=2gb, we hit the optimized
// case for `heap_addr` on static memories 4gb in size in cranelift's
// legalization of `heap_addr`, eliding the bounds check entirely.
//
// * For wasm64 offsets <=2gb will generate a single `heap_addr`
instruction, but at this time all heaps are "dynamic" which means that
@@ -2258,43 +2260,17 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
// offsets in `memarg` are <=2gb, which means we get the fast path of one
// `heap_addr` instruction plus a hardcoded i32-offset in memory-related
// instructions.
let adjusted_offset = if offset_guard_size == 0 {
// Why saturating? see (1) above
memarg.offset.saturating_add(u64::from(access_size))
} else {
// Why is there rounding here? see (2) above
assert!(access_size < 1024);
cmp::max(memarg.offset / offset_guard_size * offset_guard_size, 1)
};
debug_assert!(adjusted_offset > 0); // want to bounds check at least 1 byte
let (addr, offset) = match u32::try_from(adjusted_offset) {
// If our adjusted offset fits within a u32, then we can place the
// entire offset into the offset of the `heap_addr` instruction. After
// the `heap_addr` instruction, though, we need to factor the the offset
// into the returned address. This is either an immediate to later
// memory instructions if the offset further fits within `i32`, or a
// manual add instruction otherwise.
//
// Note that native instructions take a signed offset hence the switch
// to i32. Note also the lack of overflow checking in the offset
// addition, which should be ok since if `heap_addr` passed we're
// guaranteed that this won't overflow.
Ok(adjusted_offset) => {
let base = builder
let addr = match u32::try_from(memarg.offset) {
// If our offset fits within a u32, then we can place it into the
// offset immediate of the `heap_addr` instruction.
Ok(offset) => {
builder
.ins()
.heap_addr(environ.pointer_type(), heap, addr, adjusted_offset);
match i32::try_from(memarg.offset) {
Ok(val) => (base, val),
Err(_) => {
let adj = builder.ins().iadd_imm(base, memarg.offset as i64);
(adj, 0)
}
}
.heap_addr(environ.pointer_type(), heap, addr, offset, access_size)
}
// If the adjusted offset doesn't fit within a u32, then we can't pass
// the adjust sized to `heap_addr` raw.
// If the offset doesn't fit within a u32, then we can't pass it
// directly into `heap_addr`.
//
// One reasonable question you might ask is "why not?". There's no
// fundamental reason why `heap_addr` *must* take a 32-bit offset. The
@@ -2313,8 +2289,6 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
//
// Once we have the effective address, offset already folded in, then
// `heap_addr` is used to verify that the address is indeed in-bounds.
// The access size of the `heap_addr` is what we were passed in from
// above.
//
// Note that this is generating what's likely to be at least two
// branches, one for the overflow and one for the bounds check itself.
@@ -2328,10 +2302,9 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
builder
.ins()
.uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds);
let base = builder
builder
.ins()
.heap_addr(environ.pointer_type(), heap, addr, access_size);
(base, 0)
.heap_addr(environ.pointer_type(), heap, addr, 0, access_size)
}
};
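
Condensing the large-offset path shown just above: when the static offset does
not fit in `heap_addr`'s u32 immediate, it is added to the Wasm address first
(trapping on overflow), and `heap_addr` then bounds-checks the combined value
with a zero immediate. A hypothetical, self-contained rendering of that
sequence (assuming `index` is already pointer-width):

    use cranelift_codegen::ir::{self, InstBuilder, Value};
    use cranelift_frontend::FunctionBuilder;

    /// Hypothetical condensation of the slow path above.
    fn emit_large_offset_base(
        builder: &mut FunctionBuilder,
        heap: ir::Heap,
        index: Value,           // dynamic Wasm address, already pointer-width
        offset: u64,            // static offset that did not fit in a u32
        access_size: u8,
        pointer_type: ir::Type,
    ) -> Value {
        // Fold the offset into the address, trapping if the addition wraps.
        let offset_val = builder.ins().iconst(pointer_type, offset as i64);
        let addr = builder
            .ins()
            .uadd_overflow_trap(index, offset_val, ir::TrapCode::HeapOutOfBounds);
        // The offset is already in `addr`, so `heap_addr` gets a zero offset
        // immediate and still checks `addr + access_size` against the heap.
        builder.ins().heap_addr(pointer_type, heap, addr, 0, access_size)
    }
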
@@ -2348,12 +2321,12 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
// vmctx, stack) accesses.
flags.set_heap();
Ok((flags, addr, offset.into()))
Ok((flags, addr))
}
fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
memarg: &MemArg,
loaded_bytes: u32,
loaded_bytes: u8,
builder: &mut FunctionBuilder,
state: &mut FuncTranslationState,
environ: &mut FE,
@@ -2386,18 +2359,7 @@ fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
builder.ins().trapnz(f, ir::TrapCode::HeapMisaligned);
}
let (flags, mut addr, offset) = prepare_addr(memarg, loaded_bytes, builder, state, environ)?;
// Currently cranelift IR operations for atomics don't have offsets
// associated with them so we fold the offset into the address itself. Note
// that via the `prepare_addr` helper we know that if execution reaches
// this point that this addition won't overflow.
let offset: i64 = offset.into();
if offset != 0 {
addr = builder.ins().iadd_imm(addr, offset);
}
Ok((flags, addr))
prepare_addr(memarg, loaded_bytes, builder, state, environ)
}
/// Translate a load instruction.
@@ -2409,14 +2371,16 @@ fn translate_load<FE: FuncEnvironment + ?Sized>(
state: &mut FuncTranslationState,
environ: &mut FE,
) -> WasmResult<()> {
let (flags, base, offset) = prepare_addr(
let (flags, base) = prepare_addr(
memarg,
mem_op_size(opcode, result_ty),
builder,
state,
environ,
)?;
let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base);
let (load, dfg) = builder
.ins()
.Load(opcode, result_ty, flags, Offset32::new(0), base);
state.push1(dfg.first_result(load));
Ok(())
}
@@ -2432,20 +2396,19 @@ fn translate_store<FE: FuncEnvironment + ?Sized>(
let val = state.pop1();
let val_ty = builder.func.dfg.value_type(val);
let (flags, base, offset) =
prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)?;
let (flags, base) = prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)?;
builder
.ins()
.Store(opcode, val_ty, flags, offset.into(), val, base);
.Store(opcode, val_ty, flags, Offset32::new(0), val, base);
Ok(())
}
fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u32 {
fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u8 {
match opcode {
ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1,
ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2,
ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4,
ir::Opcode::Store | ir::Opcode::Load => ty.bytes(),
ir::Opcode::Store | ir::Opcode::Load => u8::try_from(ty.bytes()).unwrap(),
_ => panic!("unknown size of mem op for {:?}", opcode),
}
}
@@ -2490,7 +2453,13 @@ fn translate_atomic_rmw<FE: FuncEnvironment + ?Sized>(
arg2 = builder.ins().ireduce(access_ty, arg2);
}
let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?;
let (flags, addr) = prepare_atomic_addr(
memarg,
u8::try_from(access_ty.bytes()).unwrap(),
builder,
state,
environ,
)?;
let mut res = builder.ins().atomic_rmw(access_ty, flags, op, addr, arg2);
if access_ty != widened_ty {
@@ -2538,7 +2507,13 @@ fn translate_atomic_cas<FE: FuncEnvironment + ?Sized>(
replacement = builder.ins().ireduce(access_ty, replacement);
}
let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?;
let (flags, addr) = prepare_atomic_addr(
memarg,
u8::try_from(access_ty.bytes()).unwrap(),
builder,
state,
environ,
)?;
let mut res = builder.ins().atomic_cas(flags, addr, expected, replacement);
if access_ty != widened_ty {
res = builder.ins().uextend(widened_ty, res);
@@ -2572,7 +2547,13 @@ fn translate_atomic_load<FE: FuncEnvironment + ?Sized>(
};
assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes());
let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?;
let (flags, addr) = prepare_atomic_addr(
memarg,
u8::try_from(access_ty.bytes()).unwrap(),
builder,
state,
environ,
)?;
let mut res = builder.ins().atomic_load(access_ty, flags, addr);
if access_ty != widened_ty {
res = builder.ins().uextend(widened_ty, res);
@@ -2612,7 +2593,13 @@ fn translate_atomic_store<FE: FuncEnvironment + ?Sized>(
data = builder.ins().ireduce(access_ty, data);
}
let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?;
let (flags, addr) = prepare_atomic_addr(
memarg,
u8::try_from(access_ty.bytes()).unwrap(),
builder,
state,
environ,
)?;
builder.ins().atomic_store(flags, data, addr);
Ok(())
}