That optimization only applies to 12-bit immediates on AArch64, and it is now handled in backend lowering, so we can simplify this code a bit.
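(For context, and not taken from this diff: AArch64 `add`/`sub` accept a 12-bit unsigned immediate, optionally shifted left by 12 — e.g. `add x0, x1, #4095` — so small constant offsets can be folded during instruction selection rather than special-cased here.)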
//! Legalization of heaps.
//!
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
//! instruction into code that depends on the kind of heap referenced.
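//!
//! As an illustrative sketch only (exact value numbers, instruction order, and
//! Spectre guards vary with the target and its flags), a dynamic-heap access
//! such as `v1 = heap_addr.i64 heap0, v0, 0, 1` may expand to roughly:
//!
//! ```text
//! v2 = global_value.i64 gv1  ; the heap bound
//! v3 = icmp uge v0, v2       ; index >= bound?
//! trapnz v3, heap_oob
//! v4 = global_value.i64 gv0  ; the heap base
//! v1 = iadd v4, v0
//! ```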
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::immediates::{Uimm32, Uimm8};
use crate::ir::{self, InstBuilder, RelSourceLoc};
use crate::isa::TargetIsa;
use crate::trace;

/// Expand a `heap_addr` instruction according to the definition of the heap.
pub fn expand_heap_addr(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    isa: &dyn TargetIsa,
    heap: ir::Heap,
    index_operand: ir::Value,
    offset_immediate: Uimm32,
    access_size: Uimm8,
) {
    trace!(
        "expanding heap_addr: {:?}: {}",
        inst,
        func.dfg.display_inst(inst)
    );

    match func.heaps[heap].style {
        ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr(
            isa,
            inst,
            heap,
            index_operand,
            u32::from(offset_immediate),
            u8::from(access_size),
            bound_gv,
            func,
        ),
        ir::HeapStyle::Static { bound } => static_addr(
            isa,
            inst,
            heap,
            index_operand,
            u32::from(offset_immediate),
            u8::from(access_size),
            bound.into(),
            func,
            cfg,
        ),
    }
}
/// Expand a `heap_addr` for a dynamic heap.
fn dynamic_addr(
    isa: &dyn TargetIsa,
    inst: ir::Inst,
    heap: ir::Heap,
    index: ir::Value,
    offset: u32,
    access_size: u8,
    bound_gv: ir::GlobalValue,
    func: &mut ir::Function,
) {
    let index_ty = func.dfg.value_type(index);
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let min_size = func.heaps[heap].min_size.into();
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);

    // Start with the bounds check. Trap if `index + offset + access_size > bound`.
    let bound = pos.ins().global_value(addr_ty, bound_gv);
    let (cc, lhs, bound) = if offset == 0 && access_size == 1 {
        // `index > bound - 1` is the same as `index >= bound`.
        (IntCC::UnsignedGreaterThanOrEqual, index, bound)
    } else if offset_plus_size(offset, access_size) <= min_size {
        // We know that `bound >= min_size`, so here we can compare `index >
        // bound - (offset + access_size)` without wrapping.
        let adj_bound = pos
            .ins()
            .iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64));
        trace!(
            " inserting: {}",
            pos.func.dfg.display_value_inst(adj_bound)
        );
        (IntCC::UnsignedGreaterThan, index, adj_bound)
    } else {
        // We need an overflow check for the adjusted offset.
        let access_size_val = pos
            .ins()
            .iconst(addr_ty, offset_plus_size(offset, access_size) as i64);
        let adj_offset =
            pos.ins()
                .uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
        trace!(
            " inserting: {}",
            pos.func.dfg.display_value_inst(adj_offset)
        );
        (IntCC::UnsignedGreaterThan, adj_offset, bound)
    };
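    // Worked example with hypothetical values: for `offset = 16`,
    // `access_size = 4`, and `min_size = 0x1_0000`, the middle arm applies
    // (`20 <= min_size`), so we emit `adj_bound = bound - 20` and trap when
    // `index > adj_bound`, which is `index + 20 > bound` with no wrapping.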

    let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() {
        // When we emit a spectre-guarded heap access, we do a `select
        // is_out_of_bounds, NULL, addr` to compute the address, and so the load
        // will trap if the address is out of bounds, which means we don't need
        // to do another explicit bounds check like we do below.
        Some(SpectreOobComparison {
            cc,
            lhs,
            rhs: bound,
        })
    } else {
        let oob = pos.ins().icmp(cc, lhs, bound);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob));

        let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
        trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz));

        None
    };

    compute_addr(
        isa,
        inst,
        heap,
        addr_ty,
        index,
        offset,
        pos.func,
        spectre_oob_comparison,
    );
}

/// Expand a `heap_addr` for a static heap.
fn static_addr(
    isa: &dyn TargetIsa,
    inst: ir::Inst,
    heap: ir::Heap,
    index: ir::Value,
    offset: u32,
    access_size: u8,
    bound: u64,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
) {
    let index_ty = func.dfg.value_type(index);
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // The goal here is to trap if `index + offset + access_size > bound`.
    //
    // This first case is a trivial case where we can statically trap.
    if offset_plus_size(offset, access_size) > bound {
        // This will simply always trap since `index >= 0`.
        let trap = pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
        trace!(" inserting: {}", pos.func.dfg.display_inst(trap));
        let iconst = pos.func.dfg.replace(inst).iconst(addr_ty, 0);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(iconst));

        // Split the block, as the trap is a terminator instruction.
        let curr_block = pos.current_block().expect("Cursor is not in a block");
        let new_block = pos.func.dfg.make_block();
        pos.insert_block(new_block);
        cfg.recompute_block(pos.func, curr_block);
        cfg.recompute_block(pos.func, new_block);
        return;
    }

    // After the trivial case is done we're now mostly interested in trapping
    // if `index > bound - offset - access_size`. We know `bound - offset -
    // access_size` here is non-negative from the above comparison.
    //
    // If we can know `bound - offset - access_size >= 4GB` then with a 32-bit
    // index we're guaranteed:
    //
    //      bound - offset - access_size >= 4GB > index
    //
    // or, in other words, `index < bound - offset - access_size`, meaning we
    // can't trap for any value of `index`.
    //
    // With that we have an optimization here where with 32-bit indices and
    // `bound - offset - access_size >= 4GB` we can omit a bounds check.
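    //
    // Worked example with hypothetical values: a 32-bit index into a static
    // heap with `bound = 0x1_0000_0000` (4GiB), `offset = 0`, and
    // `access_size = 1` yields `limit = 0xffff_ffff`. Every possible `index`
    // satisfies `index <= limit`, so the explicit check below is skipped.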
    let limit = bound - offset as u64 - access_size as u64;
    let mut spectre_oob_comparison = None;
    let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);
    if index_ty != ir::types::I32 || limit < 0xffff_ffff {
        // Here we want to test the condition `index > limit` and if that's true
        // then this is an out-of-bounds access and needs to trap.
        let oob = pos
            .ins()
            .icmp_imm(IntCC::UnsignedGreaterThan, index, limit as i64);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob));

        let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
        trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz));

        if isa.flags().enable_heap_access_spectre_mitigation() {
            let limit = pos.ins().iconst(addr_ty, limit as i64);
            trace!(" inserting: {}", pos.func.dfg.display_value_inst(limit));
            spectre_oob_comparison = Some(SpectreOobComparison {
                cc: IntCC::UnsignedGreaterThan,
                lhs: index,
                rhs: limit,
            });
        }
    }

    compute_addr(
        isa,
        inst,
        heap,
        addr_ty,
        index,
        offset,
        pos.func,
        spectre_oob_comparison,
    );
}

fn cast_index_to_pointer_ty(
    index: ir::Value,
    index_ty: ir::Type,
    addr_ty: ir::Type,
    pos: &mut FuncCursor,
) -> ir::Value {
    if index_ty == addr_ty {
        return index;
    }
    // Note that using 64-bit heaps on a 32-bit host is not currently
    // supported; it would require at least a bounds check here to ensure that
    // the truncation from 64 to 32 bits doesn't lose any upper bits. For now,
    // though, we're mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
    assert!(index_ty.bits() < addr_ty.bits());

    // Convert `index` to `addr_ty`.
    let extended_index = pos.ins().uextend(addr_ty, index);

    // Add a debug value-label alias so that debuginfo can name the extended
    // value as the address.
    let loc = pos.srcloc();
    let loc = RelSourceLoc::from_base_offset(pos.func.params.base_srcloc(), loc);
    pos.func
        .stencil
        .dfg
        .add_value_label_alias(extended_index, loc, index);

    extended_index
}
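
/// The operands and condition code of a bounds comparison, saved so that
/// `compute_addr` can emit a `select_spectre_guard` that forces
/// speculatively-executed out-of-bounds accesses to a null address.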
struct SpectreOobComparison {
    cc: IntCC,
    lhs: ir::Value,
    rhs: ir::Value,
}

/// Emit code for the base address computation of a `heap_addr` instruction.
fn compute_addr(
    isa: &dyn TargetIsa,
    inst: ir::Inst,
    heap: ir::Heap,
    addr_ty: ir::Type,
    index: ir::Value,
    offset: u32,
    func: &mut ir::Function,
    // If we are performing Spectre mitigation with conditional selects, the
    // values to compare and the condition code that indicates an out-of-bounds
    // condition; on this condition, the conditional move will choose a
    // speculatively safe address (a zero / null pointer) instead.
    spectre_oob_comparison: Option<SpectreOobComparison>,
) {
    debug_assert_eq!(func.dfg.value_type(index), addr_ty);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Compute the heap base address.
    let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() {
        let base = pos.ins().get_pinned_reg(isa.pointer_type());
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(base));
        base
    } else {
        let base_gv = pos.func.heaps[heap].base;
        let base = pos.ins().global_value(addr_ty, base_gv);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(base));
        base
    };

    if let Some(SpectreOobComparison { cc, lhs, rhs }) = spectre_oob_comparison {
        let final_base = pos.ins().iadd(base, index);
        // NB: The addition of the offset immediate must happen *before* the
        // `select_spectre_guard`. If it happens after, then we potentially are
        // letting speculative execution read the whole first 4GiB of memory.
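        //
        // As a sketch, the full sequence emitted for this branch (with a
        // non-zero `offset`) is roughly:
        //
        //   final_base = iadd base, index
        //   final_addr = iadd_imm final_base, offset
        //   zero       = iconst 0
        //   cmp        = icmp cc, lhs, rhs
        //   result     = select_spectre_guard cmp, zero, final_addr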
        let final_addr = if offset == 0 {
            final_base
        } else {
            let final_addr = pos.ins().iadd_imm(final_base, offset as i64);
            trace!(
                " inserting: {}",
                pos.func.dfg.display_value_inst(final_addr)
            );
            final_addr
        };
        let zero = pos.ins().iconst(addr_ty, 0);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(zero));

        let cmp = pos.ins().icmp(cc, lhs, rhs);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp));

        let value = pos
            .func
            .dfg
            .replace(inst)
            .select_spectre_guard(cmp, zero, final_addr);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(value));
    } else if offset == 0 {
        let addr = pos.func.dfg.replace(inst).iadd(base, index);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr));
    } else {
        let final_base = pos.ins().iadd(base, index);
        trace!(
            " inserting: {}",
            pos.func.dfg.display_value_inst(final_base)
        );
        let addr = pos
            .func
            .dfg
            .replace(inst)
            .iadd_imm(final_base, offset as i64);
        trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr));
    }
}
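
/// Returns `offset + size`, widened to `u64` so the addition cannot overflow.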
fn offset_plus_size(offset: u32, size: u8) -> u64 {
    // Cannot overflow because we are widening to `u64`.
    offset as u64 + size as u64
}