Support heaps with no offset-guard pages.

Also, say "offset-guard pages" rather than just "guard pages" to describe the
region of a heap which is never accessible and which exists to support
optimizations for heap accesses with offsets.

And, introduce a `Uimm64` immediate type, and make all heap fields use
`Uimm64` instead of `Imm64` since they really are unsigned.
This commit is contained in:
Dan Gohman
2018-11-29 04:53:30 -08:00
parent 93696a80bb
commit a20c852148
27 changed files with 302 additions and 172 deletions

View File

@@ -1,6 +1,6 @@
//! Heaps.
use ir::immediates::Imm64;
use ir::immediates::Uimm64;
use ir::{GlobalValue, Type};
use std::fmt;
@@ -12,10 +12,10 @@ pub struct HeapData {
/// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
/// checking.
pub min_size: Imm64,
pub min_size: Uimm64,
/// Size in bytes of the guard pages following the heap.
pub guard_size: Imm64,
/// Size in bytes of the offset-guard pages following the heap.
pub offset_guard_size: Uimm64,
/// Heap style, with additional style-specific info.
pub style: HeapStyle,
@@ -34,10 +34,10 @@ pub enum HeapStyle {
},
/// A static heap has a fixed base address and a number of not-yet-allocated pages before the
/// guard pages.
/// offset-guard pages.
Static {
/// Heap bound in bytes. The guard pages are allocated after the bound.
bound: Imm64,
/// Heap bound in bytes. The offset-guard pages are allocated after the bound.
bound: Uimm64,
},
}
@@ -55,8 +55,8 @@ impl fmt::Display for HeapData {
}
write!(
f,
", guard {}, index_type {}",
self.guard_size, self.index_type
", offset_guard {}, index_type {}",
self.offset_guard_size, self.index_type
)
}
}

View File

@@ -9,7 +9,7 @@ use std::mem;
use std::str::FromStr;
use std::{i32, u32};
/// 64-bit immediate integer operand.
/// 64-bit immediate signed integer operand.
///
/// An `Imm64` operand can also be used to represent immediate values of smaller integer types by
/// sign-extending to `i64`.
@@ -40,13 +40,87 @@ impl From<i64> for Imm64 {
}
}
impl Display for Imm64 {
    /// Format small magnitudes (strictly between -10_000 and 10_000) in decimal and everything
    /// else as hexadecimal via `write_hex`, using the value's 64-bit two's-complement bits.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let value = self.0;
        if value <= -10_000 || value >= 10_000 {
            // Reinterpret as unsigned so negative values print as their raw bit pattern.
            write_hex(value as u64, f)
        } else {
            write!(f, "{}", value)
        }
    }
}
/// Parse a 64-bit signed number.
///
/// An optional leading `+` or `-` is stripped and the remainder is handed to `parse_u64`.
/// The accepted range is the "range-and-a-half" from -2^63 to 2^64-1: unsigned magnitudes above
/// `i64::MAX` are returned as their two's-complement `i64` bit pattern.
fn parse_i64(s: &str) -> Result<i64, &'static str> {
    let (negative, digits) = match s.chars().next() {
        Some('-') => (true, &s[1..]),
        Some('+') => (false, &s[1..]),
        _ => (false, s),
    };

    let magnitude = parse_u64(digits)?;
    if !negative {
        return Ok(magnitude as i64);
    }

    let negated = magnitude.wrapping_neg();
    // Don't allow large negative values to wrap around and become positive.
    if negated as i64 > 0 {
        return Err("Negative number too small");
    }
    Ok(negated as i64)
}
impl FromStr for Imm64 {
    type Err = &'static str;

    // Parse a decimal or hexadecimal `Imm64`; the text is interpreted by `parse_i64` and any
    // error it reports is returned unchanged.
    fn from_str(s: &str) -> Result<Self, &'static str> {
        let bits = parse_i64(s)?;
        Ok(Self::new(bits))
    }
}
/// 64-bit immediate unsigned integer operand.
///
/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by
/// zero-extending to `u64`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub struct Uimm64(u64);

impl Uimm64 {
    /// Create a new `Uimm64` representing the unsigned number `x`.
    pub fn new(x: u64) -> Self {
        Uimm64(x)
    }

    /// Return the two's-complement negation of `self`, wrapping around modulo 2^64.
    pub fn wrapping_neg(self) -> Self {
        Uimm64(self.0.wrapping_neg())
    }
}

// Implement `From` rather than `Into`: the standard library's blanket impl then provides
// `Into<u64> for Uimm64` as well, so `.into()` call sites keep working and `u64::from(imm)`
// becomes available too.
impl From<Uimm64> for u64 {
    fn from(val: Uimm64) -> u64 {
        val.0
    }
}

impl From<u64> for Uimm64 {
    fn from(x: u64) -> Self {
        Uimm64(x)
    }
}
/// Hexadecimal with a multiple of 4 digits and group separators:
///
/// 0xfff0
/// 0x0001_ffff
/// 0xffff_ffff_fff8_4400
///
fn write_hex(x: i64, f: &mut Formatter) -> fmt::Result {
fn write_hex(x: u64, f: &mut Formatter) -> fmt::Result {
let mut pos = (64 - x.leading_zeros() - 1) & 0xf0;
write!(f, "0x{:04x}", (x >> pos) & 0xffff)?;
while pos > 0 {
@@ -56,10 +130,10 @@ fn write_hex(x: i64, f: &mut Formatter) -> fmt::Result {
Ok(())
}
impl Display for Imm64 {
impl Display for Uimm64 {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let x = self.0;
if -10_000 < x && x < 10_000 {
if x < 10_000 {
// Use decimal for small numbers.
write!(f, "{}", x)
} else {
@@ -68,20 +142,16 @@ impl Display for Imm64 {
}
}
/// Parse a 64-bit number.
fn parse_i64(s: &str) -> Result<i64, &'static str> {
/// Parse a 64-bit unsigned number.
fn parse_u64(s: &str) -> Result<u64, &'static str> {
let mut value: u64 = 0;
let mut digits = 0;
let negative = s.starts_with('-');
let s2 = if negative || s.starts_with('+') {
&s[1..]
} else {
s
};
if s2.starts_with("0x") {
if s.starts_with("-0x") {
return Err("Invalid character in hexadecimal number");
} else if s.starts_with("0x") {
// Hexadecimal.
for ch in s2[2..].chars() {
for ch in s[2..].chars() {
match ch.to_digit(16) {
Some(digit) => {
digits += 1;
@@ -101,7 +171,7 @@ fn parse_i64(s: &str) -> Result<i64, &'static str> {
}
} else {
// Decimal number.
for ch in s2.chars() {
for ch in s.chars() {
match ch.to_digit(16) {
Some(digit) => {
digits += 1;
@@ -128,23 +198,15 @@ fn parse_i64(s: &str) -> Result<i64, &'static str> {
return Err("No digits in number");
}
// We support the range-and-a-half from -2^63 .. 2^64-1.
if negative {
value = value.wrapping_neg();
// Don't allow large negative values to wrap around and become positive.
if value as i64 > 0 {
return Err("Negative number too small");
}
}
Ok(value as i64)
Ok(value)
}
impl FromStr for Imm64 {
impl FromStr for Uimm64 {
type Err = &'static str;
// Parse a decimal or hexadecimal `Imm64`, formatted as above.
// Parse a decimal or hexadecimal `Uimm64`, formatted as above.
fn from_str(s: &str) -> Result<Self, &'static str> {
parse_i64(s).map(Self::new)
parse_u64(s).map(Self::new)
}
}
@@ -182,7 +244,7 @@ impl Display for Uimm32 {
if self.0 < 10_000 {
write!(f, "{}", self.0)
} else {
write_hex(i64::from(self.0), f)
write_hex(u64::from(self.0), f)
}
}
}
@@ -268,7 +330,7 @@ impl Display for Offset32 {
if val < 10_000 {
write!(f, "{}", val)
} else {
write_hex(val, f)
write_hex(val as u64, f)
}
}
}
@@ -683,6 +745,20 @@ mod tests {
assert_eq!(Imm64(0x10000).to_string(), "0x0001_0000");
}
#[test]
fn format_uimm64() {
    // Each entry pairs a raw value with the text it must display as: decimal below 10_000,
    // grouped hexadecimal otherwise (including values with the top bit set).
    let cases: &[(u64, &str)] = &[
        (0, "0"),
        (9999, "9999"),
        (10000, "0x2710"),
        (-9999i64 as u64, "0xffff_ffff_ffff_d8f1"),
        (-10000i64 as u64, "0xffff_ffff_ffff_d8f0"),
        (0xffff, "0xffff"),
        (0x10000, "0x0001_0000"),
    ];
    for &(value, want) in cases.iter() {
        assert_eq!(Uimm64(value).to_string(), want);
    }
}
// Verify that `text` can be parsed as a `T` into a value that displays as `want`.
fn parse_ok<T: FromStr + Display>(text: &str, want: &str)
where
@@ -750,6 +826,46 @@ mod tests {
parse_err::<Imm64>("0x0_0000_0000_0000_0000", "Too many hexadecimal digits");
}
#[test]
fn parse_uimm64() {
    // Inputs that must parse, paired with the text the parsed value displays as.
    let accepted: &[(&str, &str)] = &[
        ("0", "0"),
        ("1", "1"),
        ("0x0", "0"),
        ("0xf", "15"),
        ("0xffffffff_fffffff7", "0xffff_ffff_ffff_fff7"),
        // Probe limits.
        ("0xffffffff_ffffffff", "0xffff_ffff_ffff_ffff"),
        ("0x80000000_00000000", "0x8000_0000_0000_0000"),
        ("18446744073709551615", "0xffff_ffff_ffff_ffff"),
        // Underscores are allowed where digits go.
        ("0_0", "0"),
        ("_10_", "10"),
        ("0x97_88_bb", "0x0097_88bb"),
        ("0x_97_", "151"),
    ];
    for &(text, want) in accepted.iter() {
        parse_ok::<Uimm64>(text, want);
    }

    // Inputs that must be rejected, paired with the expected error message.
    let rejected: &[(&str, &str)] = &[
        // Overflow both the `checked_add` and `checked_mul`.
        ("18446744073709551616", "Too large decimal number"),
        ("184467440737095516100", "Too large decimal number"),
        // Nothing but separators or prefixes.
        ("", "No digits in number"),
        ("_", "No digits in number"),
        ("0x", "No digits in number"),
        ("0x_", "No digits in number"),
        // Signs and whitespace are not accepted by the unsigned parser.
        ("-", "Invalid character in decimal number"),
        ("-0x", "Invalid character in hexadecimal number"),
        (" ", "Invalid character in decimal number"),
        ("0 ", "Invalid character in decimal number"),
        (" 0", "Invalid character in decimal number"),
        ("--", "Invalid character in decimal number"),
        ("-0x-", "Invalid character in hexadecimal number"),
        ("-0", "Invalid character in decimal number"),
        ("-1", "Invalid character in decimal number"),
        // Hex count overflow.
        ("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"),
    ];
    for &(text, message) in rejected.iter() {
        parse_err::<Uimm64>(text, message);
    }
}
#[test]
fn format_offset32() {
assert_eq!(Offset32(0).to_string(), "");

View File

@@ -1,6 +1,6 @@
//! Tables.
use ir::immediates::Imm64;
use ir::immediates::Uimm64;
use ir::{GlobalValue, Type};
use std::fmt;
@@ -12,13 +12,13 @@ pub struct TableData {
/// Guaranteed minimum table size in elements. Table accesses before `min_size` don't need
/// bounds checking.
pub min_size: Imm64,
pub min_size: Uimm64,
/// Global value giving the current bound of the table, in elements.
pub bound_gv: GlobalValue,
/// The size of a table element, in bytes.
pub element_size: Imm64,
pub element_size: Uimm64,
/// The index type for the table.
pub index_type: Type,

View File

@@ -17,7 +17,8 @@ pub enum TrapCode {
/// A `heap_addr` instruction detected an out-of-bounds error.
///
/// Note that not all out-of-bounds heap accesses are reported this way;
/// some are detected by a segmentation fault on the heap guard pages.
/// some are detected by a segmentation fault on the heap's unmapped or
/// offset-guard pages.
HeapOutOfBounds,
/// A `table_addr` instruction detected an out-of-bounds error.

View File

@@ -49,7 +49,7 @@ fn dynamic_addr(
bound_gv: ir::GlobalValue,
func: &mut ir::Function,
) {
let access_size = i64::from(access_size);
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let min_size = func.heaps[heap].min_size.into();
@@ -67,13 +67,13 @@ fn dynamic_addr(
} else if access_size <= min_size {
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
// without wrapping.
let adj_bound = pos.ins().iadd_imm(bound, -access_size);
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos.ins().iconst(offset_ty, access_size);
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, access_size_val);
pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
oob = pos
@@ -91,11 +91,11 @@ fn static_addr(
heap: ir::Heap,
offset: ir::Value,
access_size: u32,
bound: i64,
bound: u64,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
) {
let access_size = i64::from(access_size);
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
@@ -117,7 +117,7 @@ fn static_addr(
}
// Check `offset > limit` which is now known non-negative.
let limit = bound - access_size;
let limit = bound - u64::from(access_size);
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
// more.
@@ -126,10 +126,10 @@ fn static_addr(
// Prefer testing `offset >= limit - 1` when limit is odd because an even number is
// likely to be a convenient constant on ARM and other RISC architectures.
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit - 1)
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 - 1)
} else {
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
};
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
}

View File

@@ -92,17 +92,15 @@ fn compute_addr(
let element_size = pos.func.tables[table].element_size;
let mut offset;
let element_size_i64: i64 = element_size.into();
debug_assert!(element_size_i64 >= 0);
let element_size_u64 = element_size_i64 as u64;
if element_size_u64 == 1 {
let element_size: u64 = element_size.into();
if element_size == 1 {
offset = index;
} else if element_size_u64.is_power_of_two() {
} else if element_size.is_power_of_two() {
offset = pos
.ins()
.ishl_imm(index, i64::from(element_size_u64.trailing_zeros()));
.ishl_imm(index, i64::from(element_size.trailing_zeros()));
} else {
offset = pos.ins().imul_imm(index, element_size);
offset = pos.ins().imul_imm(index, element_size as i64);
}
if element_offset == Offset32::new(0) {

View File

@@ -3,7 +3,7 @@
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir;
use cranelift_codegen::ir::entities::AnyEntity;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32};
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64};
use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs};
use cranelift_codegen::ir::types::INVALID;
use cranelift_codegen::ir::{
@@ -188,10 +188,10 @@ impl<'a> Context<'a> {
while self.function.heaps.next_key().index() <= heap.index() {
self.function.create_heap(HeapData {
base: GlobalValue::reserved_value(),
min_size: Imm64::new(0),
guard_size: Imm64::new(0),
min_size: Uimm64::new(0),
offset_guard_size: Uimm64::new(0),
style: HeapStyle::Static {
bound: Imm64::new(0),
bound: Uimm64::new(0),
},
index_type: INVALID,
});
@@ -214,9 +214,9 @@ impl<'a> Context<'a> {
while self.function.tables.next_key().index() <= table.index() {
self.function.create_table(TableData {
base_gv: GlobalValue::reserved_value(),
min_size: Imm64::new(0),
min_size: Uimm64::new(0),
bound_gv: GlobalValue::reserved_value(),
element_size: Imm64::new(0),
element_size: Uimm64::new(0),
index_type: INVALID,
});
}
@@ -544,6 +544,19 @@ impl<'a> Parser<'a> {
}
}
// Match and consume a Uimm64 immediate.
//
// If the current token is an integer it is consumed and its text parsed as a `Uimm64`;
// otherwise an error carrying `err_msg` is returned at the current location.
fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> {
    match self.token() {
        Some(Token::Integer(text)) => {
            self.consume();
            // The lexer only guarantees the text looks like an integer, so the `Uimm64`
            // parser is what catches overflow and other malformed input.
            match text.parse::<Uimm64>() {
                Ok(imm) => Ok(imm),
                Err(_) => Err(self.error("expected u64 decimal immediate")),
            }
        }
        _ => err!(self.loc, err_msg),
    }
}
// Match and consume a Uimm32 immediate.
fn match_uimm32(&mut self, err_msg: &str) -> ParseResult<Uimm32> {
if let Some(Token::Integer(text)) = self.token() {
@@ -1279,7 +1292,7 @@ impl<'a> Parser<'a> {
// heap-base ::= GlobalValue(base)
// heap-attr ::= "min" Uimm64(bytes)
// | "bound" Uimm64(bytes)
// | "guard" Imm64(bytes)
// | "offset_guard" Uimm64(bytes)
// | "index_type" type
//
fn parse_heap_decl(&mut self) -> ParseResult<(Heap, HeapData)> {
@@ -1302,7 +1315,7 @@ impl<'a> Parser<'a> {
let mut data = HeapData {
base,
min_size: 0.into(),
guard_size: 0.into(),
offset_guard_size: 0.into(),
style: HeapStyle::Static { bound: 0.into() },
index_type: ir::types::I32,
};
@@ -1311,7 +1324,7 @@ impl<'a> Parser<'a> {
while self.optional(Token::Comma) {
match self.match_any_identifier("expected heap attribute name")? {
"min" => {
data.min_size = self.match_imm64("expected integer min size")?;
data.min_size = self.match_uimm64("expected integer min size")?;
}
"bound" => {
data.style = match style_name {
@@ -1319,13 +1332,14 @@ impl<'a> Parser<'a> {
bound_gv: self.match_gv("expected gv bound")?,
},
"static" => HeapStyle::Static {
bound: self.match_imm64("expected integer bound")?,
bound: self.match_uimm64("expected integer bound")?,
},
t => return err!(self.loc, "unknown heap style '{}'", t),
};
}
"guard" => {
data.guard_size = self.match_imm64("expected integer guard size")?;
"offset_guard" => {
data.offset_guard_size =
self.match_uimm64("expected integer offset-guard size")?;
}
"index_type" => {
data.index_type = self.match_type("expected index type")?;
@@ -1381,7 +1395,7 @@ impl<'a> Parser<'a> {
while self.optional(Token::Comma) {
match self.match_any_identifier("expected table attribute name")? {
"min" => {
data.min_size = self.match_imm64("expected integer min size")?;
data.min_size = self.match_uimm64("expected integer min size")?;
}
"bound" => {
data.bound_gv = match style_name {
@@ -1390,7 +1404,7 @@ impl<'a> Parser<'a> {
};
}
"element_size" => {
data.element_size = self.match_imm64("expected integer element size")?;
data.element_size = self.match_uimm64("expected integer element size")?;
}
"index_type" => {
data.index_type = self.match_type("expected index type")?;
@@ -2780,8 +2794,8 @@ mod tests {
fn duplicate_heap() {
let ParseError { location, message } = Parser::new(
"function %ebbs() system_v {
heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000
heap0 = static gv0, min 0x1000, bound 0x10_0000, guard 0x1000",
heap0 = static gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000
heap0 = static gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000",
)
.parse_function(None)
.unwrap_err();

View File

@@ -36,7 +36,7 @@ pub mod prelude {
pub use codegen;
pub use codegen::entity::EntityRef;
pub use codegen::ir::condcodes::{FloatCC, IntCC};
pub use codegen::ir::immediates::{Ieee32, Ieee64, Imm64};
pub use codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Uimm64};
pub use codegen::ir::types;
pub use codegen::ir::{
AbiParam, Ebb, ExtFuncData, ExternalName, GlobalValueData, InstBuilder, JumpTableData,

View File

@@ -992,20 +992,20 @@ fn get_heap_addr(
) -> (ir::Value, i32) {
use std::cmp::min;
let guard_size: i64 = builder.func.heaps[heap].guard_size.into();
debug_assert!(guard_size > 0, "Heap guard pages currently required");
let mut adjusted_offset = u64::from(offset);
let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into();
// Generate `heap_addr` instructions that are friendly to CSE by checking offsets that are
// multiples of the guard size. Add one to make sure that we check the pointer itself is in
// bounds.
//
// For accesses on the outer skirts of the guard pages, we expect that we get a trap
// even if the access goes beyond the guard pages. This is because the first byte pointed to is
// inside the guard pages.
let check_size = min(
i64::from(u32::MAX),
1 + (i64::from(offset) / guard_size) * guard_size,
) as u32;
// multiples of the offset-guard size. Add one to make sure that we check the pointer itself
// is in bounds.
if offset_guard_size != 0 {
adjusted_offset = adjusted_offset / offset_guard_size * offset_guard_size;
}
// For accesses on the outer skirts of the offset-guard pages, we expect that we get a trap
// even if the access goes beyond the offset-guard pages. This is because the first byte
// pointed to is inside the offset-guard pages.
let check_size = min(u64::from(u32::MAX), 1 + adjusted_offset) as u32;
let base = builder.ins().heap_addr(addr_ty, heap, addr32, check_size);
// Native load/store instructions take a signed `Offset32` immediate, so adjust the base

View File

@@ -2,7 +2,7 @@
//! wasm translation.
use cranelift_codegen::cursor::FuncCursor;
use cranelift_codegen::ir::immediates::{Imm64, Offset32};
use cranelift_codegen::ir::immediates::{Offset32, Uimm64};
use cranelift_codegen::ir::types::*;
use cranelift_codegen::ir::{self, InstBuilder};
use cranelift_codegen::isa::TargetFrontendConfig;
@@ -195,7 +195,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
func.create_heap(ir::HeapData {
base: gv,
min_size: 0.into(),
guard_size: 0x8000_0000.into(),
offset_guard_size: 0x8000_0000.into(),
style: ir::HeapStyle::Static {
bound: 0x1_0000_0000.into(),
},
@@ -221,9 +221,9 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
func.create_table(ir::TableData {
base_gv,
min_size: Imm64::new(0),
min_size: Uimm64::new(0),
bound_gv,
element_size: Imm64::new(i64::from(self.pointer_bytes()) * 2),
element_size: Uimm64::from(u64::from(self.pointer_bytes()) * 2),
index_type: I32,
})
}