Support heaps with no offset-guard pages.

Also, say "guard-offset pages" rather than just "guard pages" to describe the region of a heap which is never accessible and which exists to support optimizations for heap accesses with offsets. And, introduce a `Uimm64` immediate type, and make all heap fields use `Uimm64` instead of `Imm64` since they really are unsigned.
2018-11-29 04:53:30 -08:00
parent 93696a80bb
commit a20c852148
27 changed files with 302 additions and 172 deletions
--- a/lib/codegen/src/ir/heap.rs
+++ b/lib/codegen/src/ir/heap.rs
@@ -1,6 +1,6 @@
 //! Heaps.

-use ir::immediates::Imm64;
+use ir::immediates::Uimm64;
 use ir::{GlobalValue, Type};
 use std::fmt;

@@ -12,10 +12,10 @@ pub struct HeapData {

    /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
    /// checking.
-    pub min_size: Imm64,
+    pub min_size: Uimm64,

-    /// Size in bytes of the guard pages following the heap.
-    pub guard_size: Imm64,
+    /// Size in bytes of the offset-guard pages following the heap.
+    pub offset_guard_size: Uimm64,

    /// Heap style, with additional style-specific info.
    pub style: HeapStyle,
@@ -34,10 +34,10 @@ pub enum HeapStyle {
    },

    /// A static heap has a fixed base address and a number of not-yet-allocated pages before the
-    /// guard pages.
+    /// offset-guard pages.
    Static {
-        /// Heap bound in bytes. The guard pages are allocated after the bound.
-        bound: Imm64,
+        /// Heap bound in bytes. The offset-guard pages are allocated after the bound.
+        bound: Uimm64,
    },
 }

@@ -55,8 +55,8 @@ impl fmt::Display for HeapData {
        }
        write!(
            f,
-            ", guard {}, index_type {}",
-            self.guard_size, self.index_type
+            ", offset_guard {}, index_type {}",
+            self.offset_guard_size, self.index_type
        )
    }
 }
--- a/lib/codegen/src/ir/immediates.rs
+++ b/lib/codegen/src/ir/immediates.rs
@@ -9,7 +9,7 @@ use std::mem;
 use std::str::FromStr;
 use std::{i32, u32};

-/// 64-bit immediate integer operand.
+/// 64-bit immediate signed integer operand.
 ///
 /// An `Imm64` operand can also be used to represent immediate values of smaller integer types by
 /// sign-extending to `i64`.
@@ -40,13 +40,87 @@ impl From<i64> for Imm64 {
    }
 }

+impl Display for Imm64 {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let x = self.0;
+        if -10_000 < x && x < 10_000 {
+            // Use decimal for small numbers.
+            write!(f, "{}", x)
+        } else {
+            write_hex(x as u64, f)
+        }
+    }
+}
+
+/// Parse a 64-bit signed number.
+fn parse_i64(s: &str) -> Result<i64, &'static str> {
+    let negative = s.starts_with('-');
+    let s2 = if negative || s.starts_with('+') {
+        &s[1..]
+    } else {
+        s
+    };
+
+    let mut value = parse_u64(s2)?;
+
+    // We support the range-and-a-half from -2^63 .. 2^64-1.
+    if negative {
+        value = value.wrapping_neg();
+        // Don't allow large negative values to wrap around and become positive.
+        if value as i64 > 0 {
+            return Err("Negative number too small");
+        }
+    }
+    Ok(value as i64)
+}
+
+impl FromStr for Imm64 {
+    type Err = &'static str;
+
+    // Parse a decimal or hexadecimal `Imm64`, formatted as above.
+    fn from_str(s: &str) -> Result<Self, &'static str> {
+        parse_i64(s).map(Self::new)
+    }
+}
+
+/// 64-bit immediate unsigned integer operand.
+///
+/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by
+/// zero-extending to `i64`.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+pub struct Uimm64(u64);
+
+impl Uimm64 {
+    /// Create a new `Uimm64` representing the unsigned number `x`.
+    pub fn new(x: u64) -> Self {
+        Uimm64(x)
+    }
+
+    /// Return self negated.
+    pub fn wrapping_neg(self) -> Self {
+        Uimm64(self.0.wrapping_neg())
+    }
+}
+
+impl Into<u64> for Uimm64 {
+    fn into(self) -> u64 {
+        self.0
+    }
+}
+
+impl From<u64> for Uimm64 {
+    fn from(x: u64) -> Self {
+        Uimm64(x)
+    }
+}
+
 /// Hexadecimal with a multiple of 4 digits and group separators:
 ///
 ///   0xfff0
 ///   0x0001_ffff
 ///   0xffff_ffff_fff8_4400
 ///
-fn write_hex(x: i64, f: &mut Formatter) -> fmt::Result {
+fn write_hex(x: u64, f: &mut Formatter) -> fmt::Result {
    let mut pos = (64 - x.leading_zeros() - 1) & 0xf0;
    write!(f, "0x{:04x}", (x >> pos) & 0xffff)?;
    while pos > 0 {
@@ -56,10 +130,10 @@ fn write_hex(x: i64, f: &mut Formatter) -> fmt::Result {
    Ok(())
 }

-impl Display for Imm64 {
+impl Display for Uimm64 {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let x = self.0;
-        if -10_000 < x && x < 10_000 {
+        if x < 10_000 {
            // Use decimal for small numbers.
            write!(f, "{}", x)
        } else {
@@ -68,20 +142,16 @@ impl Display for Imm64 {
    }
 }

-/// Parse a 64-bit number.
-fn parse_i64(s: &str) -> Result<i64, &'static str> {
+/// Parse a 64-bit unsigned number.
+fn parse_u64(s: &str) -> Result<u64, &'static str> {
    let mut value: u64 = 0;
    let mut digits = 0;
-    let negative = s.starts_with('-');
-    let s2 = if negative || s.starts_with('+') {
-        &s[1..]
-    } else {
-        s
-    };

-    if s2.starts_with("0x") {
+    if s.starts_with("-0x") {
+        return Err("Invalid character in hexadecimal number");
+    } else if s.starts_with("0x") {
        // Hexadecimal.
-        for ch in s2[2..].chars() {
+        for ch in s[2..].chars() {
            match ch.to_digit(16) {
                Some(digit) => {
                    digits += 1;
@@ -101,7 +171,7 @@ fn parse_i64(s: &str) -> Result<i64, &'static str> {
        }
    } else {
        // Decimal number, possibly negative.
-        for ch in s2.chars() {
+        for ch in s.chars() {
            match ch.to_digit(16) {
                Some(digit) => {
                    digits += 1;
@@ -128,23 +198,15 @@ fn parse_i64(s: &str) -> Result<i64, &'static str> {
        return Err("No digits in number");
    }

-    // We support the range-and-a-half from -2^63 .. 2^64-1.
-    if negative {
-        value = value.wrapping_neg();
-        // Don't allow large negative values to wrap around and become positive.
-        if value as i64 > 0 {
-            return Err("Negative number too small");
-        }
-    }
-    Ok(value as i64)
+    Ok(value)
 }

-impl FromStr for Imm64 {
+impl FromStr for Uimm64 {
    type Err = &'static str;

-    // Parse a decimal or hexadecimal `Imm64`, formatted as above.
+    // Parse a decimal or hexadecimal `Uimm64`, formatted as above.
    fn from_str(s: &str) -> Result<Self, &'static str> {
-        parse_i64(s).map(Self::new)
+        parse_u64(s).map(Self::new)
    }
 }

@@ -182,7 +244,7 @@ impl Display for Uimm32 {
        if self.0 < 10_000 {
            write!(f, "{}", self.0)
        } else {
-            write_hex(i64::from(self.0), f)
+            write_hex(u64::from(self.0), f)
        }
    }
 }
@@ -268,7 +330,7 @@ impl Display for Offset32 {
        if val < 10_000 {
            write!(f, "{}", val)
        } else {
-            write_hex(val, f)
+            write_hex(val as u64, f)
        }
    }
 }
@@ -683,6 +745,20 @@ mod tests {
        assert_eq!(Imm64(0x10000).to_string(), "0x0001_0000");
    }

+    #[test]
+    fn format_uimm64() {
+        assert_eq!(Uimm64(0).to_string(), "0");
+        assert_eq!(Uimm64(9999).to_string(), "9999");
+        assert_eq!(Uimm64(10000).to_string(), "0x2710");
+        assert_eq!(Uimm64(-9999i64 as u64).to_string(), "0xffff_ffff_ffff_d8f1");
+        assert_eq!(
+            Uimm64(-10000i64 as u64).to_string(),
+            "0xffff_ffff_ffff_d8f0"
+        );
+        assert_eq!(Uimm64(0xffff).to_string(), "0xffff");
+        assert_eq!(Uimm64(0x10000).to_string(), "0x0001_0000");
+    }
+
    // Verify that `text` can be parsed as a `T` into a value that displays as `want`.
    fn parse_ok<T: FromStr + Display>(text: &str, want: &str)
    where
@@ -750,6 +826,46 @@ mod tests {
        parse_err::<Imm64>("0x0_0000_0000_0000_0000", "Too many hexadecimal digits");
    }

+    #[test]
+    fn parse_uimm64() {
+        parse_ok::<Uimm64>("0", "0");
+        parse_ok::<Uimm64>("1", "1");
+        parse_ok::<Uimm64>("0x0", "0");
+        parse_ok::<Uimm64>("0xf", "15");
+        parse_ok::<Uimm64>("0xffffffff_fffffff7", "0xffff_ffff_ffff_fff7");
+
+        // Probe limits.
+        parse_ok::<Uimm64>("0xffffffff_ffffffff", "0xffff_ffff_ffff_ffff");
+        parse_ok::<Uimm64>("0x80000000_00000000", "0x8000_0000_0000_0000");
+        parse_ok::<Uimm64>("18446744073709551615", "0xffff_ffff_ffff_ffff");
+        // Overflow both the `checked_add` and `checked_mul`.
+        parse_err::<Uimm64>("18446744073709551616", "Too large decimal number");
+        parse_err::<Uimm64>("184467440737095516100", "Too large decimal number");
+
+        // Underscores are allowed where digits go.
+        parse_ok::<Uimm64>("0_0", "0");
+        parse_ok::<Uimm64>("_10_", "10");
+        parse_ok::<Uimm64>("0x97_88_bb", "0x0097_88bb");
+        parse_ok::<Uimm64>("0x_97_", "151");
+
+        parse_err::<Uimm64>("", "No digits in number");
+        parse_err::<Uimm64>("_", "No digits in number");
+        parse_err::<Uimm64>("0x", "No digits in number");
+        parse_err::<Uimm64>("0x_", "No digits in number");
+        parse_err::<Uimm64>("-", "Invalid character in decimal number");
+        parse_err::<Uimm64>("-0x", "Invalid character in hexadecimal number");
+        parse_err::<Uimm64>(" ", "Invalid character in decimal number");
+        parse_err::<Uimm64>("0 ", "Invalid character in decimal number");
+        parse_err::<Uimm64>(" 0", "Invalid character in decimal number");
+        parse_err::<Uimm64>("--", "Invalid character in decimal number");
+        parse_err::<Uimm64>("-0x-", "Invalid character in hexadecimal number");
+        parse_err::<Uimm64>("-0", "Invalid character in decimal number");
+        parse_err::<Uimm64>("-1", "Invalid character in decimal number");
+
+        // Hex count overflow.
+        parse_err::<Uimm64>("0x0_0000_0000_0000_0000", "Too many hexadecimal digits");
+    }
+
    #[test]
    fn format_offset32() {
        assert_eq!(Offset32(0).to_string(), "");
--- a/lib/codegen/src/ir/table.rs
+++ b/lib/codegen/src/ir/table.rs
@@ -1,6 +1,6 @@
 //! Tables.

-use ir::immediates::Imm64;
+use ir::immediates::Uimm64;
 use ir::{GlobalValue, Type};
 use std::fmt;

@@ -12,13 +12,13 @@ pub struct TableData {

    /// Guaranteed minimum table size in elements. Table accesses before `min_size` don't need
    /// bounds checking.
-    pub min_size: Imm64,
+    pub min_size: Uimm64,

    /// Global value giving the current bound of the table, in elements.
    pub bound_gv: GlobalValue,

    /// The size of a table element, in bytes.
-    pub element_size: Imm64,
+    pub element_size: Uimm64,

    /// The index type for the table.
    pub index_type: Type,
--- a/lib/codegen/src/ir/trapcode.rs
+++ b/lib/codegen/src/ir/trapcode.rs
@@ -17,7 +17,8 @@ pub enum TrapCode {
    /// A `heap_addr` instruction detected an out-of-bounds error.
    ///
    /// Note that not all out-of-bounds heap accesses are reported this way;
-    /// some are detected by a segmentation fault on the heap guard pages.
+    /// some are detected by a segmentation fault on the heap unmapped or
+    /// offset-guard pages.
    HeapOutOfBounds,

    /// A `table_addr` instruction detected an out-of-bounds error.
--- a/lib/codegen/src/legalizer/heap.rs
+++ b/lib/codegen/src/legalizer/heap.rs
@@ -49,7 +49,7 @@ fn dynamic_addr(
    bound_gv: ir::GlobalValue,
    func: &mut ir::Function,
 ) {
-    let access_size = i64::from(access_size);
+    let access_size = u64::from(access_size);
    let offset_ty = func.dfg.value_type(offset);
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let min_size = func.heaps[heap].min_size.into();
@@ -67,13 +67,13 @@ fn dynamic_addr(
    } else if access_size <= min_size {
        // We know that bound >= min_size, so here we can compare `offset > bound - access_size`
        // without wrapping.
-        let adj_bound = pos.ins().iadd_imm(bound, -access_size);
+        let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
        oob = pos
            .ins()
            .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
    } else {
        // We need an overflow check for the adjusted offset.
-        let access_size_val = pos.ins().iconst(offset_ty, access_size);
+        let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
        let (adj_offset, overflow) = pos.ins().iadd_cout(offset, access_size_val);
        pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
        oob = pos
@@ -91,11 +91,11 @@ fn static_addr(
    heap: ir::Heap,
    offset: ir::Value,
    access_size: u32,
-    bound: i64,
+    bound: u64,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
 ) {
-    let access_size = i64::from(access_size);
+    let access_size = u64::from(access_size);
    let offset_ty = func.dfg.value_type(offset);
    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
    let mut pos = FuncCursor::new(func).at_inst(inst);
@@ -117,7 +117,7 @@ fn static_addr(
    }

    // Check `offset > limit` which is now known non-negative.
-    let limit = bound - access_size;
+    let limit = bound - u64::from(access_size);

    // We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
    // more.
@@ -126,10 +126,10 @@ fn static_addr(
            // Prefer testing `offset >= limit - 1` when limit is odd because an even number is
            // likely to be a convenient constant on ARM and other RISC architectures.
            pos.ins()
-                .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit - 1)
+                .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 - 1)
        } else {
            pos.ins()
-                .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit)
+                .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
        };
        pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
    }
--- a/lib/codegen/src/legalizer/table.rs
+++ b/lib/codegen/src/legalizer/table.rs
@@ -92,17 +92,15 @@ fn compute_addr(

    let element_size = pos.func.tables[table].element_size;
    let mut offset;
-    let element_size_i64: i64 = element_size.into();
-    debug_assert!(element_size_i64 >= 0);
-    let element_size_u64 = element_size_i64 as u64;
-    if element_size_u64 == 1 {
+    let element_size: u64 = element_size.into();
+    if element_size == 1 {
        offset = index;
-    } else if element_size_u64.is_power_of_two() {
+    } else if element_size.is_power_of_two() {
        offset = pos
            .ins()
-            .ishl_imm(index, i64::from(element_size_u64.trailing_zeros()));
+            .ishl_imm(index, i64::from(element_size.trailing_zeros()));
    } else {
-        offset = pos.ins().imul_imm(index, element_size);
+        offset = pos.ins().imul_imm(index, element_size as i64);
    }

    if element_offset == Offset32::new(0) {