Share split_entity_name between lexer and sourcemap.

There's only one way of parsing entity names correctly.
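For illustration, here is the shared helper as a minimal standalone sketch, copied from the diff below; the `main` function and its assertions are illustrative additions, not part of the commit:

    /// Count the decimal ASCII digits at the end of `s`.
    fn trailing_digits(s: &str) -> usize {
        // Iterate backwards over bytes; only ASCII digits are counted.
        s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
    }

    /// Split an entity name like "ebb15" into ("ebb", 15).
    fn split_entity_name(name: &str) -> Option<(&str, u32)> {
        let (head, tail) = name.split_at(name.len() - trailing_digits(name));
        // Reject non-canonical numbers like "v0001" so every entity has exactly one spelling.
        if tail.len() > 1 && tail.starts_with('0') {
            None
        } else {
            tail.parse().ok().map(|n| (head, n))
        }
    }

    fn main() {
        assert_eq!(split_entity_name("ebb15"), Some(("ebb", 15)));
        assert_eq!(split_entity_name("v45"), Some(("v", 45)));
        assert_eq!(split_entity_name("v0001"), None); // non-canonical: leading zero
        assert_eq!(split_entity_name("vx"), None); // no numeric tail
    }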
Jakob Stoklund Olesen
2016-09-20 13:20:33 -07:00
parent 64490a3587
commit 26332f6f91
2 changed files with 63 additions and 71 deletions

lexer.rs

@@ -6,6 +6,7 @@
 // ====--------------------------------------------------------------------------------------====//
 
 use std::str::CharIndices;
+use std::u16;
 use cretonne::ir::types;
 use cretonne::ir::{Value, Ebb};
 use error::Location;
@@ -72,6 +73,23 @@ fn error<'a>(error: Error, loc: Location) -> Result<LocatedToken<'a>, LocatedErr
     })
 }
 
+/// Get the number of decimal digits at the end of `s`.
+fn trailing_digits(s: &str) -> usize {
+    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
+    s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
+}
+
+/// Pre-parse a supposed entity name by splitting it into two parts: a head of lowercase ASCII
+/// letters and a numeric tail.
+pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
+    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
+    if tail.len() > 1 && tail.starts_with('0') {
+        None
+    } else {
+        tail.parse().ok().map(|n| (head, n))
+    }
+}
+
 /// Lexical analysis.
 ///
 /// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
@@ -237,52 +255,42 @@ impl<'a> Lexer<'a> {
     fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
         let begin = self.pos;
         let loc = self.loc();
-        let mut trailing_digits = 0usize;
         assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
         loop {
             match self.next_ch() {
-                Some(ch) if ch.is_digit(10) => trailing_digits += 1,
-                Some('_') => trailing_digits = 0,
-                Some(ch) if ch.is_alphabetic() => trailing_digits = 0,
+                Some('_') => {}
+                Some(ch) if ch.is_alphanumeric() => {}
                 _ => break,
             }
         }
         let text = &self.source[begin..self.pos];
-        let (prefix, suffix) = text.split_at(text.len() - trailing_digits);
         // Look for numbered well-known entities like ebb15, v45, ...
-        token(Self::numbered_entity(prefix, suffix)
-                  .or_else(|| Self::value_type(text, prefix, suffix))
+        token(split_entity_name(text)
+                  .and_then(|(prefix, number)| {
+                      Self::numbered_entity(prefix, number)
+                          .or_else(|| Self::value_type(text, prefix, number))
+                  })
                   .unwrap_or(Token::Identifier(text)),
               loc)
     }
 
     // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
     // decoded token.
-    fn numbered_entity(prefix: &str, suffix: &str) -> Option<Token<'a>> {
-        // Reject non-canonical numbers like v0001.
-        if suffix.len() > 1 && suffix.starts_with('0') {
-            return None;
-        }
-        let value: u32 = match suffix.parse() {
-            Ok(v) => v,
-            _ => return None,
-        };
+    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
         match prefix {
-            "v" => Value::direct_with_number(value).map(|v| Token::Value(v)),
-            "vx" => Value::table_with_number(value).map(|v| Token::Value(v)),
-            "ebb" => Ebb::with_number(value).map(|ebb| Token::Ebb(ebb)),
-            "ss" => Some(Token::StackSlot(value)),
-            "jt" => Some(Token::JumpTable(value)),
+            "v" => Value::direct_with_number(number).map(|v| Token::Value(v)),
+            "vx" => Value::table_with_number(number).map(|v| Token::Value(v)),
+            "ebb" => Ebb::with_number(number).map(|ebb| Token::Ebb(ebb)),
+            "ss" => Some(Token::StackSlot(number)),
+            "jt" => Some(Token::JumpTable(number)),
             _ => None,
         }
     }
 
     // Recognize a scalar or vector type.
-    fn value_type(text: &str, prefix: &str, suffix: &str) -> Option<Token<'a>> {
+    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
         let is_vector = prefix.ends_with('x');
         let scalar = if is_vector {
             &prefix[0..prefix.len() - 1]
@@ -304,11 +312,11 @@ impl<'a> Lexer<'a> {
             _ => return None,
         };
         if is_vector {
-            let lanes: u16 = match suffix.parse() {
-                Ok(v) => v,
-                _ => return None,
-            };
-            base_type.by(lanes).map(|t| Token::Type(t))
+            if number <= u16::MAX as u32 {
+                base_type.by(number as u16).map(|t| Token::Type(t))
+            } else {
+                None
+            }
         } else {
             Some(Token::Type(base_type))
         }
@@ -356,11 +364,36 @@ impl<'a> Lexer<'a> {
 #[cfg(test)]
 mod tests {
+    use super::trailing_digits;
     use super::*;
     use cretonne::ir::types;
     use cretonne::ir::{Value, Ebb};
     use error::Location;
 
+    #[test]
+    fn digits() {
+        assert_eq!(trailing_digits(""), 0);
+        assert_eq!(trailing_digits("x"), 0);
+        assert_eq!(trailing_digits("0x"), 0);
+        assert_eq!(trailing_digits("x1"), 1);
+        assert_eq!(trailing_digits("1x1"), 1);
+        assert_eq!(trailing_digits("1x01"), 2);
+    }
+
+    #[test]
+    fn entity_name() {
+        assert_eq!(split_entity_name(""), None);
+        assert_eq!(split_entity_name("x"), None);
+        assert_eq!(split_entity_name("x+"), None);
+        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
+        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
+        assert_eq!(split_entity_name("1"), Some(("", 1)));
+        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
+        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
+        // Reject this non-canonical form.
+        assert_eq!(split_entity_name("inst01"), None);
+    }
+
     fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
         Some(super::token(token, Location { line_number: line }))
     }

sourcemap.rs

@@ -11,6 +11,7 @@ use std::collections::HashMap;
 use cretonne::ir::{StackSlot, JumpTable, Ebb, Value, Inst};
 use cretonne::ir::entities::AnyEntity;
 use error::{Result, Location};
+use lexer::split_entity_name;
 
 /// Mapping from source entity names to entity references that are valid in the parsed function.
 #[derive(Debug)]
@@ -114,23 +115,6 @@ impl SourceMap {
         }
     }
 
-/// Get the number of decimal digits at the end of `s`.
-fn trailing_digits(s: &str) -> usize {
-    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
-    s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
-}
-
-/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
-/// letters and numeric tail.
-fn split_entity_name(name: &str) -> Option<(&str, u32)> {
-    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
-    if tail.len() > 1 && tail.starts_with('0') {
-        None
-    } else {
-        tail.parse().ok().map(|n| (head, n))
-    }
-}
 
 /// Interface for mutating a source map.
 ///
@@ -211,33 +195,8 @@ impl MutableSourceMap for SourceMap {
 #[cfg(test)]
 mod tests {
-    use super::{trailing_digits, split_entity_name};
     use parse_test;
 
-    #[test]
-    fn digits() {
-        assert_eq!(trailing_digits(""), 0);
-        assert_eq!(trailing_digits("x"), 0);
-        assert_eq!(trailing_digits("0x"), 0);
-        assert_eq!(trailing_digits("x1"), 1);
-        assert_eq!(trailing_digits("1x1"), 1);
-        assert_eq!(trailing_digits("1x01"), 2);
-    }
-
-    #[test]
-    fn entity_name() {
-        assert_eq!(split_entity_name(""), None);
-        assert_eq!(split_entity_name("x"), None);
-        assert_eq!(split_entity_name("x+"), None);
-        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
-        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
-        assert_eq!(split_entity_name("1"), Some(("", 1)));
-        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
-        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
-        // Reject this non-canonical form.
-        assert_eq!(split_entity_name("inst01"), None);
-    }
-
     #[test]
     fn details() {
         let tf = parse_test("function detail() {