From 26332f6f918230a48fd1ff6155c38ab46d400afc Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <jolesen@mozilla.com>
Date: Tue, 20 Sep 2016 13:20:33 -0700
Subject: [PATCH] Share split_entity_name between lexer and sourcemap.

There's only one way of parsing entity names correctly.
---
 src/libreader/lexer.rs     | 91 ++++++++++++++++++++++++++------------
 src/libreader/sourcemap.rs | 43 +-----------------
 2 files changed, 63 insertions(+), 71 deletions(-)

diff --git a/src/libreader/lexer.rs b/src/libreader/lexer.rs
index c57b8290cf..3454225dc8 100644
--- a/src/libreader/lexer.rs
+++ b/src/libreader/lexer.rs
@@ -6,6 +6,7 @@
 // ====--------------------------------------------------------------------------------------====//
 
 use std::str::CharIndices;
+use std::u16;
 use cretonne::ir::types;
 use cretonne::ir::{Value, Ebb};
 use error::Location;
@@ -72,6 +73,23 @@ fn error<'a>(error: Error, loc: Location) -> Result<LocatedToken<'a>, LocatedErr
     })
 }
 
+/// Get the number of decimal digits at the end of `s`.
+fn trailing_digits(s: &str) -> usize {
+    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
+    s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
+}
+
+/// Pre-parse a supposed entity name by splitting it into an unvalidated head and a decimal tail.
+/// Returns `None` if the tail is empty, has a leading zero (e.g. `inst01`), or overflows `u32`.
+pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
+    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
+    if tail.len() > 1 && tail.starts_with('0') {
+        None
+    } else {
+        tail.parse().ok().map(|n| (head, n))
+    }
+}
+
 /// Lexical analysis.
 ///
 /// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
@@ -237,52 +255,42 @@ impl<'a> Lexer<'a> {
     fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
         let begin = self.pos;
         let loc = self.loc();
-        let mut trailing_digits = 0usize;
 
         assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
         loop {
             match self.next_ch() {
-                Some(ch) if ch.is_digit(10) => trailing_digits += 1,
-                Some('_') => trailing_digits = 0,
-                Some(ch) if ch.is_alphabetic() => trailing_digits = 0,
+                Some('_') => {}
+                Some(ch) if ch.is_alphanumeric() => {}
                 _ => break,
             }
         }
         let text = &self.source[begin..self.pos];
-        let (prefix, suffix) = text.split_at(text.len() - trailing_digits);
 
         // Look for numbered well-known entities like ebb15, v45, ...
-        token(Self::numbered_entity(prefix, suffix)
-                  .or_else(|| Self::value_type(text, prefix, suffix))
+        token(split_entity_name(text)
+                  .and_then(|(prefix, number)| {
+                      Self::numbered_entity(prefix, number)
+                          .or_else(|| Self::value_type(text, prefix, number))
+                  })
                   .unwrap_or(Token::Identifier(text)),
               loc)
     }
 
     // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
     // decoded token.
-    fn numbered_entity(prefix: &str, suffix: &str) -> Option<Token<'a>> {
-        // Reject non-canonical numbers like v0001.
-        if suffix.len() > 1 && suffix.starts_with('0') {
-            return None;
-        }
-
-        let value: u32 = match suffix.parse() {
-            Ok(v) => v,
-            _ => return None,
-        };
-
+    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
         match prefix {
-            "v" => Value::direct_with_number(value).map(|v| Token::Value(v)),
-            "vx" => Value::table_with_number(value).map(|v| Token::Value(v)),
-            "ebb" => Ebb::with_number(value).map(|ebb| Token::Ebb(ebb)),
-            "ss" => Some(Token::StackSlot(value)),
-            "jt" => Some(Token::JumpTable(value)),
+            "v" => Value::direct_with_number(number).map(|v| Token::Value(v)),
+            "vx" => Value::table_with_number(number).map(|v| Token::Value(v)),
+            "ebb" => Ebb::with_number(number).map(|ebb| Token::Ebb(ebb)),
+            "ss" => Some(Token::StackSlot(number)),
+            "jt" => Some(Token::JumpTable(number)),
             _ => None,
         }
     }
 
     // Recognize a scalar or vector type.
-    fn value_type(text: &str, prefix: &str, suffix: &str) -> Option<Token<'a>> {
+    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
         let is_vector = prefix.ends_with('x');
         let scalar = if is_vector {
             &prefix[0..prefix.len() - 1]
@@ -304,11 +312,11 @@ impl<'a> Lexer<'a> {
             _ => return None,
         };
         if is_vector {
-            let lanes: u16 = match suffix.parse() {
-                Ok(v) => v,
-                _ => return None,
-            };
-            base_type.by(lanes).map(|t| Token::Type(t))
+            if number <= u16::MAX as u32 {
+                base_type.by(number as u16).map(|t| Token::Type(t))
+            } else {
+                None
+            }
         } else {
             Some(Token::Type(base_type))
         }
@@ -356,11 +364,36 @@ impl<'a> Lexer<'a> {
 
 #[cfg(test)]
 mod tests {
+    use super::trailing_digits;
     use super::*;
     use cretonne::ir::types;
     use cretonne::ir::{Value, Ebb};
     use error::Location;
 
+    #[test]
+    fn digits() {
+        assert_eq!(trailing_digits(""), 0);
+        assert_eq!(trailing_digits("x"), 0);
+        assert_eq!(trailing_digits("0x"), 0);
+        assert_eq!(trailing_digits("x1"), 1);
+        assert_eq!(trailing_digits("1x1"), 1);
+        assert_eq!(trailing_digits("1x01"), 2);
+    }
+
+    #[test]
+    fn entity_name() {
+        assert_eq!(split_entity_name(""), None);
+        assert_eq!(split_entity_name("x"), None);
+        assert_eq!(split_entity_name("x+"), None);
+        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
+        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
+        assert_eq!(split_entity_name("1"), Some(("", 1)));
+        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
+        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
+        // Reject this non-canonical form.
+        assert_eq!(split_entity_name("inst01"), None);
+    }
+
     fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
         Some(super::token(token, Location { line_number: line }))
     }
diff --git a/src/libreader/sourcemap.rs b/src/libreader/sourcemap.rs
index 82f5ece750..2bff8e4b81 100644
--- a/src/libreader/sourcemap.rs
+++ b/src/libreader/sourcemap.rs
@@ -11,6 +11,7 @@ use std::collections::HashMap;
 use cretonne::ir::{StackSlot, JumpTable, Ebb, Value, Inst};
 use cretonne::ir::entities::AnyEntity;
 use error::{Result, Location};
+use lexer::split_entity_name;
 
 /// Mapping from source entity names to entity references that are valid in the parsed function.
 #[derive(Debug)]
@@ -114,23 +115,6 @@ impl SourceMap {
     }
 }
 
-/// Get the number of decimal digits at the end of `s`.
-fn trailing_digits(s: &str) -> usize {
-    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
-    s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
-}
-
-/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
-/// letters and numeric tail.
-fn split_entity_name(name: &str) -> Option<(&str, u32)> {
-    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
-    if tail.len() > 1 && tail.starts_with('0') {
-        None
-    } else {
-        tail.parse().ok().map(|n| (head, n))
-    }
-}
-
 
 /// Interface for mutating a source map.
 ///
@@ -211,33 +195,8 @@ impl MutableSourceMap for SourceMap {
 
 #[cfg(test)]
 mod tests {
-    use super::{trailing_digits, split_entity_name};
     use parse_test;
 
-    #[test]
-    fn digits() {
-        assert_eq!(trailing_digits(""), 0);
-        assert_eq!(trailing_digits("x"), 0);
-        assert_eq!(trailing_digits("0x"), 0);
-        assert_eq!(trailing_digits("x1"), 1);
-        assert_eq!(trailing_digits("1x1"), 1);
-        assert_eq!(trailing_digits("1x01"), 2);
-    }
-
-    #[test]
-    fn entity_name() {
-        assert_eq!(split_entity_name(""), None);
-        assert_eq!(split_entity_name("x"), None);
-        assert_eq!(split_entity_name("x+"), None);
-        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
-        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
-        assert_eq!(split_entity_name("1"), Some(("", 1)));
-        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
-        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
-        // Reject this non-canonical form.
-        assert_eq!(split_entity_name("inst01"), None);
-    }
-
     #[test]
     fn details() {
         let tf = parse_test("function detail() {