[reader] Avoid handling of unicode when not necessary

Clif files are not meant to be written by end users anyway. The main effects are that non-ASCII identifiers now fail during lexing rather than during parsing, and that whitespace must now be in the ASCII range. Comments still have full Unicode support. This also inlines all char::is_* methods to avoid nested matches. Overall, this results in a slight reduction in instruction count.
2020-08-26 11:30:19 +02:00
parent 19393be396
commit 0d3f9ad8ef
1 changed files with 12 additions and 13 deletions
--- a/cranelift/reader/src/lexer.rs
+++ b/cranelift/reader/src/lexer.rs
@@ -179,10 +179,8 @@ impl<'a> Lexer<'a> {
    // Starting from `lookahead`, are we looking at a number?
    fn looking_at_numeric(&self) -> bool {
        if let Some(c) = self.lookahead {
            if c.is_digit(10) {
                return true;
            }
            match c {
                '0'..='9' => return true,
                '-' => return true,
                '+' => return true,
                '.' => return true,
@@ -291,7 +289,7 @@ impl<'a> Lexer<'a> {
            match self.next_ch() {
                Some('-') | Some('_') => {}
                Some('.') => is_float = true,
-                Some(ch) if ch.is_alphanumeric() => {}
+                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
@@ -309,11 +307,10 @@ impl<'a> Lexer<'a> {
        let begin = self.pos;
        let loc = self.loc();
-        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
+        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
        loop {
            match self.next_ch() {
-                Some('_') => {}
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                Some(ch) if ch.is_alphanumeric() => {}
                _ => break,
            }
        }
@@ -398,9 +395,10 @@ impl<'a> Lexer<'a> {
        assert_eq!(self.lookahead, Some('%'));
-        while let Some(c) = self.next_ch() {
+        loop {
-            if !(c.is_ascii() && c.is_alphanumeric() || c == '_') {
+            match self.next_ch() {
-                break;
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
@@ -490,8 +488,8 @@ impl<'a> Lexer<'a> {
                        Some(self.scan_number())
                    }
                }
-                Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
+                Some('0'..='9') => Some(self.scan_number()),
-                Some(ch) if ch.is_alphabetic() => {
+                Some('a'..='z') | Some('A'..='Z') => {
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
@@ -502,7 +500,8 @@ impl<'a> Lexer<'a> {
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => Some(self.scan_srcloc()),
-                Some(ch) if ch.is_whitespace() => {
+                // all ascii whitespace
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }