From 0d3f9ad8ef940e4c9ae382daa2dc2ee99b230ef2 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Wed, 26 Aug 2020 11:30:19 +0200
Subject: [PATCH] [reader] Avoid handling of unicode when not necessary

Clif files are not meant to be written by end-users anyway. The main
effects are that non-ASCII identifiers now fail during lexing instead of
during parsing, and that whitespace must now be in the ASCII range.
Comments still have full Unicode support.

This also inlines all char::is_* methods to avoid nested matches.

Overall this results in a slight reduction of instruction count.
---
 cranelift/reader/src/lexer.rs | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs
index 7a5edc33be..647742cacc 100644
--- a/cranelift/reader/src/lexer.rs
+++ b/cranelift/reader/src/lexer.rs
@@ -179,10 +179,8 @@ impl<'a> Lexer<'a> {
     // Starting from `lookahead`, are we looking at a number?
     fn looking_at_numeric(&self) -> bool {
         if let Some(c) = self.lookahead {
-            if c.is_digit(10) {
-                return true;
-            }
             match c {
+                '0'..='9' => return true,
                 '-' => return true,
                 '+' => return true,
                 '.' => return true,
@@ -291,7 +289,7 @@ impl<'a> Lexer<'a> {
             match self.next_ch() {
                 Some('-') | Some('_') => {}
                 Some('.') => is_float = true,
-                Some(ch) if ch.is_alphanumeric() => {}
+                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                 _ => break,
             }
         }
@@ -309,11 +307,10 @@ impl<'a> Lexer<'a> {
         let begin = self.pos;
         let loc = self.loc();
 
-        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
+        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
         loop {
             match self.next_ch() {
-                Some('_') => {}
-                Some(ch) if ch.is_alphanumeric() => {}
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                 _ => break,
             }
         }
@@ -398,9 +395,10 @@ impl<'a> Lexer<'a> {
 
         assert_eq!(self.lookahead, Some('%'));
 
-        while let Some(c) = self.next_ch() {
-            if !(c.is_ascii() && c.is_alphanumeric() || c == '_') {
-                break;
+        loop {
+            match self.next_ch() {
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
+                _ => break,
             }
         }
 
@@ -490,8 +488,8 @@ impl<'a> Lexer<'a> {
                         Some(self.scan_number())
                     }
                 }
-                Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
-                Some(ch) if ch.is_alphabetic() => {
+                Some('0'..='9') => Some(self.scan_number()),
+                Some('a'..='z') | Some('A'..='Z') => {
                     if self.looking_at("NaN") || self.looking_at("Inf") {
                         Some(self.scan_number())
                     } else {
@@ -502,7 +500,8 @@ impl<'a> Lexer<'a> {
                 Some('"') => Some(self.scan_string()),
                 Some('#') => Some(self.scan_hex_sequence()),
                 Some('@') => Some(self.scan_srcloc()),
-                Some(ch) if ch.is_whitespace() => {
+                // all ascii whitespace
+                Some(' ') | Some('\x09'..='\x0d') => {
                     self.next_ch();
                     continue;
                 }