From 0d3f9ad8ef940e4c9ae382daa2dc2ee99b230ef2 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 26 Aug 2020 11:30:19 +0200 Subject: [PATCH] [reader] Avoid handling of unicode when not necessary Clif files are not meant to be written by end users anyway. The main effects are that non-ASCII identifiers now fail in the lexer instead of in the parser, and whitespace must now be in the ASCII range. Comments still have full Unicode support. This also inlines all char::is_* methods to avoid nested matches. Overall this results in a slight reduction in instruction count. --- cranelift/reader/src/lexer.rs | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index 7a5edc33be..647742cacc 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -179,10 +179,8 @@ impl<'a> Lexer<'a> { // Starting from `lookahead`, are we looking at a number? fn looking_at_numeric(&self) -> bool { if let Some(c) = self.lookahead { - if c.is_digit(10) { - return true; - } match c { + '0'..='9' => return true, '-' => return true, '+' => return true, '.'
=> return true, @@ -291,7 +289,7 @@ impl<'a> Lexer<'a> { match self.next_ch() { Some('-') | Some('_') => {} Some('.') => is_float = true, - Some(ch) if ch.is_alphanumeric() => {} + Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {} _ => break, } } @@ -309,11 +307,10 @@ impl<'a> Lexer<'a> { let begin = self.pos; let loc = self.loc(); - assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic()); + assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic()); loop { match self.next_ch() { - Some('_') => {} - Some(ch) if ch.is_alphanumeric() => {} + Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {} _ => break, } } @@ -398,9 +395,10 @@ impl<'a> Lexer<'a> { assert_eq!(self.lookahead, Some('%')); - while let Some(c) = self.next_ch() { - if !(c.is_ascii() && c.is_alphanumeric() || c == '_') { - break; + loop { + match self.next_ch() { + Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {} + _ => break, } } @@ -490,8 +488,8 @@ impl<'a> Lexer<'a> { Some(self.scan_number()) } } - Some(ch) if ch.is_digit(10) => Some(self.scan_number()), - Some(ch) if ch.is_alphabetic() => { + Some('0'..='9') => Some(self.scan_number()), + Some('a'..='z') | Some('A'..='Z') => { if self.looking_at("NaN") || self.looking_at("Inf") { Some(self.scan_number()) } else { @@ -502,7 +500,8 @@ impl<'a> Lexer<'a> { Some('"') => Some(self.scan_string()), Some('#') => Some(self.scan_hex_sequence()), Some('@') => Some(self.scan_srcloc()), - Some(ch) if ch.is_whitespace() => { + // all ascii whitespace + Some(' ') | Some('\x09'..='\x0d') => { self.next_ch(); continue; }