[reader] Avoid handling Unicode when not necessary
Clif files are not meant to be written by end users anyway. The main effects are that non-ASCII identifiers now fail during lexing rather than during parsing, and that whitespace must now be in the ASCII range. Comments still have full Unicode support. This also inlines all `char::is_*` methods to avoid nested matches. Overall, this results in a slight reduction in instruction count.
This commit is contained in:
@@ -179,10 +179,8 @@ impl<'a> Lexer<'a> {
|
||||
// Starting from `lookahead`, are we looking at a number?
|
||||
fn looking_at_numeric(&self) -> bool {
|
||||
if let Some(c) = self.lookahead {
|
||||
if c.is_digit(10) {
|
||||
return true;
|
||||
}
|
||||
match c {
|
||||
'0'..='9' => return true,
|
||||
'-' => return true,
|
||||
'+' => return true,
|
||||
'.' => return true,
|
||||
@@ -291,7 +289,7 @@ impl<'a> Lexer<'a> {
|
||||
match self.next_ch() {
|
||||
Some('-') | Some('_') => {}
|
||||
Some('.') => is_float = true,
|
||||
Some(ch) if ch.is_alphanumeric() => {}
|
||||
Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
@@ -309,11 +307,10 @@ impl<'a> Lexer<'a> {
|
||||
let begin = self.pos;
|
||||
let loc = self.loc();
|
||||
|
||||
assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
|
||||
assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
|
||||
loop {
|
||||
match self.next_ch() {
|
||||
Some('_') => {}
|
||||
Some(ch) if ch.is_alphanumeric() => {}
|
||||
Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
@@ -398,9 +395,10 @@ impl<'a> Lexer<'a> {
|
||||
|
||||
assert_eq!(self.lookahead, Some('%'));
|
||||
|
||||
while let Some(c) = self.next_ch() {
|
||||
if !(c.is_ascii() && c.is_alphanumeric() || c == '_') {
|
||||
break;
|
||||
loop {
|
||||
match self.next_ch() {
|
||||
Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -490,8 +488,8 @@ impl<'a> Lexer<'a> {
|
||||
Some(self.scan_number())
|
||||
}
|
||||
}
|
||||
Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
|
||||
Some(ch) if ch.is_alphabetic() => {
|
||||
Some('0'..='9') => Some(self.scan_number()),
|
||||
Some('a'..='z') | Some('A'..='Z') => {
|
||||
if self.looking_at("NaN") || self.looking_at("Inf") {
|
||||
Some(self.scan_number())
|
||||
} else {
|
||||
@@ -502,7 +500,8 @@ impl<'a> Lexer<'a> {
|
||||
Some('"') => Some(self.scan_string()),
|
||||
Some('#') => Some(self.scan_hex_sequence()),
|
||||
Some('@') => Some(self.scan_srcloc()),
|
||||
Some(ch) if ch.is_whitespace() => {
|
||||
// all ascii whitespace
|
||||
Some(' ') | Some('\x09'..='\x0d') => {
|
||||
self.next_ch();
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user