[reader] Avoid handling of unicode when not necessary
Clif files are not meant to be written by end users anyway. The main effects are that non-ASCII identifiers now fail to lex instead of failing to parse, and whitespace must be in the ASCII range. Comments still have full Unicode support. This also inlines all char::is_* methods to avoid nested matches. Overall this results in a slight reduction in instruction count.
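To make the effect concrete, here is a minimal sketch (a standalone helper under assumed names, not the actual Lexer method from lexer.rs) of scanning a word with explicit ASCII range patterns in place of char::is_alphanumeric(), which is the shape the hunks below apply throughout:

// A minimal sketch, not the actual Lexer API: scan a word the way the patched
// lexer does, using explicit ASCII ranges instead of char::is_alphanumeric().
fn scan_word(input: &str) -> &str {
    let mut end = 0;
    for ch in input.chars() {
        match ch {
            '_' | '0'..='9' | 'a'..='z' | 'A'..='Z' => end += ch.len_utf8(),
            _ => break,
        }
    }
    &input[..end]
}

fn main() {
    // ASCII identifiers lex exactly as before.
    assert_eq!(scan_word("v12_x+1"), "v12_x");
    // A non-ASCII character now ends the word during lexing, so "vané" is no
    // longer a single identifier; previously it would have been accepted by the
    // lexer and rejected later by the parser.
    assert_eq!(scan_word("vané"), "van");
}

The same match-on-ranges shape replaces the is_digit, is_alphanumeric, and is_whitespace guards in the hunks below.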
@@ -179,10 +179,8 @@ impl<'a> Lexer<'a> {
     // Starting from `lookahead`, are we looking at a number?
     fn looking_at_numeric(&self) -> bool {
         if let Some(c) = self.lookahead {
-            if c.is_digit(10) {
-                return true;
-            }
             match c {
+                '0'..='9' => return true,
                 '-' => return true,
                 '+' => return true,
                 '.' => return true,
@@ -291,7 +289,7 @@ impl<'a> Lexer<'a> {
             match self.next_ch() {
                 Some('-') | Some('_') => {}
                 Some('.') => is_float = true,
-                Some(ch) if ch.is_alphanumeric() => {}
+                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                 _ => break,
             }
         }
@@ -309,11 +307,10 @@ impl<'a> Lexer<'a> {
         let begin = self.pos;
         let loc = self.loc();

-        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
+        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
         loop {
             match self.next_ch() {
-                Some('_') => {}
-                Some(ch) if ch.is_alphanumeric() => {}
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                 _ => break,
             }
         }
@@ -398,9 +395,10 @@ impl<'a> Lexer<'a> {

         assert_eq!(self.lookahead, Some('%'));

-        while let Some(c) = self.next_ch() {
-            if !(c.is_ascii() && c.is_alphanumeric() || c == '_') {
-                break;
+        loop {
+            match self.next_ch() {
+                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
+                _ => break,
             }
         }

@@ -490,8 +488,8 @@ impl<'a> Lexer<'a> {
                     Some(self.scan_number())
                 }
             }
-            Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
-            Some(ch) if ch.is_alphabetic() => {
+            Some('0'..='9') => Some(self.scan_number()),
+            Some('a'..='z') | Some('A'..='Z') => {
                 if self.looking_at("NaN") || self.looking_at("Inf") {
                     Some(self.scan_number())
                 } else {
@@ -502,7 +500,8 @@ impl<'a> Lexer<'a> {
             Some('"') => Some(self.scan_string()),
             Some('#') => Some(self.scan_hex_sequence()),
             Some('@') => Some(self.scan_srcloc()),
-            Some(ch) if ch.is_whitespace() => {
+            // all ascii whitespace
+            Some(' ') | Some('\x09'..='\x0d') => {
                 self.next_ch();
                 continue;
             }