Lexer can now scan names, hex sequences, brackets and minus signs.
This commit is contained in:
committed by
Jakob Stoklund Olesen
parent
04bddd73ba
commit
a08e177595
@@ -22,6 +22,9 @@ pub enum Token<'a> {
|
|||||||
RPar, // ')'
|
RPar, // ')'
|
||||||
LBrace, // '{'
|
LBrace, // '{'
|
||||||
RBrace, // '}'
|
RBrace, // '}'
|
||||||
|
LBracket, // '['
|
||||||
|
RBracket, // ']'
|
||||||
|
Minus, // '-'
|
||||||
Comma, // ','
|
Comma, // ','
|
||||||
Dot, // '.'
|
Dot, // '.'
|
||||||
Colon, // ':'
|
Colon, // ':'
|
||||||
@@ -36,6 +39,8 @@ pub enum Token<'a> {
|
|||||||
JumpTable(u32), // jt2
|
JumpTable(u32), // jt2
|
||||||
FuncRef(u32), // fn2
|
FuncRef(u32), // fn2
|
||||||
SigRef(u32), // sig2
|
SigRef(u32), // sig2
|
||||||
|
Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ...
|
||||||
|
HexSequence(&'a str), // #89AF
|
||||||
Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...)
|
Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -222,6 +227,13 @@ impl<'a> Lexer<'a> {
|
|||||||
// Skip a leading sign.
|
// Skip a leading sign.
|
||||||
if self.lookahead == Some('-') {
|
if self.lookahead == Some('-') {
|
||||||
self.next_ch();
|
self.next_ch();
|
||||||
|
|
||||||
|
if let Some(c) = self.lookahead {
|
||||||
|
// If the next character won't parse as a number, we conservatively return Token::Minus
|
||||||
|
if !c.is_alphanumeric() && c != '.' {
|
||||||
|
return token(Token::Minus, loc);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for NaNs with payloads.
|
// Check for NaNs with payloads.
|
||||||
@@ -326,6 +338,39 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
|
||||||
|
let loc = self.loc();
|
||||||
|
let begin = self.pos;
|
||||||
|
|
||||||
|
assert!(self.lookahead == Some('%'));
|
||||||
|
|
||||||
|
while let Some(c) = self.next_ch() {
|
||||||
|
if !c.is_alphanumeric() && c != '_' {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = self.pos;
|
||||||
|
token(Token::Name(&self.source[begin..end]), loc)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
|
||||||
|
let loc = self.loc();
|
||||||
|
let begin = self.pos;
|
||||||
|
|
||||||
|
assert!(self.lookahead == Some('#'));
|
||||||
|
|
||||||
|
while let Some(c) = self.next_ch() {
|
||||||
|
match c {
|
||||||
|
'a'...'f' | 'A'...'F' | '0'...'9' => {},
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = self.pos;
|
||||||
|
token(Token::HexSequence(&self.source[begin..end]), loc)
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the next token or a lexical error.
|
/// Get the next token or a lexical error.
|
||||||
///
|
///
|
||||||
/// Return None when the end of the source is encountered.
|
/// Return None when the end of the source is encountered.
|
||||||
@@ -339,6 +384,8 @@ impl<'a> Lexer<'a> {
|
|||||||
Some(')') => Some(self.scan_char(Token::RPar)),
|
Some(')') => Some(self.scan_char(Token::RPar)),
|
||||||
Some('{') => Some(self.scan_char(Token::LBrace)),
|
Some('{') => Some(self.scan_char(Token::LBrace)),
|
||||||
Some('}') => Some(self.scan_char(Token::RBrace)),
|
Some('}') => Some(self.scan_char(Token::RBrace)),
|
||||||
|
Some('[') => Some(self.scan_char(Token::LBracket)),
|
||||||
|
Some(']') => Some(self.scan_char(Token::RBracket)),
|
||||||
Some(',') => Some(self.scan_char(Token::Comma)),
|
Some(',') => Some(self.scan_char(Token::Comma)),
|
||||||
Some('.') => Some(self.scan_char(Token::Dot)),
|
Some('.') => Some(self.scan_char(Token::Dot)),
|
||||||
Some(':') => Some(self.scan_char(Token::Colon)),
|
Some(':') => Some(self.scan_char(Token::Colon)),
|
||||||
@@ -352,6 +399,8 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
|
Some(ch) if ch.is_digit(10) => Some(self.scan_number()),
|
||||||
Some(ch) if ch.is_alphabetic() => Some(self.scan_word()),
|
Some(ch) if ch.is_alphabetic() => Some(self.scan_word()),
|
||||||
|
Some('%') => Some(self.scan_name()),
|
||||||
|
Some('#') => Some(self.scan_hex_sequence()),
|
||||||
Some(ch) if ch.is_whitespace() => {
|
Some(ch) if ch.is_whitespace() => {
|
||||||
self.next_ch();
|
self.next_ch();
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
Reference in New Issue
Block a user