Some refactorings to the ISLE parser (#5693)

* Use is_ascii_digit and is_ascii_hexdigit in the ISLE lexer

* Use range pattern in ISLE lexer

* Use a couple of shorthands in the ISLE parser

* Use parse_ident instead of symbol + str_to_ident

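* Introduce token eating API

This is a non-fatal version of the take API: `eat` returns `Ok(None)` instead of an error when the next token does not match (or at end of input)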

* Rename take to expect and add expect_ prefixes to several methods

* Review comments
This commit is contained in:
bjorn3
2023-02-07 00:11:25 +01:00
committed by GitHub
parent e9c05622c0
commit 16afefdab1
2 changed files with 83 additions and 113 deletions

View File

@@ -232,7 +232,7 @@ impl<'a> Lexer<'a> {
debug_assert!(!s.is_empty()); debug_assert!(!s.is_empty());
Ok(Some((start_pos, Token::Symbol(s.to_string())))) Ok(Some((start_pos, Token::Symbol(s.to_string()))))
} }
c if (c >= b'0' && c <= b'9') || c == b'-' => { c @ (b'0'..=b'9' | b'-') => {
let start_pos = self.pos(); let start_pos = self.pos();
let neg = if c == b'-' { let neg = if c == b'-' {
self.advance_pos(); self.advance_pos();
@@ -258,16 +258,8 @@ impl<'a> Lexer<'a> {
// string-to-integer conversion. // string-to-integer conversion.
let mut s = vec![]; let mut s = vec![];
while self.pos.offset < self.buf.len() while self.pos.offset < self.buf.len()
&& ((radix == 10 && ((radix == 10 && self.buf[self.pos.offset].is_ascii_digit())
&& self.buf[self.pos.offset] >= b'0' || (radix == 16 && self.buf[self.pos.offset].is_ascii_hexdigit())
&& self.buf[self.pos.offset] <= b'9')
|| (radix == 16
&& ((self.buf[self.pos.offset] >= b'0'
&& self.buf[self.pos.offset] <= b'9')
|| (self.buf[self.pos.offset] >= b'a'
&& self.buf[self.pos.offset] <= b'f')
|| (self.buf[self.pos.offset] >= b'A'
&& self.buf[self.pos.offset] <= b'F')))
|| self.buf[self.pos.offset] == b'_') || self.buf[self.pos.offset] == b'_')
{ {
if self.buf[self.pos.offset] != b'_' { if self.buf[self.pos.offset] != b'_' {

View File

@@ -45,7 +45,7 @@ impl<'a> Parser<'a> {
} }
} }
fn take<F: Fn(&Token) -> bool>(&mut self, f: F) -> Result<Token> { fn expect<F: Fn(&Token) -> bool>(&mut self, f: F) -> Result<Token> {
if let Some(&(pos, ref peek)) = self.lexer.peek() { if let Some(&(pos, ref peek)) = self.lexer.peek() {
if !f(peek) { if !f(peek) {
return Err(self.error(pos, format!("Unexpected token {:?}", peek))); return Err(self.error(pos, format!("Unexpected token {:?}", peek)));
@@ -56,6 +56,17 @@ impl<'a> Parser<'a> {
} }
} }
fn eat<F: Fn(&Token) -> bool>(&mut self, f: F) -> Result<Option<Token>> {
if let Some(&(_pos, ref peek)) = self.lexer.peek() {
if !f(peek) {
return Ok(None);
}
Ok(Some(self.lexer.next()?.unwrap().1))
} else {
Ok(None) // EOF
}
}
fn is<F: Fn(&Token) -> bool>(&self, f: F) -> bool { fn is<F: Fn(&Token) -> bool>(&self, f: F) -> bool {
if let Some(&(_, ref peek)) = self.lexer.peek() { if let Some(&(_, ref peek)) = self.lexer.peek() {
f(peek) f(peek)
@@ -80,16 +91,10 @@ impl<'a> Parser<'a> {
self.is(|tok| *tok == Token::At) self.is(|tok| *tok == Token::At)
} }
fn is_sym(&self) -> bool { fn is_sym(&self) -> bool {
self.is(|tok| tok.is_sym()) self.is(Token::is_sym)
} }
fn is_int(&self) -> bool { fn is_int(&self) -> bool {
self.is(|tok| tok.is_int()) self.is(Token::is_int)
}
fn is_sym_str(&self, s: &str) -> bool {
self.is(|tok| match tok {
&Token::Symbol(ref tok_s) if tok_s == s => true,
_ => false,
})
} }
fn is_const(&self) -> bool { fn is_const(&self) -> bool {
@@ -99,25 +104,33 @@ impl<'a> Parser<'a> {
}) })
} }
fn lparen(&mut self) -> Result<()> { fn expect_lparen(&mut self) -> Result<()> {
self.take(|tok| *tok == Token::LParen).map(|_| ()) self.expect(|tok| *tok == Token::LParen).map(|_| ())
} }
fn rparen(&mut self) -> Result<()> { fn expect_rparen(&mut self) -> Result<()> {
self.take(|tok| *tok == Token::RParen).map(|_| ()) self.expect(|tok| *tok == Token::RParen).map(|_| ())
} }
fn at(&mut self) -> Result<()> { fn expect_at(&mut self) -> Result<()> {
self.take(|tok| *tok == Token::At).map(|_| ()) self.expect(|tok| *tok == Token::At).map(|_| ())
} }
fn symbol(&mut self) -> Result<String> { fn expect_symbol(&mut self) -> Result<String> {
match self.take(|tok| tok.is_sym())? { match self.expect(Token::is_sym)? {
Token::Symbol(s) => Ok(s), Token::Symbol(s) => Ok(s),
_ => unreachable!(), _ => unreachable!(),
} }
} }
fn int(&mut self) -> Result<i128> { fn eat_sym_str(&mut self, s: &str) -> Result<bool> {
match self.take(|tok| tok.is_int())? { self.eat(|tok| match tok {
&Token::Symbol(ref tok_s) if tok_s == s => true,
_ => false,
})
.map(|token| token.is_some())
}
fn expect_int(&mut self) -> Result<i128> {
match self.expect(Token::is_int)? {
Token::Int(i) => Ok(i), Token::Int(i) => Ok(i),
_ => unreachable!(), _ => unreachable!(),
} }
@@ -136,9 +149,9 @@ impl<'a> Parser<'a> {
} }
fn parse_def(&mut self) -> Result<Def> { fn parse_def(&mut self) -> Result<Def> {
self.lparen()?; self.expect_lparen()?;
let pos = self.pos(); let pos = self.pos();
let def = match &self.symbol()?[..] { let def = match &self.expect_symbol()?[..] {
"pragma" => Def::Pragma(self.parse_pragma()?), "pragma" => Def::Pragma(self.parse_pragma()?),
"type" => Def::Type(self.parse_type()?), "type" => Def::Type(self.parse_type()?),
"decl" => Def::Decl(self.parse_decl()?), "decl" => Def::Decl(self.parse_decl()?),
@@ -150,7 +163,7 @@ impl<'a> Parser<'a> {
return Err(self.error(pos, format!("Unexpected identifier: {}", s))); return Err(self.error(pos, format!("Unexpected identifier: {}", s)));
} }
}; };
self.rparen()?; self.expect_rparen()?;
Ok(def) Ok(def)
} }
@@ -182,7 +195,7 @@ impl<'a> Parser<'a> {
fn parse_ident(&mut self) -> Result<Ident> { fn parse_ident(&mut self) -> Result<Ident> {
let pos = self.pos(); let pos = self.pos();
let s = self.symbol()?; let s = self.expect_symbol()?;
self.str_to_ident(pos, &s) self.str_to_ident(pos, &s)
} }
@@ -216,7 +229,7 @@ impl<'a> Parser<'a> {
let mut is_nodebug = false; let mut is_nodebug = false;
while self.lexer.peek().map_or(false, |(_pos, tok)| tok.is_sym()) { while self.lexer.peek().map_or(false, |(_pos, tok)| tok.is_sym()) {
let sym = self.symbol()?; let sym = self.expect_symbol()?;
if sym == "extern" { if sym == "extern" {
is_extern = true; is_extern = true;
} else if sym == "nodebug" { } else if sym == "nodebug" {
@@ -241,20 +254,18 @@ impl<'a> Parser<'a> {
fn parse_typevalue(&mut self) -> Result<TypeValue> { fn parse_typevalue(&mut self) -> Result<TypeValue> {
let pos = self.pos(); let pos = self.pos();
self.lparen()?; self.expect_lparen()?;
if self.is_sym_str("primitive") { if self.eat_sym_str("primitive")? {
self.symbol()?;
let primitive_ident = self.parse_ident()?; let primitive_ident = self.parse_ident()?;
self.rparen()?; self.expect_rparen()?;
Ok(TypeValue::Primitive(primitive_ident, pos)) Ok(TypeValue::Primitive(primitive_ident, pos))
} else if self.is_sym_str("enum") { } else if self.eat_sym_str("enum")? {
self.symbol()?;
let mut variants = vec![]; let mut variants = vec![];
while !self.is_rparen() { while !self.is_rparen() {
let variant = self.parse_type_variant()?; let variant = self.parse_type_variant()?;
variants.push(variant); variants.push(variant);
} }
self.rparen()?; self.expect_rparen()?;
Ok(TypeValue::Enum(variants, pos)) Ok(TypeValue::Enum(variants, pos))
} else { } else {
Err(self.error(pos, "Unknown type definition".to_string())) Err(self.error(pos, "Unknown type definition".to_string()))
@@ -272,56 +283,41 @@ impl<'a> Parser<'a> {
}) })
} else { } else {
let pos = self.pos(); let pos = self.pos();
self.lparen()?; self.expect_lparen()?;
let name = self.parse_ident()?; let name = self.parse_ident()?;
let mut fields = vec![]; let mut fields = vec![];
while !self.is_rparen() { while !self.is_rparen() {
fields.push(self.parse_type_field()?); fields.push(self.parse_type_field()?);
} }
self.rparen()?; self.expect_rparen()?;
Ok(Variant { name, fields, pos }) Ok(Variant { name, fields, pos })
} }
} }
fn parse_type_field(&mut self) -> Result<Field> { fn parse_type_field(&mut self) -> Result<Field> {
let pos = self.pos(); let pos = self.pos();
self.lparen()?; self.expect_lparen()?;
let name = self.parse_ident()?; let name = self.parse_ident()?;
let ty = self.parse_ident()?; let ty = self.parse_ident()?;
self.rparen()?; self.expect_rparen()?;
Ok(Field { name, ty, pos }) Ok(Field { name, ty, pos })
} }
fn parse_decl(&mut self) -> Result<Decl> { fn parse_decl(&mut self) -> Result<Decl> {
let pos = self.pos(); let pos = self.pos();
let pure = if self.is_sym_str("pure") { let pure = self.eat_sym_str("pure")?;
self.symbol()?; let multi = self.eat_sym_str("multi")?;
true let partial = self.eat_sym_str("partial")?;
} else {
false
};
let multi = if self.is_sym_str("multi") {
self.symbol()?;
true
} else {
false
};
let partial = if self.is_sym_str("partial") {
self.symbol()?;
true
} else {
false
};
let term = self.parse_ident()?; let term = self.parse_ident()?;
self.lparen()?; self.expect_lparen()?;
let mut arg_tys = vec![]; let mut arg_tys = vec![];
while !self.is_rparen() { while !self.is_rparen() {
arg_tys.push(self.parse_ident()?); arg_tys.push(self.parse_ident()?);
} }
self.rparen()?; self.expect_rparen()?;
let ret_ty = self.parse_ident()?; let ret_ty = self.parse_ident()?;
@@ -338,21 +334,12 @@ impl<'a> Parser<'a> {
fn parse_extern(&mut self) -> Result<Extern> { fn parse_extern(&mut self) -> Result<Extern> {
let pos = self.pos(); let pos = self.pos();
if self.is_sym_str("constructor") { if self.eat_sym_str("constructor")? {
self.symbol()?;
let term = self.parse_ident()?; let term = self.parse_ident()?;
let func = self.parse_ident()?; let func = self.parse_ident()?;
Ok(Extern::Constructor { term, func, pos }) Ok(Extern::Constructor { term, func, pos })
} else if self.is_sym_str("extractor") { } else if self.eat_sym_str("extractor")? {
self.symbol()?; let infallible = self.eat_sym_str("infallible")?;
let infallible = if self.is_sym_str("infallible") {
self.symbol()?;
true
} else {
false
};
let term = self.parse_ident()?; let term = self.parse_ident()?;
let func = self.parse_ident()?; let func = self.parse_ident()?;
@@ -363,8 +350,7 @@ impl<'a> Parser<'a> {
pos, pos,
infallible, infallible,
}) })
} else if self.is_sym_str("const") { } else if self.eat_sym_str("const")? {
self.symbol()?;
let pos = self.pos(); let pos = self.pos();
let name = self.parse_const()?; let name = self.parse_const()?;
let ty = self.parse_ident()?; let ty = self.parse_ident()?;
@@ -380,13 +366,13 @@ impl<'a> Parser<'a> {
fn parse_etor(&mut self) -> Result<Extractor> { fn parse_etor(&mut self) -> Result<Extractor> {
let pos = self.pos(); let pos = self.pos();
self.lparen()?; self.expect_lparen()?;
let term = self.parse_ident()?; let term = self.parse_ident()?;
let mut args = vec![]; let mut args = vec![];
while !self.is_rparen() { while !self.is_rparen() {
args.push(self.parse_ident()?); args.push(self.parse_ident()?);
} }
self.rparen()?; self.expect_rparen()?;
let template = self.parse_pattern()?; let template = self.parse_pattern()?;
Ok(Extractor { Ok(Extractor {
term, term,
@@ -400,7 +386,7 @@ impl<'a> Parser<'a> {
let pos = self.pos(); let pos = self.pos();
let prio = if self.is_int() { let prio = if self.is_int() {
Some( Some(
i64::try_from(self.int()?) i64::try_from(self.expect_int()?)
.map_err(|err| self.error(pos, format!("Invalid rule priority: {}", err)))?, .map_err(|err| self.error(pos, format!("Invalid rule priority: {}", err)))?,
) )
} else { } else {
@@ -430,34 +416,31 @@ impl<'a> Parser<'a> {
let pos = self.pos(); let pos = self.pos();
if self.is_int() { if self.is_int() {
Ok(Pattern::ConstInt { Ok(Pattern::ConstInt {
val: self.int()?, val: self.expect_int()?,
pos, pos,
}) })
} else if self.is_const() { } else if self.is_const() {
let val = self.parse_const()?; let val = self.parse_const()?;
Ok(Pattern::ConstPrim { val, pos }) Ok(Pattern::ConstPrim { val, pos })
} else if self.is_sym_str("_") { } else if self.eat_sym_str("_")? {
self.symbol()?;
Ok(Pattern::Wildcard { pos }) Ok(Pattern::Wildcard { pos })
} else if self.is_sym() { } else if self.is_sym() {
let s = self.symbol()?; let var = self.parse_ident()?;
let var = self.str_to_ident(pos, &s)?;
if self.is_at() { if self.is_at() {
self.at()?; self.expect_at()?;
let subpat = Box::new(self.parse_pattern()?); let subpat = Box::new(self.parse_pattern()?);
Ok(Pattern::BindPattern { var, subpat, pos }) Ok(Pattern::BindPattern { var, subpat, pos })
} else { } else {
Ok(Pattern::Var { var, pos }) Ok(Pattern::Var { var, pos })
} }
} else if self.is_lparen() { } else if self.is_lparen() {
self.lparen()?; self.expect_lparen()?;
if self.is_sym_str("and") { if self.eat_sym_str("and")? {
self.symbol()?;
let mut subpats = vec![]; let mut subpats = vec![];
while !self.is_rparen() { while !self.is_rparen() {
subpats.push(self.parse_pattern()?); subpats.push(self.parse_pattern()?);
} }
self.rparen()?; self.expect_rparen()?;
Ok(Pattern::And { subpats, pos }) Ok(Pattern::And { subpats, pos })
} else { } else {
let sym = self.parse_ident()?; let sym = self.parse_ident()?;
@@ -465,7 +448,7 @@ impl<'a> Parser<'a> {
while !self.is_rparen() { while !self.is_rparen() {
args.push(self.parse_pattern()?); args.push(self.parse_pattern()?);
} }
self.rparen()?; self.expect_rparen()?;
Ok(Pattern::Term { sym, args, pos }) Ok(Pattern::Term { sym, args, pos })
} }
} else { } else {
@@ -476,19 +459,17 @@ impl<'a> Parser<'a> {
fn parse_iflet_or_expr(&mut self) -> Result<IfLetOrExpr> { fn parse_iflet_or_expr(&mut self) -> Result<IfLetOrExpr> {
let pos = self.pos(); let pos = self.pos();
if self.is_lparen() { if self.is_lparen() {
self.lparen()?; self.expect_lparen()?;
let ret = if self.is_sym_str("if-let") { let ret = if self.eat_sym_str("if-let")? {
self.symbol()?;
IfLetOrExpr::IfLet(self.parse_iflet()?) IfLetOrExpr::IfLet(self.parse_iflet()?)
} else if self.is_sym_str("if") { } else if self.eat_sym_str("if")? {
// Shorthand form: `(if (x))` desugars to `(if-let _ // Shorthand form: `(if (x))` desugars to `(if-let _
// (x))`. // (x))`.
self.symbol()?;
IfLetOrExpr::IfLet(self.parse_iflet_if()?) IfLetOrExpr::IfLet(self.parse_iflet_if()?)
} else { } else {
IfLetOrExpr::Expr(self.parse_expr_inner_parens(pos)?) IfLetOrExpr::Expr(self.parse_expr_inner_parens(pos)?)
}; };
self.rparen()?; self.expect_rparen()?;
Ok(ret) Ok(ret)
} else { } else {
self.parse_expr().map(|expr| IfLetOrExpr::Expr(expr)) self.parse_expr().map(|expr| IfLetOrExpr::Expr(expr))
@@ -515,15 +496,13 @@ impl<'a> Parser<'a> {
fn parse_expr(&mut self) -> Result<Expr> { fn parse_expr(&mut self) -> Result<Expr> {
let pos = self.pos(); let pos = self.pos();
if self.is_lparen() { if self.is_lparen() {
self.lparen()?; self.expect_lparen()?;
let ret = self.parse_expr_inner_parens(pos)?; let ret = self.parse_expr_inner_parens(pos)?;
self.rparen()?; self.expect_rparen()?;
Ok(ret) Ok(ret)
} else if self.is_sym_str("#t") { } else if self.eat_sym_str("#t")? {
self.symbol()?;
Ok(Expr::ConstInt { val: 1, pos }) Ok(Expr::ConstInt { val: 1, pos })
} else if self.is_sym_str("#f") { } else if self.eat_sym_str("#f")? {
self.symbol()?;
Ok(Expr::ConstInt { val: 0, pos }) Ok(Expr::ConstInt { val: 0, pos })
} else if self.is_const() { } else if self.is_const() {
let val = self.parse_const()?; let val = self.parse_const()?;
@@ -532,7 +511,7 @@ impl<'a> Parser<'a> {
let name = self.parse_ident()?; let name = self.parse_ident()?;
Ok(Expr::Var { name, pos }) Ok(Expr::Var { name, pos })
} else if self.is_int() { } else if self.is_int() {
let val = self.int()?; let val = self.expect_int()?;
Ok(Expr::ConstInt { val, pos }) Ok(Expr::ConstInt { val, pos })
} else { } else {
Err(self.error(pos, "Invalid expression".into())) Err(self.error(pos, "Invalid expression".into()))
@@ -540,15 +519,14 @@ impl<'a> Parser<'a> {
} }
fn parse_expr_inner_parens(&mut self, pos: Pos) -> Result<Expr> { fn parse_expr_inner_parens(&mut self, pos: Pos) -> Result<Expr> {
if self.is_sym_str("let") { if self.eat_sym_str("let")? {
self.symbol()?; self.expect_lparen()?;
self.lparen()?;
let mut defs = vec![]; let mut defs = vec![];
while !self.is_rparen() { while !self.is_rparen() {
let def = self.parse_letdef()?; let def = self.parse_letdef()?;
defs.push(def); defs.push(def);
} }
self.rparen()?; self.expect_rparen()?;
let body = Box::new(self.parse_expr()?); let body = Box::new(self.parse_expr()?);
Ok(Expr::Let { defs, body, pos }) Ok(Expr::Let { defs, body, pos })
} else { } else {
@@ -563,11 +541,11 @@ impl<'a> Parser<'a> {
fn parse_letdef(&mut self) -> Result<LetDef> { fn parse_letdef(&mut self) -> Result<LetDef> {
let pos = self.pos(); let pos = self.pos();
self.lparen()?; self.expect_lparen()?;
let var = self.parse_ident()?; let var = self.parse_ident()?;
let ty = self.parse_ident()?; let ty = self.parse_ident()?;
let val = Box::new(self.parse_expr()?); let val = Box::new(self.parse_expr()?);
self.rparen()?; self.expect_rparen()?;
Ok(LetDef { var, ty, val, pos }) Ok(LetDef { var, ty, val, pos })
} }