//! A Pratt parser which aims for simplicity //! //! Based on [Simple but Powerful Pratt Parsing][1] by Alex Kladov //! //! [1]: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html pub mod expr { use crate::token::Op; use std::fmt; #[derive(Clone, Debug)] pub enum Expr { Int(usize), Char(char), Str(String), Ident(String), Unary(Op, Box), Postfix(Op, Box), // Binary operators like `a + b`, `a * b`, ... Binary(Op, Box<[Expr; 2]>), Index(Box<[Expr; 2]>), } impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Expr::Int(v) => write!(f, "{v}"), Expr::Str(v) => write!(f, "\"{v}\""), Expr::Char(v) => write!(f, "'{v}'"), Expr::Ident(v) => write!(f, "{v}"), Expr::Unary(op, e) => write!(f, "{op}{e}"), Expr::Postfix(op, e) => write!(f, "{e}{op}"), Expr::Binary(op, e) => write!(f, "({} {op} {})", e[0], e[1]), Expr::Index(e) => write!(f, "{}[{}]", e[0], e[1]), } } } } pub mod parser { #![allow(unused)] use std::iter::Peekable; use cl_lexer::Lexer; use crate::{ expr::Expr, token::{Op, Token, Tokenizer}, }; pub fn expr(text: &str) -> Option { let mut lexer = Tokenizer::new(Lexer::new(text)).peekable(); exprec(&mut lexer, 0) } /// Performs the pratt precedence ascent algorithm fn exprec(lexer: &mut Peekable, min: u8) -> Option where I: Iterator, { let mut head = match lexer.next()? { Token::Int(d) => Expr::Int(d), Token::Char(c) => Expr::Char(c), Token::Ident(c) => Expr::Ident(c), Token::Str(c) => Expr::Str(c), Token::Op(Op::Lpa) => { let head = exprec(lexer, 0)?; assert_eq!(lexer.next()?, Token::Op(Op::Rpa)); head } Token::Op(op) => { let ((), after) = prefix(op)?; Expr::Unary(op, Box::new(exprec(lexer, after)?)) } }; loop { let op = match lexer.peek() { None => break, Some(Token::Op(op)) => *op, Some(t) => { eprintln!("Bad token: {t}"); return Some(head); } }; if let Some((before, ())) = postfix(op) { if before < min { break; } lexer.next().expect("should not change since peeked"); head = match op { Op::Lbk => { let tail = exprec(lexer, 0)?; assert_eq!(lexer.next(), Some(Token::Op(Op::Rbk))); Expr::Index(Box::new([head, tail])) } _ => Expr::Postfix(op, Box::new(head)), }; continue; } if let Some((before, after)) = infix(op) { if before < min { break; } lexer.next().expect("should not change since peeked"); let tail = exprec(lexer, after)?; head = Expr::Binary(op, [head, tail].into()); continue; } break; } Some(head) } fn prefix(op: Op) -> Option<((), u8)> { match op { Op::Sub | Op::Not => Prec::Unary, _ => None?, } .prefix() } fn infix(op: Op) -> Option<(u8, u8)> { match op { Op::Dot => Prec::Member, Op::Not => Prec::Unary, Op::Mul | Op::Div | Op::Rem => Prec::Term, Op::Add | Op::Sub => Prec::Factor, Op::Shl | Op::Shr => Prec::Shift, Op::Ban | Op::Bor | Op::Bxr => Prec::Bitwise, Op::Lan | Op::Lor | Op::Lxr => Prec::Logic, Op::Inc | Op::Exc => Prec::Range, Op::Lt | Op::Lte | Op::Eq | Op::Neq | Op::Gte | Op::Gt => Prec::Compare, Op::Lpa => None?, Op::Rpa => None?, Op::Lbk => None?, Op::Rbk => None?, Op::Huh => None?, } .infix() } fn postfix(op: Op) -> Option<(u8, ())> { match op { Op::Lbk => Prec::Index, Op::Huh => Prec::Postfix, _ => None?, } .postfix() } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum Prec { Compare, Range, Index, Logic, Bitwise, Shift, Factor, Term, Unary, Postfix, Member, // left-associative } impl Prec { #[inline] fn level(self) -> u8 { (self as u8) << 1 } fn prefix(self) -> Option<((), u8)> { match self { Self::Unary => Some(((), self.level())), _ => None, } } fn infix(self) -> Option<(u8, u8)> { let level = self.level(); match self { Self::Unary => None, Self::Member => Some((level + 1, level)), _ => Some((level, level + 1)), } } fn postfix(self) -> Option<(u8, ())> { match self { Self::Index | Self::Postfix => Some((self.level(), ())), _ => None, } } } } pub mod token { //! Custom token type, plus a [Tokenizer] iterator adapter for cl-lexer's token type use cl_token::{token_type::Op as Tkop, *}; pub struct Tokenizer<'t> { lexer: cl_lexer::lexer_iter::LexerIter<'t>, } impl<'t> Tokenizer<'t> { pub fn new(lexer: cl_lexer::Lexer<'t>) -> Self { Self { lexer: lexer.into_iter(), } } } impl Iterator for Tokenizer<'_> { type Item = Token; fn next(&mut self) -> Option { let token = self.lexer.next()?.ok()?; let (ty, data) = (token.ty(), token.into_data()); match data { TokenData::Integer(v) => return Some(Token::Int(v as _)), TokenData::Character(v) => return Some(Token::Char(v)), TokenData::Identifier(v) => return Some(Token::Ident(v.into_string())), TokenData::String(v) => return Some(Token::Str(v.to_owned())), _ => {} } match ty.try_into() { Ok(op) => Some(Token::Op(op)), Err(Er::Invalid) => self.next(), Err(Er::NotAnOp) => None, } } } #[derive(Clone, Debug, PartialEq, Eq)] pub enum Token { Int(usize), Char(char), Ident(String), Str(String), Op(Op), } impl std::fmt::Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Token::Int(v) => write!(f, "{v}"), Token::Char(v) => write!(f, "'{v}'"), Token::Ident(v) => write!(f, "{v}"), Token::Str(v) => write!(f, "\"{v}\""), Token::Op(v) => write!(f, "{v}"), } } } macro_rules! operator { ( $(#[$Meta:meta])* $vis:vis enum $Name:ident { $( $(#[$meta:meta])* #[$rep:literal] $name:ident = $try_from:pat ),*$(,)? } ) => { $(#[$Meta])* $vis enum $Name {$( $(#[$meta])* #[doc = $rep] $name, )*} impl ::core::fmt::Display for $Name { fn fmt( &self, f: &mut ::core::fmt::Formatter<'_> ) -> ::core::fmt::Result { match self { $($Name::$name => $rep,)* }.fmt(f) } } impl TryFrom for $Name { type Error = $crate::token::Er; fn try_from(value: cl_token::TokenKind) -> Result { match value { cl_token::TokenKind::Comment | cl_token::TokenKind::Invalid => Err(Er::Invalid), $($try_from => Ok($Name::$name),)* _ => Err(Er::NotAnOp) } } } }; } operator! { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Op { // Delimiter #["("] Lpa = TokenKind::Op(Tkop::LParen), #[")"] Rpa = TokenKind::Op(Tkop::RParen), #["["] Lbk = TokenKind::Op(Tkop::LBrack), #["]"] Rbk = TokenKind::Op(Tkop::RBrack), // Member #["."] Dot = TokenKind::Op(Tkop::Dot), // Factor #["*"] Mul = TokenKind::Op(Tkop::Star), #["/"] Div = TokenKind::Op(Tkop::Slash), #["%"] Rem = TokenKind::Op(Tkop::Rem), // Term #["+"] Add = TokenKind::Op(Tkop::Plus), #["-"] Sub = TokenKind::Op(Tkop::Minus), // Shift #["<<"] Shl = TokenKind::Op(Tkop::LtLt), #[">>"] Shr = TokenKind::Op(Tkop::GtGt), // Bitwise #["&"] Ban = TokenKind::Op(Tkop::Amp), #["|"] Bor = TokenKind::Op(Tkop::Bar), #["^"] Bxr = TokenKind::Op(Tkop::Xor), // Logic #["&&"] Lan = TokenKind::Op(Tkop::AmpAmp), #["||"] Lor = TokenKind::Op(Tkop::BarBar), #["^^"] Lxr = TokenKind::Op(Tkop::XorXor), // Range #["..="] Inc = TokenKind::Op(Tkop::DotDotEq), #[".."] Exc = TokenKind::Op(Tkop::DotDot), // Compare #["<"] Lt = TokenKind::Op(Tkop::Lt), #["<="] Lte = TokenKind::Op(Tkop::LtEq), #["=="] Eq = TokenKind::Op(Tkop::EqEq), #["!="] Neq = TokenKind::Op(Tkop::BangEq), #[">="] Gte = TokenKind::Op(Tkop::GtEq), #[">"] Gt = TokenKind::Op(Tkop::Gt), // Unary-only #["!"] Not = TokenKind::Op(Tkop::Bang), // Postfix unary #["?"] Huh = TokenKind::Op(Tkop::Question), } } #[doc(hidden)] pub enum Er { Invalid, NotAnOp, } }