//! A Pratt parser which aims for simplicity
//!
//! Based on [Simple but Powerful Pratt Parsing][1] by Alex Kladov
//!
//! [1]: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
pub mod expr {
|
|
use crate::token::Op;
|
|
use std::fmt;
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub enum Expr {
|
|
Int(usize),
|
|
Char(char),
|
|
Str(String),
|
|
Ident(String),
|
|
Unary(Op, Box<Expr>),
|
|
Postfix(Op, Box<Expr>),
|
|
// Binary operators like `a + b`, `a * b`, ...
|
|
Binary(Op, Box<[Expr; 2]>),
|
|
Index(Box<[Expr; 2]>),
|
|
}
|
|
impl fmt::Display for Expr {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Expr::Int(v) => write!(f, "{v}"),
|
|
Expr::Str(v) => write!(f, "\"{v}\""),
|
|
Expr::Char(v) => write!(f, "'{v}'"),
|
|
Expr::Ident(v) => write!(f, "{v}"),
|
|
Expr::Unary(op, e) => write!(f, "{op}{e}"),
|
|
Expr::Postfix(op, e) => write!(f, "{e}{op}"),
|
|
Expr::Binary(op, e) => write!(f, "({} {op} {})", e[0], e[1]),
|
|
Expr::Index(e) => write!(f, "{}[{}]", e[0], e[1]),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub mod parser {
    //! Pratt "precedence ascent" parsing over the [Tokenizer] stream,
    //! producing [Expr] trees.
    #![allow(unused)]
    use std::iter::Peekable;

    use cl_lexer::Lexer;

    use crate::{
        expr::Expr,
        token::{Op, Token, Tokenizer},
    };

    /// Parses `text` into an expression tree.
    ///
    /// Returns `None` when the input yields no expression (e.g. empty input
    /// or a leading token that is neither an atom nor a prefix operator).
    pub fn expr(text: &str) -> Option<Expr> {
        let mut lexer = Tokenizer::new(Lexer::new(text)).peekable();
        exprec(&mut lexer, 0)
    }

    /// Performs the pratt precedence ascent algorithm
    ///
    /// `min` is the minimum binding power: loop iterations stop consuming
    /// operators whose left binding power is below it, handing them back to
    /// the enclosing recursive call.
    ///
    /// NOTE(review): the `assert_eq!` calls below panic (rather than return
    /// `None`) on an unmatched `(` or `[` — confirm that is intended for
    /// malformed input.
    fn exprec<I>(lexer: &mut Peekable<I>, min: u8) -> Option<Expr>
    where
        I: Iterator<Item = Token>,
    {
        // Parse the "head": an atom, a parenthesized subexpression, or a
        // prefix operator followed by its operand.
        let mut head = match lexer.next()? {
            Token::Int(d) => Expr::Int(d),
            Token::Char(c) => Expr::Char(c),
            Token::Ident(c) => Expr::Ident(c),
            Token::Str(c) => Expr::Str(c),
            Token::Op(Op::Lpa) => {
                // Parenthesized group: reset min power, then require `)`.
                let head = exprec(lexer, 0)?;
                assert_eq!(lexer.next()?, Token::Op(Op::Rpa));
                head
            }
            Token::Op(op) => {
                // Any other leading operator must be a valid prefix operator.
                let ((), after) = prefix(op)?;
                Expr::Unary(op, Box::new(exprec(lexer, after)?))
            }
        };

        // Ascent loop: keep folding postfix/infix operators into `head`
        // while their binding power is at least `min`.
        loop {
            let op = match lexer.peek() {
                None => break,
                Some(Token::Op(op)) => *op,
                Some(t) => {
                    // A non-operator token here ends this expression; it is
                    // reported but left in the stream.
                    eprintln!("Bad token: {t}");
                    return Some(head);
                }
            };

            // Postfix operators (`?`, and `[` which opens an index).
            if let Some((before, ())) = postfix(op) {
                if before < min {
                    break;
                }
                lexer.next().expect("should not change since peeked");

                head = match op {
                    Op::Lbk => {
                        // Index subscript: parse freely, then require `]`.
                        let tail = exprec(lexer, 0)?;
                        assert_eq!(lexer.next(), Some(Token::Op(Op::Rbk)));
                        Expr::Index(Box::new([head, tail]))
                    }
                    _ => Expr::Postfix(op, Box::new(head)),
                };
                continue;
            }

            // Infix operators: recurse with the operator's right power.
            if let Some((before, after)) = infix(op) {
                if before < min {
                    break;
                }
                lexer.next().expect("should not change since peeked");

                let tail = exprec(lexer, after)?;
                head = Expr::Binary(op, [head, tail].into());
                continue;
            }
            break;
        }
        Some(head)
    }

    /// Right binding power of `op` used as a prefix operator, or `None` if
    /// `op` is not a prefix operator.
    fn prefix(op: Op) -> Option<((), u8)> {
        match op {
            Op::Sub | Op::Not => Prec::Unary,
            _ => None?,
        }
        .prefix()
    }
    /// `(left, right)` binding powers of `op` used infix, or `None` if `op`
    /// is not an infix operator.
    ///
    /// NOTE(review): `Mul`/`Div`/`Rem` map to [Prec::Term] and `Add`/`Sub`
    /// to [Prec::Factor] — the reverse of the `// Factor` / `// Term` group
    /// labels in the token module. Binding is still correct (multiplicative
    /// binds tighter, since `Prec::Term > Prec::Factor`), but the naming is
    /// inconsistent; worth confirming which naming was intended.
    fn infix(op: Op) -> Option<(u8, u8)> {
        match op {
            Op::Dot => Prec::Member,
            Op::Not => Prec::Unary,
            Op::Mul | Op::Div | Op::Rem => Prec::Term,
            Op::Add | Op::Sub => Prec::Factor,
            Op::Shl | Op::Shr => Prec::Shift,
            Op::Ban | Op::Bor | Op::Bxr => Prec::Bitwise,
            Op::Lan | Op::Lor | Op::Lxr => Prec::Logic,
            Op::Inc | Op::Exc => Prec::Range,
            Op::Lt | Op::Lte | Op::Eq | Op::Neq | Op::Gte | Op::Gt => Prec::Compare,
            Op::Lpa => None?,
            Op::Rpa => None?,
            Op::Lbk => None?,
            Op::Rbk => None?,
            Op::Huh => None?,
        }
        .infix()
    }
    /// Left binding power of `op` used as a postfix operator, or `None` if
    /// `op` is not a postfix operator.
    fn postfix(op: Op) -> Option<(u8, ())> {
        match op {
            Op::Lbk => Prec::Index,
            Op::Huh => Prec::Postfix,
            _ => None?,
        }
        .postfix()
    }

    /// Precedence classes, weakest-binding first.
    ///
    /// The discriminant order is load-bearing: [Prec::level] doubles the
    /// discriminant to produce binding powers, so later variants bind
    /// tighter. Do not reorder variants without re-checking every operator
    /// mapping above.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
    enum Prec {
        Compare,
        Range,
        Index,
        Logic,
        Bitwise,
        Shift,
        Factor,
        Term,
        Unary,
        Postfix,
        // NOTE(review): this variant was labeled "left-associative", but
        // `infix()` gives Member `(level + 1, level)`, which is a
        // RIGHT-associative binding-power pair (`a.b.c` parses as
        // `a.(b.c)`). Confirm the intended associativity of `.`.
        Member,
    }
    impl Prec {
        /// Base binding power: discriminant doubled, leaving room for the
        /// `+ 1` used to encode associativity in `infix`.
        #[inline]
        fn level(self) -> u8 {
            (self as u8) << 1
        }
        /// Prefix binding power; only `Unary` operators may appear prefix.
        fn prefix(self) -> Option<((), u8)> {
            match self {
                Self::Unary => Some(((), self.level())),
                _ => None,
            }
        }
        /// Infix `(left, right)` binding powers; `left < right` is
        /// left-associative, `left > right` is right-associative.
        fn infix(self) -> Option<(u8, u8)> {
            let level = self.level();
            match self {
                Self::Unary => None,
                Self::Member => Some((level + 1, level)),
                _ => Some((level, level + 1)),
            }
        }
        /// Postfix (left) binding power; only indexing and `?` qualify.
        fn postfix(self) -> Option<(u8, ())> {
            match self {
                Self::Index | Self::Postfix => Some((self.level(), ())),
                _ => None,
            }
        }
    }
}
|
|
|
|
pub mod token {
|
|
//! Custom token type, plus a [Tokenizer] iterator adapter for cl-lexer's token type
|
|
use cl_token::{token_type::Op as Tkop, *};
|
|
|
|
pub struct Tokenizer<'t> {
|
|
lexer: cl_lexer::lexer_iter::LexerIter<'t>,
|
|
}
|
|
impl<'t> Tokenizer<'t> {
|
|
pub fn new(lexer: cl_lexer::Lexer<'t>) -> Self {
|
|
Self {
|
|
lexer: lexer.into_iter(),
|
|
}
|
|
}
|
|
}
|
|
impl Iterator for Tokenizer<'_> {
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let token = self.lexer.next()?.ok()?;
|
|
let (ty, data) = (token.ty(), token.into_data());
|
|
|
|
match data {
|
|
TokenData::Integer(v) => return Some(Token::Int(v as _)),
|
|
TokenData::Character(v) => return Some(Token::Char(v)),
|
|
TokenData::Identifier(v) => return Some(Token::Ident(v.into_string())),
|
|
TokenData::String(v) => return Some(Token::Str(v.to_owned())),
|
|
_ => {}
|
|
}
|
|
|
|
match ty.try_into() {
|
|
Ok(op) => Some(Token::Op(op)),
|
|
Err(Er::Invalid) => self.next(),
|
|
Err(Er::NotAnOp) => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
pub enum Token {
|
|
Int(usize),
|
|
Char(char),
|
|
Ident(String),
|
|
Str(String),
|
|
Op(Op),
|
|
}
|
|
|
|
impl std::fmt::Display for Token {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self {
|
|
Token::Int(v) => write!(f, "{v}"),
|
|
Token::Char(v) => write!(f, "'{v}'"),
|
|
Token::Ident(v) => write!(f, "{v}"),
|
|
Token::Str(v) => write!(f, "\"{v}\""),
|
|
Token::Op(v) => write!(f, "{v}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
macro_rules! operator {
|
|
(
|
|
$(#[$Meta:meta])*
|
|
$vis:vis enum $Name:ident {
|
|
$(
|
|
$(#[$meta:meta])*
|
|
#[$rep:literal]
|
|
$name:ident = $try_from:pat
|
|
),*$(,)?
|
|
}
|
|
) => {
|
|
$(#[$Meta])*
|
|
$vis enum $Name {$(
|
|
$(#[$meta])*
|
|
#[doc = $rep]
|
|
$name,
|
|
)*}
|
|
impl ::core::fmt::Display for $Name {
|
|
fn fmt(
|
|
&self, f: &mut ::core::fmt::Formatter<'_>
|
|
) -> ::core::fmt::Result {
|
|
match self { $($Name::$name => $rep,)* }.fmt(f)
|
|
}
|
|
}
|
|
impl TryFrom<cl_token::TokenKind> for $Name {
|
|
type Error = $crate::token::Er;
|
|
fn try_from(value: cl_token::TokenKind) -> Result<Self, Self::Error> {
|
|
match value {
|
|
cl_token::TokenKind::Comment |
|
|
cl_token::TokenKind::Invalid => Err(Er::Invalid),
|
|
$($try_from => Ok($Name::$name),)*
|
|
_ => Err(Er::NotAnOp)
|
|
}
|
|
}
|
|
}
|
|
};
|
|
}
|
|
operator! {
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
pub enum Op {
|
|
// Delimiter
|
|
#["("] Lpa = TokenKind::Op(Tkop::LParen),
|
|
#[")"] Rpa = TokenKind::Op(Tkop::RParen),
|
|
#["["] Lbk = TokenKind::Op(Tkop::LBrack),
|
|
#["]"] Rbk = TokenKind::Op(Tkop::RBrack),
|
|
// Member
|
|
#["."] Dot = TokenKind::Op(Tkop::Dot),
|
|
// Factor
|
|
#["*"] Mul = TokenKind::Op(Tkop::Star),
|
|
#["/"] Div = TokenKind::Op(Tkop::Slash),
|
|
#["%"] Rem = TokenKind::Op(Tkop::Rem),
|
|
// Term
|
|
#["+"] Add = TokenKind::Op(Tkop::Plus),
|
|
#["-"] Sub = TokenKind::Op(Tkop::Minus),
|
|
// Shift
|
|
#["<<"] Shl = TokenKind::Op(Tkop::LtLt),
|
|
#[">>"] Shr = TokenKind::Op(Tkop::GtGt),
|
|
// Bitwise
|
|
#["&"] Ban = TokenKind::Op(Tkop::Amp),
|
|
#["|"] Bor = TokenKind::Op(Tkop::Bar),
|
|
#["^"] Bxr = TokenKind::Op(Tkop::Xor),
|
|
// Logic
|
|
#["&&"] Lan = TokenKind::Op(Tkop::AmpAmp),
|
|
#["||"] Lor = TokenKind::Op(Tkop::BarBar),
|
|
#["^^"] Lxr = TokenKind::Op(Tkop::XorXor),
|
|
// Range
|
|
#["..="] Inc = TokenKind::Op(Tkop::DotDotEq),
|
|
#[".."] Exc = TokenKind::Op(Tkop::DotDot),
|
|
// Compare
|
|
#["<"] Lt = TokenKind::Op(Tkop::Lt),
|
|
#["<="] Lte = TokenKind::Op(Tkop::LtEq),
|
|
#["=="] Eq = TokenKind::Op(Tkop::EqEq),
|
|
#["!="] Neq = TokenKind::Op(Tkop::BangEq),
|
|
#[">="] Gte = TokenKind::Op(Tkop::GtEq),
|
|
#[">"] Gt = TokenKind::Op(Tkop::Gt),
|
|
// Unary-only
|
|
#["!"] Not = TokenKind::Op(Tkop::Bang),
|
|
// Postfix unary
|
|
#["?"] Huh = TokenKind::Op(Tkop::Question),
|
|
}
|
|
}
|
|
|
|
#[doc(hidden)]
|
|
pub enum Er {
|
|
Invalid,
|
|
NotAnOp,
|
|
}
|
|
}
|