Implement a simple but powerful pratt parser based on matklad's minipratt

src/lib.rs (new file, 340 lines)
@@ -0,0 +1,340 @@
//! A Pratt parser which aims for simplicity
//!
//! Based on [Simple but Powerful Pratt Parsing][1] by Alex Kladov
//!
//! [1]: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
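//!
//! A minimal usage sketch (hypothetical; assumes `cl_lexer` tokenizes ordinary
//! arithmetic the way the `Op` table in the `token` module expects):
//!
//! ```ignore
//! use pratt::parser;
//!
//! // `*` binds tighter than `+`, so the `Display` impl shows the grouping:
//! let e = parser::expr("1 + 2 * 3").unwrap();
//! assert_eq!(e.to_string(), "(1 + (2 * 3))");
//! ```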

pub mod expr {
    use crate::token::Op;
    use std::fmt;

    #[derive(Clone, Debug)]
    pub enum Expr {
        Int(usize),
        Char(char),
        Str(String),
        Ident(String),
        Unary(Op, Box<Expr>),
        Postfix(Op, Box<Expr>),
        // Binary operators like `a + b`, `a * b`, ...
        Binary(Op, Box<[Expr; 2]>),
        Index(Box<[Expr; 2]>),
    }
    impl fmt::Display for Expr {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                Expr::Int(v) => write!(f, "{v}"),
                Expr::Str(v) => write!(f, "\"{v}\""),
                Expr::Char(v) => write!(f, "'{v}'"),
                Expr::Ident(v) => write!(f, "{v}"),
                Expr::Unary(op, e) => write!(f, "{op}{e}"),
                Expr::Postfix(op, e) => write!(f, "{e}{op}"),
                Expr::Binary(op, e) => write!(f, "({} {op} {})", e[0], e[1]),
                Expr::Index(e) => write!(f, "{}[{}]", e[0], e[1]),
            }
        }
    }
}

pub mod parser {
    #![allow(unused)]
    use std::iter::Peekable;

    use cl_lexer::Lexer;

    use crate::{
        expr::Expr,
        token::{Op, Token, Tokenizer},
    };

    pub fn expr(text: &str) -> Option<Expr> {
        let mut lexer = Tokenizer::new(Lexer::new(text)).peekable();
        exprec(&mut lexer, 0)
    }

    /// Performs the Pratt precedence ascent algorithm
    fn exprec<I>(lexer: &mut Peekable<I>, min: u8) -> Option<Expr>
    where
        I: Iterator<Item = Token>,
    {
        // Parse the left-hand side: an atom, a parenthesized expression,
        // or a prefix operator followed by its operand.
        let mut head = match lexer.next()? {
            Token::Int(d) => Expr::Int(d),
            Token::Char(c) => Expr::Char(c),
            Token::Ident(c) => Expr::Ident(c),
            Token::Str(c) => Expr::Str(c),
            Token::Op(Op::Lpa) => {
                let head = exprec(lexer, 0)?;
                assert_eq!(lexer.next()?, Token::Op(Op::Rpa));
                head
            }
            Token::Op(op) => {
                let ((), after) = prefix(op)?;
                Expr::Unary(op, Box::new(exprec(lexer, after)?))
            }
        };

        // Fold in postfix and infix operators while their binding power is at
        // least `min`; anything weaker is left for the caller to consume.
        loop {
            let op = match lexer.peek() {
                None => break,
                Some(Token::Op(op)) => *op,
                Some(t) => {
                    eprintln!("Bad token: {t}");
                    return Some(head);
                }
            };

            if let Some((before, ())) = postfix(op) {
                if before < min {
                    break;
                }
                lexer.next().expect("should not change since peeked");

                head = match op {
                    Op::Lbk => {
                        let tail = exprec(lexer, 0)?;
                        assert_eq!(lexer.next(), Some(Token::Op(Op::Rbk)));
                        Expr::Index(Box::new([head, tail]))
                    }
                    _ => Expr::Postfix(op, Box::new(head)),
                };
                continue;
            }

            if let Some((before, after)) = infix(op) {
                if before < min {
                    break;
                }
                lexer.next().expect("should not change since peeked");

                let tail = exprec(lexer, after)?;
                head = Expr::Binary(op, [head, tail].into());
                continue;
            }
            break;
        }
        Some(head)
    }

    fn prefix(op: Op) -> Option<((), u8)> {
        match op {
            Op::Sub | Op::Not => Prec::Unary,
            _ => None?,
        }
        .prefix()
    }
    fn infix(op: Op) -> Option<(u8, u8)> {
        match op {
            Op::Dot => Prec::Member,
            Op::Not => Prec::Unary,
            Op::Mul | Op::Div | Op::Rem => Prec::Term,
            Op::Add | Op::Sub => Prec::Factor,
            Op::Shl | Op::Shr => Prec::Shift,
            Op::Ban | Op::Bor | Op::Bxr => Prec::Bitwise,
            Op::Lan | Op::Lor | Op::Lxr => Prec::Logic,
            Op::Inc | Op::Exc => Prec::Range,
            Op::Lt | Op::Lte | Op::Eq | Op::Neq | Op::Gte | Op::Gt => Prec::Compare,
            Op::Lpa => None?,
            Op::Rpa => None?,
            Op::Lbk => None?,
            Op::Rbk => None?,
            Op::Huh => None?,
        }
        .infix()
    }
    fn postfix(op: Op) -> Option<(u8, ())> {
        match op {
            Op::Lbk => Prec::Index,
            Op::Huh => Prec::Postfix,
            _ => None?,
        }
        .postfix()
    }

    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
    enum Prec {
        Compare,
        Range,
        Index,
        Logic,
        Bitwise,
        Shift,
        Factor,
        Term,
        Unary,
        Postfix,
        Member, // right-associative (binds tighter on its left side)
    }
    impl Prec {
        /// Doubles the discriminant, leaving room for the ±1 associativity offsets below.
        #[inline]
        fn level(self) -> u8 {
            (self as u8) << 1
        }
        fn prefix(self) -> Option<((), u8)> {
            match self {
                Self::Unary => Some(((), self.level())),
                _ => None,
            }
        }
        fn infix(self) -> Option<(u8, u8)> {
            let level = self.level();
            match self {
                Self::Unary => None,
                // The higher left binding power makes `Member` right-associative;
                // all other infix operators are left-associative.
                Self::Member => Some((level + 1, level)),
                _ => Some((level, level + 1)),
            }
        }
        fn postfix(self) -> Option<(u8, ())> {
            match self {
                Self::Index | Self::Postfix => Some((self.level(), ())),
                _ => None,
            }
        }
    }
}

pub mod token {
    //! Custom token type, plus a [Tokenizer] iterator adapter for cl-lexer's token type
    use cl_token::*;

    pub struct Tokenizer<'t> {
        lexer: cl_lexer::lexer_iter::LexerIter<'t>,
    }
    impl<'t> Tokenizer<'t> {
        pub fn new(lexer: cl_lexer::Lexer<'t>) -> Self {
            Self {
                lexer: lexer.into_iter(),
            }
        }
    }
    impl Iterator for Tokenizer<'_> {
        type Item = Token;

        fn next(&mut self) -> Option<Self::Item> {
            let token = self.lexer.next()?.ok()?;
            let (ty, data) = (token.ty(), token.into_data());

            match data {
                Data::Integer(v) => return Some(Token::Int(v as _)),
                Data::Character(v) => return Some(Token::Char(v)),
                Data::Identifier(v) => return Some(Token::Ident(v.into_string())),
                Data::String(v) => return Some(Token::Str(v.to_owned())),
                _ => {}
            }

            match ty.try_into() {
                Ok(op) => Some(Token::Op(op)),
                // Comments and invalid tokens are skipped entirely.
                Err(Er::Invalid) => self.next(),
                Err(Er::NotAnOp) => None,
            }
        }
    }

    #[derive(Clone, Debug, PartialEq, Eq)]
    pub enum Token {
        Int(usize),
        Char(char),
        Ident(String),
        Str(String),
        Op(Op),
    }

    impl std::fmt::Display for Token {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Token::Int(v) => write!(f, "{v}"),
                Token::Char(v) => write!(f, "'{v}'"),
                Token::Ident(v) => write!(f, "{v}"),
                Token::Str(v) => write!(f, "\"{v}\""),
                Token::Op(v) => write!(f, "{v}"),
            }
        }
    }

    /// Defines an operator enum plus its `Display` and `TryFrom<cl_token::Type>`
    /// impls from a compact table of `#["repr"] Name = Pattern` entries.
    macro_rules! operator {
        (
            $(#[$Meta:meta])*
            $vis:vis enum $Name:ident {
                $(
                    $(#[$meta:meta])*
                    #[$rep:literal]
                    $name:ident = $try_from:pat
                ),*$(,)?
            }
        ) => {
            $(#[$Meta])*
            $vis enum $Name {$(
                $(#[$meta])*
                #[doc = $rep]
                $name,
            )*}
            impl ::core::fmt::Display for $Name {
                fn fmt(
                    &self, f: &mut ::core::fmt::Formatter<'_>
                ) -> ::core::fmt::Result {
                    match self { $($Name::$name => $rep,)* }.fmt(f)
                }
            }
            impl TryFrom<cl_token::Type> for $Name {
                type Error = $crate::token::Er;
                fn try_from(value: cl_token::Type) -> Result<Self, Self::Error> {
                    match value {
                        cl_token::Type::Comment |
                        cl_token::Type::Invalid => Err(Er::Invalid),
                        $($try_from => Ok($Name::$name),)*
                        _ => Err(Er::NotAnOp)
                    }
                }
            }
        };
    }
    operator! {
        #[derive(Clone, Copy, Debug, PartialEq, Eq)]
        pub enum Op {
            // Delimiter
            #["("] Lpa = Type::LParen,
            #[")"] Rpa = Type::RParen,
            #["["] Lbk = Type::LBrack,
            #["]"] Rbk = Type::RBrack,
            // Member
            #["."] Dot = Type::Dot,
            // Term
            #["*"] Mul = Type::Star,
            #["/"] Div = Type::Slash,
            #["%"] Rem = Type::Rem,
            // Factor
            #["+"] Add = Type::Plus,
            #["-"] Sub = Type::Minus,
            // Shift
            #["<<"] Shl = Type::LtLt,
            #[">>"] Shr = Type::GtGt,
            // Bitwise
            #["&"] Ban = Type::Amp,
            #["|"] Bor = Type::Bar,
            #["^"] Bxr = Type::Xor,
            // Logic
            #["&&"] Lan = Type::AmpAmp,
            #["||"] Lor = Type::BarBar,
            #["^^"] Lxr = Type::XorXor,
            // Range
            #["..="] Inc = Type::DotDotEq,
            #[".."] Exc = Type::DotDot,
            // Compare
            #["<"] Lt = Type::Lt,
            #["<="] Lte = Type::LtEq,
            #["=="] Eq = Type::EqEq,
            #["!="] Neq = Type::BangEq,
            #[">="] Gte = Type::GtEq,
            #[">"] Gt = Type::Gt,
            // Unary-only
            #["!"] Not = Type::Bang,
            // Postfix unary
            #["?"] Huh = Type::Question,
        }
    }

    #[doc(hidden)]
    pub enum Er {
        Invalid,
        NotAnOp,
    }
}
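
The precedence and associativity choices above can be sanity-checked through the
`Display` impl. A minimal sketch of such a test module (hypothetical, not part of
this diff; it assumes `cl_lexer` tokenizes these inputs the way the `Op` table
expects):

#[cfg(test)]
mod tests {
    use crate::parser;

    /// Parses `src` and pretty-prints the result via `Display`.
    fn p(src: &str) -> String {
        parser::expr(src).expect("should parse").to_string()
    }

    #[test]
    fn precedence_and_associativity() {
        // `*` binds tighter than `+`; `-` is left-associative.
        assert_eq!(p("1 + 2 * 3"), "(1 + (2 * 3))");
        assert_eq!(p("1 - 2 - 3"), "((1 - 2) - 3)");
        // `.` carries the higher *left* binding power, so it nests to the right.
        assert_eq!(p("a.b.c"), "(a . (b . c))");
        // Prefix `-` binds tighter than `*`.
        assert_eq!(p("-a * b"), "(-a * b)");
        // Postfix `?` and indexing.
        assert_eq!(p("a[i + 1]?"), "a[(i + 1)]?");
    }
}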

src/main.rs (new file, 13 lines)
@@ -0,0 +1,13 @@
use cl_repl::repline::Repline;
use pratt::parser;

fn main() {
    let mut rl = Repline::new("\x1b[32m", "crisp >", "what? >");

    while let Ok(line) = rl.read() {
        if let Some(expr) = parser::expr(&line) {
            // ANSI: return to column 1 and clear below before printing the parse.
            println!("\x1b[G\x1b[J{expr:?}");
            rl.accept();
        }
    }
}