Implement a simple but powerful Pratt parser based on matklad's minipratt

This commit is contained in:
John 2024-04-12 13:57:50 -05:00
commit 5c6a588250
5 changed files with 767 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

404
Cargo.lock generated Normal file
View File

@ -0,0 +1,404 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "argh"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219"
dependencies = [
"argh_derive",
"argh_shared",
]
[[package]]
name = "argh_derive"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a"
dependencies = [
"argh_shared",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "argh_shared"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531"
dependencies = [
"serde",
]
[[package]]
name = "autocfg"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cl-ast"
version = "0.0.4"
dependencies = [
"cl-structures",
]
[[package]]
name = "cl-interpret"
version = "0.0.4"
dependencies = [
"cl-ast",
"cl-structures",
]
[[package]]
name = "cl-lexer"
version = "0.0.4"
dependencies = [
"cl-structures",
"cl-token",
"unicode-ident",
]
[[package]]
name = "cl-parser"
version = "0.0.4"
dependencies = [
"cl-ast",
"cl-lexer",
"cl-structures",
"cl-token",
]
[[package]]
name = "cl-repl"
version = "0.0.4"
dependencies = [
"argh",
"cl-ast",
"cl-interpret",
"cl-lexer",
"cl-parser",
"cl-token",
"crossterm",
]
[[package]]
name = "cl-structures"
version = "0.0.4"
[[package]]
name = "cl-token"
version = "0.0.4"
[[package]]
name = "crossterm"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
dependencies = [
"bitflags 2.5.0",
"crossterm_winapi",
"libc",
"mio",
"parking_lot",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
dependencies = [
"winapi",
]
[[package]]
name = "libc"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys",
]
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "pratt"
version = "0.1.0"
dependencies = [
"cl-lexer",
"cl-repl",
"cl-token",
]
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "signal-hook"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
dependencies = [
"libc",
]
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "syn"
version = "2.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"

9
Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "pratt"
version = "0.1.0"
edition = "2021"
[dependencies]
cl-lexer = { path = "../conlang/cl-lexer" }
cl-token = { path = "../conlang/cl-token" }
cl-repl = { path = "../conlang/cl-repl" }

340
src/lib.rs Normal file
View File

@ -0,0 +1,340 @@
//! A Pratt parser which aims for simplicity
//!
//! Based on [Simple but Powerful Pratt Parsing][1] by Alex Kladov
//!
//! [1]: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
pub mod expr {
use crate::token::Op;
use std::fmt;
/// A node of the expression tree built by the parser.
#[derive(Clone, Debug)]
pub enum Expr {
/// An integer literal
Int(usize),
/// A character literal; displayed between single quotes
Char(char),
/// A string literal; displayed between double quotes
Str(String),
/// An identifier
Ident(String),
/// A prefix operator and its operand; displayed as operator, then operand
Unary(Op, Box<Expr>),
/// A postfix operator and its operand; displayed as operand, then operator
Postfix(Op, Box<Expr>),
/// A binary operator like `a + b`, `a * b`, ... with its `[left, right]` operands
Binary(Op, Box<[Expr; 2]>),
/// An index expression: element 0 is the value being indexed, element 1 is the index
Index(Box<[Expr; 2]>),
}
impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Expr::Int(v) => write!(f, "{v}"),
Expr::Str(v) => write!(f, "\"{v}\""),
Expr::Char(v) => write!(f, "'{v}'"),
Expr::Ident(v) => write!(f, "{v}"),
Expr::Unary(op, e) => write!(f, "{op}{e}"),
Expr::Postfix(op, e) => write!(f, "{e}{op}"),
Expr::Binary(op, e) => write!(f, "({} {op} {})", e[0], e[1]),
Expr::Index(e) => write!(f, "{}[{}]", e[0], e[1]),
}
}
}
}
pub mod parser {
#![allow(unused)]
use std::iter::Peekable;
use cl_lexer::Lexer;
use crate::{
expr::Expr,
token::{Op, Token, Tokenizer},
};
/// Parses `text` as a single expression, starting at the lowest binding power.
///
/// Returns `None` when no expression could be parsed. Tokens left over after the
/// first complete expression are not inspected.
pub fn expr(text: &str) -> Option<Expr> {
    let tokens = Tokenizer::new(Lexer::new(text));
    exprec(&mut tokens.peekable(), 0)
}
/// Performs the Pratt precedence ascent algorithm.
///
/// First parses a head: a literal, an identifier, a parenthesized expression, or a
/// prefix operator followed by its operand. Then, for as long as the next token is an
/// operator whose left binding power is at least `min`, folds that postfix or infix
/// operator into the head.
///
/// Returns `None` when:
/// - the tokens run out where an operand is required,
/// - an operator appears in operand position but has no prefix form,
/// - a `(` or `[` is not closed by its matching delimiter.
///
/// A non-operator token following a complete expression is reported on stderr and
/// ends the expression; the expression parsed so far is returned.
fn exprec<I>(lexer: &mut Peekable<I>, min: u8) -> Option<Expr>
where
    I: Iterator<Item = Token>,
{
    let mut head = match lexer.next()? {
        Token::Int(d) => Expr::Int(d),
        Token::Char(c) => Expr::Char(c),
        Token::Ident(c) => Expr::Ident(c),
        Token::Str(c) => Expr::Str(c),
        Token::Op(Op::Lpa) => {
            // Parentheses reset the binding power for the enclosed expression
            let head = exprec(lexer, 0)?;
            // Input comes from the user, so a missing `)` is a parse failure, not a panic
            if lexer.next() != Some(Token::Op(Op::Rpa)) {
                return None;
            }
            head
        }
        Token::Op(op) => {
            let ((), after) = prefix(op)?;
            Expr::Unary(op, Box::new(exprec(lexer, after)?))
        }
    };
    loop {
        let op = match lexer.peek() {
            None => break,
            Some(Token::Op(op)) => *op,
            Some(t) => {
                eprintln!("Bad token: {t}");
                return Some(head);
            }
        };
        if let Some((before, ())) = postfix(op) {
            if before < min {
                break;
            }
            lexer.next().expect("should not change since peeked");
            head = match op {
                Op::Lbk => {
                    // The index itself is a fresh expression, closed by `]`
                    let tail = exprec(lexer, 0)?;
                    // As with `)`: an unclosed `[` is reported as a parse failure
                    if lexer.next() != Some(Token::Op(Op::Rbk)) {
                        return None;
                    }
                    Expr::Index(Box::new([head, tail]))
                }
                _ => Expr::Postfix(op, Box::new(head)),
            };
            continue;
        }
        if let Some((before, after)) = infix(op) {
            if before < min {
                break;
            }
            lexer.next().expect("should not change since peeked");
            // The right operand only absorbs operators binding at least as tightly as `after`
            let tail = exprec(lexer, after)?;
            head = Expr::Binary(op, [head, tail].into());
            continue;
        }
        // Neither postfix nor infix (e.g. a closing delimiter): leave it for the caller
        break;
    }
    Some(head)
}
/// Looks up the binding power applied to the operand of `op` in prefix position.
///
/// Only `-` and `!` have a prefix form; every other operator yields `None`.
fn prefix(op: Op) -> Option<((), u8)> {
    if matches!(op, Op::Sub | Op::Not) {
        Prec::Unary.prefix()
    } else {
        None
    }
}
/// Looks up the `(left, right)` binding powers of `op` in infix position.
///
/// The match is deliberately exhaustive, so adding an operator forces a decision here.
fn infix(op: Op) -> Option<(u8, u8)> {
    let prec = match op {
        // Delimiters and the postfix `?` have no infix form
        Op::Lpa | Op::Rpa | Op::Lbk | Op::Rbk | Op::Huh => return None,
        Op::Lt | Op::Lte | Op::Eq | Op::Neq | Op::Gte | Op::Gt => Prec::Compare,
        Op::Inc | Op::Exc => Prec::Range,
        Op::Lan | Op::Lor | Op::Lxr => Prec::Logic,
        Op::Ban | Op::Bor | Op::Bxr => Prec::Bitwise,
        Op::Shl | Op::Shr => Prec::Shift,
        // NOTE(review): here `Factor` is the additive tier and `Term` the multiplicative
        // one, the reverse of the usual meaning of those words; the ordering is what matters.
        Op::Add | Op::Sub => Prec::Factor,
        Op::Mul | Op::Div | Op::Rem => Prec::Term,
        // `Prec::Unary` has no infix binding power, so `!` ends up as `None`
        Op::Not => Prec::Unary,
        Op::Dot => Prec::Member,
    };
    prec.infix()
}
/// Looks up the left binding power of `op` in postfix position.
///
/// Only `[` (indexing) and `?` have a postfix form; every other operator yields `None`.
fn postfix(op: Op) -> Option<(u8, ())> {
    let prec = match op {
        Op::Huh => Prec::Postfix,
        Op::Lbk => Prec::Index,
        _ => return None,
    };
    prec.postfix()
}
/// Precedence tiers, declared from loosest-binding to tightest-binding.
///
/// The declaration order is significant: [`Prec::level`] derives each tier's binding
/// power from its discriminant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Prec {
    Compare,
    Range,
    Logic,
    Bitwise,
    Shift,
    Factor,
    Term,
    Unary,
    // Indexing must bind tighter than every binary and prefix operator,
    // otherwise `a + b[c]` would group as `(a + b)[c]`.
    Index,
    Postfix,
    Member, // left-associative, like every other infix tier
}
impl Prec {
    /// Base binding power of this tier.
    ///
    /// Levels are spaced two apart so that an infix operator can use `level` and
    /// `level + 1` as its two sides without colliding with the neighbouring tier.
    #[inline]
    fn level(self) -> u8 {
        (self as u8) << 1
    }
    /// Binding power applied to the operand of a prefix operator.
    ///
    /// Only [`Prec::Unary`] has a prefix form.
    fn prefix(self) -> Option<((), u8)> {
        match self {
            Self::Unary => Some(((), self.level())),
            _ => None,
        }
    }
    /// `(left, right)` binding powers of an infix operator.
    ///
    /// The right side is always the stronger one, which makes every infix operator
    /// left-associative: `a.b.c` groups as `(a.b).c`. [`Prec::Unary`] has no infix form.
    fn infix(self) -> Option<(u8, u8)> {
        let level = self.level();
        match self {
            Self::Unary => None,
            _ => Some((level, level + 1)),
        }
    }
    /// Left binding power of a postfix operator.
    ///
    /// Only [`Prec::Index`] and [`Prec::Postfix`] have a postfix form.
    fn postfix(self) -> Option<(u8, ())> {
        match self {
            Self::Index | Self::Postfix => Some((self.level(), ())),
            _ => None,
        }
    }
}
}
pub mod token {
//! Custom token type, plus a [Tokenizer] iterator adapter for cl-lexer's token type
use cl_token::*;
/// Iterator adapter that turns cl-lexer's token stream into this crate's [`Token`]s.
pub struct Tokenizer<'t> {
    /// The wrapped cl-lexer iterator
    lexer: cl_lexer::lexer_iter::LexerIter<'t>,
}
impl<'t> Tokenizer<'t> {
    /// Wraps `lexer`, consuming it.
    pub fn new(lexer: cl_lexer::Lexer<'t>) -> Self {
        let lexer = lexer.into_iter();
        Self { lexer }
    }
}
/// Yields [`Token`]s until the input ends or a token cannot be represented.
///
/// Iteration stops (`None`) when:
/// - the underlying lexer is exhausted,
/// - the underlying lexer reports an error,
/// - a token carries none of the recognized data kinds and its type has no [`Op`] mapping.
///
/// Comment and invalid tokens are skipped.
impl Iterator for Tokenizer<'_> {
    type Item = Token;
    fn next(&mut self) -> Option<Self::Item> {
        // Loop instead of recursing, so that a long run of skipped tokens
        // cannot grow the call stack.
        loop {
            let token = self.lexer.next()?.ok()?;
            let (ty, data) = (token.ty(), token.into_data());
            // Tokens that carry data are classified by that data alone
            match data {
                Data::Integer(v) => return Some(Token::Int(v as _)),
                Data::Character(v) => return Some(Token::Char(v)),
                Data::Identifier(v) => return Some(Token::Ident(v.into_string())),
                Data::String(v) => return Some(Token::Str(v.to_owned())),
                _ => {}
            }
            // Everything else is an operator, something to skip, or the end of the stream
            match ty.try_into() {
                Ok(op) => return Some(Token::Op(op)),
                Err(Er::Invalid) => continue,
                Err(Er::NotAnOp) => return None,
            }
        }
    }
}
/// A token as consumed by the parser: either a value-carrying atom or an operator.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
/// An integer literal
Int(usize),
/// A character literal
Char(char),
/// An identifier
Ident(String),
/// A string literal
Str(String),
/// An operator or delimiter
Op(Op),
}
impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Token::Int(v) => write!(f, "{v}"),
Token::Char(v) => write!(f, "'{v}'"),
Token::Ident(v) => write!(f, "{v}"),
Token::Str(v) => write!(f, "\"{v}\""),
Token::Op(v) => write!(f, "{v}"),
}
}
}
// Declares an operator enum from a table of `#["text"] Variant = <cl_token::Type pattern>` rows.
//
// For every row the expansion produces:
// - a unit variant documented with the literal text,
// - a `Display` arm that prints the literal text,
// - a `TryFrom<cl_token::Type>` arm that maps the pattern to the variant.
macro_rules! operator {
(
// Attributes (such as derives) and visibility, forwarded onto the generated enum
$(#[$Meta:meta])*
$vis:vis enum $Name:ident {
$(
// Optional extra attributes for the variant, then its display text,
// then the variant name and the token-type pattern it is converted from
$(#[$meta:meta])*
#[$rep:literal]
$name:ident = $try_from:pat
),*$(,)?
}
) => {
// The enum itself: one unit variant per row, with the display text as its doc string
$(#[$Meta])*
$vis enum $Name {$(
$(#[$meta])*
#[doc = $rep]
$name,
)*}
// Display: every variant prints the literal text it was declared with
impl ::core::fmt::Display for $Name {
fn fmt(
&self, f: &mut ::core::fmt::Formatter<'_>
) -> ::core::fmt::Result {
match self { $($Name::$name => $rep,)* }.fmt(f)
}
}
// Conversion from the lexer's token type
impl TryFrom<cl_token::Type> for $Name {
type Error = $crate::token::Er;
fn try_from(value: cl_token::Type) -> Result<Self, Self::Error> {
match value {
// Comments and invalid tokens are checked first and reported as `Er::Invalid`
cl_token::Type::Comment |
cl_token::Type::Invalid => Err(Er::Invalid),
$($try_from => Ok($Name::$name),)*
// Any token type not listed in the table
_ => Err(Er::NotAnOp)
}
}
}
};
}
// The operators and delimiters understood by the parser.
// Each row reads: `#[display text] Variant = token type it is converted from`.
operator! {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
// Delimiters
#["("] Lpa = Type::LParen,
#[")"] Rpa = Type::RParen,
#["["] Lbk = Type::LBrack,
#["]"] Rbk = Type::RBrack,
// Member access
#["."] Dot = Type::Dot,
// Factor (multiplicative operators)
#["*"] Mul = Type::Star,
#["/"] Div = Type::Slash,
#["%"] Rem = Type::Rem,
// Term (additive operators; `-` also has a prefix form)
#["+"] Add = Type::Plus,
#["-"] Sub = Type::Minus,
// Shift
#["<<"] Shl = Type::LtLt,
#[">>"] Shr = Type::GtGt,
// Bitwise
#["&"] Ban = Type::Amp,
#["|"] Bor = Type::Bar,
#["^"] Bxr = Type::Xor,
// Logic
#["&&"] Lan = Type::AmpAmp,
#["||"] Lor = Type::BarBar,
#["^^"] Lxr = Type::XorXor,
// Range (inclusive, then exclusive)
#["..="] Inc = Type::DotDotEq,
#[".."] Exc = Type::DotDot,
// Compare
#["<"] Lt = Type::Lt,
#["<="] Lte = Type::LtEq,
#["=="] Eq = Type::EqEq,
#["!="] Neq = Type::BangEq,
#[">="] Gte = Type::GtEq,
#[">"] Gt = Type::Gt,
// Unary-only (prefix)
#["!"] Not = Type::Bang,
// Postfix unary
#["?"] Huh = Type::Question,
}
}
/// Reasons a lexer token type could not be converted into an operator.
#[doc(hidden)]
pub enum Er {
/// The token type was a comment or an invalid token
Invalid,
/// The token type is not one of the operators in the table
NotAnOp,
}
}

13
src/main.rs Normal file
View File

@ -0,0 +1,13 @@
use cl_repl::repline::Repline;
use pratt::parser;
/// Reads lines until reading fails, printing the debug form of every line that parses.
///
/// A line is only accepted once it has parsed successfully.
fn main() {
    let mut rl = Repline::new("\x1b[32m", "crisp >", "what? >");
    while let Ok(line) = rl.read() {
        match parser::expr(&line) {
            Some(expr) => {
                println!("\x1b[G\x1b[J{expr:?}");
                rl.accept();
            }
            // Nothing parsed: go straight back to reading
            None => continue,
        }
    }
}