parser: Refactor coagulated binops as postfix operators

This allows them to intermingle more nicely with the postfix `Try` operator (`?`).
This commit is contained in:
John 2025-09-15 10:28:08 -04:00 committed by Val
parent baf94a9dab
commit b6949147c4
6 changed files with 338 additions and 208 deletions

View File

@ -1,6 +1,6 @@
//! The Abstract Syntax Tree defines an interface between the parser and type checker
pub mod matcher;
pub mod macro_matcher;
/// A value with an annotation.
#[derive(Clone, Debug, PartialEq, Eq)]
@ -10,6 +10,7 @@ pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {}
impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation for T {}
/// A literal value (boolean, character, integer, string)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
@ -25,13 +26,22 @@ pub enum Literal {
/// Binding patterns for each kind of matchable [Ty]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// Matches anything without binding
Ignore,
/// Matches nothing; used for macro substitution.
MetId(String),
/// Matches anything, and binds it to a name
Name(String),
/// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`).
Rest(Option<Box<Pat>>),
/// Matches a literal value by equality comparison
Lit(Literal),
/// Matches the elements of a tuple; displayed as `(a, b)`
Tuple(Vec<Pat>),
/// Matches the elements of a slice; displayed as `[a, b]`
Slice(Vec<Pat>),
/// Matches one of the provided alternates; displayed as `<a | b>`
Alt(Vec<Pat>),
}
/// The arms of a make expression
@ -53,6 +63,8 @@ pub struct MatchArm<A: Annotation = Span>(pub Vec<Pat>, pub Anno<Expr<A>, A>);
pub enum Ty {
/// `_`
Infer,
/// `(Identifier :: )* Identifier`
Named(String),
/// `(..Tys)`
Tuple(Vec<Ty>),
/// `[Ty]`
@ -72,12 +84,12 @@ pub enum Expr<A: Annotation = Span> {
MetId(String),
/// A literal bool, string, char, or int
Lit(Literal),
/// let pattern = expr
/// let Pat<NoTopAlt> = expr
Let(Pat, Option<Box<Anno<Self, A>>>),
/// `const Pat (= Expr)?` (Basically let rec)
/// `const Pat<NoTopAlt> (= Expr)?` (Basically let rec)
Const(Pat, Box<Anno<Self, A>>),
/// `| Pat,* | Expr` | `|| Expr` | `fn (Pat,*) Expr`
Fn(Vec<Pat>, Box<Anno<Self, A>>),
/// `| Pat<Tuple> | Expr` | `|| Expr` | `fn (Pat,*) Expr`
Fn(Pat, Box<Anno<Self, A>>),
/// Expr { (Ident (: Expr)?),* }
Make(Box<Anno<Self, A>>, Vec<MakeArm<A>>),
/// match Expr { MatchArm,* }
@ -101,50 +113,22 @@ impl<A: Annotation> Expr<A> {
| Self::Op(Op::Deref, _)
)
}
// pub fn is_assignee(&self) -> bool {
// match self {
// Self::Id(_) => todo!(),
// Self::MetId(_) => todo!(),
// Self::Lit(literal) => todo!(),
// Self::Let(pat, anno) => todo!(),
// Self::Const(pat, anno) => todo!(),
// Self::Fn(pats, anno) => todo!(),
// Self::Make(anno, make_arms) => todo!(),
// Self::Match(anno, match_arms) => todo!(),
// Self::Op(Op::Add, annos) => todo!(),
// Self::Op(Op::And, _) => false,
// }
// }
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
// -- fake operators used to assign precedences to special forms
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Let, // let Pat = Expr
Const, // const Pat = Expr
Fn, // fn ( Pat,* ) Expr
Make, // Expr{ Expr,* }
Macro, // macro Expr => Expr
Match, // match Expr { MatchArm,* }
End, // Produces an empty value.
// -- true operators
Do, // Expr ; Expr
Macro, // macro Expr => Expr
Block, // { Expr }
Array, // [ Expr,* ]
Group, // ( Expr ,?)
Tuple, // ( Expr,* )
Tuple, // Expr (, Expr)*
Try, // Expr '?'
Index, // Expr [ Expr,* ]
Call, // Expr ( Expr,* )
Lambda, // |Pat?| Expr
Loop, // loop Expr
If, // if Expr Expr (else Expr)?
While, // while Expr Expr (else Expr)?
@ -220,20 +204,23 @@ impl<A: Annotation> Display for Expr<A> {
Self::Let(pat, None) => write!(f, "let {pat}"),
Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"),
Self::Make(expr, make_arms) => {
f.delimit(fmt!("make {expr} {{"), "}").list(make_arms, ", ")
f.delimit(fmt!("({expr} {{"), "})").list(make_arms, ", ")
}
Self::Match(expr, match_arms) => f
.delimit_indented(fmt!("match {expr} {{\n"), "\n}")
.list_end(match_arms, ",\n", ","),
Self::Fn(pats, expr) => f.delimit("fn (", fmt!(") {expr}")).list(pats, ", "),
.delimit_indented(fmt!("match {expr} {{\n"), "}")
.list_wrap("\n", match_arms, ",\n", ",\n"),
Self::Fn(pat, expr) => write!(f, "fn {pat} {expr}"),
Self::Op(op @ (Op::If | Op::While), exprs) => match exprs.as_slice() {
[cond, pass, fail] => write!(f, "{op}{cond} {pass} else {fail}"),
other => f.delimit(fmt!("({op}, "), ")").list(other, ", "),
},
Self::Op(Op::Array, exprs) => f.delimit("[", "]").list(exprs, ", "),
Self::Op(Op::Block, exprs) => f.delimit_indented("{\n", "\n}").list(exprs, ", "),
Self::Op(Op::Block, exprs) => f
.delimit_indented("{", "}")
.list_wrap("\n", exprs, "\n", "\n"),
Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
Self::Op(Op::Group, exprs) => f.list(exprs, ", "),
Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
[callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
@ -246,9 +233,9 @@ impl<A: Annotation> Display for Expr<A> {
Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"),
Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
Self::Op(op @ Op::Try, exprs) => f.delimit("", op).list(exprs, ", "),
Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "),
Self::Op(op, exprs) => match exprs.as_slice() {
[_] => f.delimit(op, "").list(exprs, ", "),
[one] => write!(f, "{op}({one})"),
many => f.delimit("(", ")").list(many, op),
},
}
@ -259,15 +246,7 @@ impl Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Op::Do => "; ".fmt(f),
Op::Id => "_".fmt(f),
Op::Mid => "`".fmt(f),
Op::Lit => "##".fmt(f),
Op::Let => "let ".fmt(f),
Op::Const => "const ".fmt(f),
Op::Fn => "fn ".fmt(f),
Op::Macro => "macro ".fmt(f),
Op::Match => "match ".fmt(f),
Op::End => "()".fmt(f),
Op::Block => "{}".fmt(f),
Op::Array => "[]".fmt(f),
Op::Group => "()".fmt(f),
@ -275,8 +254,6 @@ impl Display for Op {
Op::Try => "?".fmt(f),
Op::Index => "".fmt(f),
Op::Call => "".fmt(f),
Op::Make => "".fmt(f),
Op::Lambda => "".fmt(f),
Op::Loop => "loop ".fmt(f),
Op::If => "if ".fmt(f),
Op::While => "while ".fmt(f),
@ -342,6 +319,7 @@ impl Display for Pat {
Self::Rest(None) => write!(f, ".."),
Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "),
Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "),
Self::Alt(pats) => f.delimit("<", ">").list(pats, " | "),
}
}
}

View File

@ -191,6 +191,8 @@ impl<A: Annotation> Match<A> for Pat {
(Pat::Tuple(_), _) => false,
(Pat::Slice(pat), Pat::Slice(expr)) => Match::recurse(sub, pat, expr),
(Pat::Slice(_), _) => false,
(Pat::Alt(pat), Pat::Alt(expr)) => Match::recurse(sub, pat, expr),
(Pat::Alt(_), _) => false,
}
}
@ -205,6 +207,7 @@ impl<A: Annotation> Match<A> for Pat {
Pat::Rest(pat) => pat.apply(sub),
Pat::Tuple(pats) => pats.apply(sub),
Pat::Slice(pats) => pats.apply(sub),
Pat::Alt(pats) => pats.apply(sub),
}
}
}

View File

@ -31,7 +31,7 @@ pub trait FmtAdapter: Write {
/// Formats bracketed lists of the kind (Item (Comma Item)*)?
#[inline]
fn list<Item: Display, Sep: Display>(&mut self, items: &[Item], sep: Sep) -> std::fmt::Result {
self.list_end(items, sep, "")
self.list_wrap("", items, sep, "")
}
fn list_end<Item: Display, Sep: Display, End: Display>(
@ -40,15 +40,31 @@ pub trait FmtAdapter: Write {
sep: Sep,
end: End,
) -> std::fmt::Result {
let mut pats = items;
while let [pat, rest @ ..] = pats {
self.list_wrap("", items, sep, end)
}
/// Wraps a list in `open` and `close`.
/// This differs from [`FmtAdapter::delimit`] because it prints nothing
/// if the list is empty.
fn list_wrap<Item: Display, Sep: Display, O: Display, E: Display>(
&mut self,
open: O,
mut items: &[Item],
sep: Sep,
close: E,
) -> std::fmt::Result {
if items.is_empty() {
return Ok(());
}
write!(self, "{open}")?;
while let [pat, rest @ ..] = items {
write!(self, "{pat}")?;
if !rest.is_empty() {
write!(self, "{sep}")?;
}
pats = rest
items = rest
}
write!(self, "{end}")
write!(self, "{close}")
}
}

View File

@ -46,9 +46,7 @@ impl<'t> Lexer<'t> {
fn advance_tail(&mut self) {
match self.iter.peek() {
Some(&(idx, _)) => self.tail = idx as u32,
None => {
self.tail = self.text.len() as _;
}
None => self.tail = self.text.len() as _,
}
}

View File

@ -3,7 +3,7 @@
use doughlang::{
ast::{
Expr,
matcher::{Match, Subst},
macro_matcher::{Match, Subst},
},
lexer::{LexError, Lexer},
parser::{ParseError, Parser},
@ -32,6 +32,7 @@ fn main() -> Result<(), Box<dyn Error>> {
Ok(Response::Deny)
}
_ => {
lex(line);
parse(line);
Ok(Response::Accept)
}
@ -82,14 +83,19 @@ fn subst() -> Result<(), Box<dyn Error>> {
continue;
};
if p.next_if(TKind::Colon).is_err() {
if p.next_if(TKind::Arrow).is_err() {
let Some(Subst { exp, pat }) = exp.construct(&pat) else {
println!("Match failed: {exp} <- {pat}");
continue;
};
for (name, pat) in pat.iter() {
let mut pats: Vec<_> = pat.into_iter().collect();
pats.sort_by(|(a, _), (b, _)| a.cmp(b));
for (name, pat) in pats {
println!("{name}: {pat}")
}
for (name, expr) in exp.iter() {
let mut exprs: Vec<_> = exp.into_iter().collect();
exprs.sort_by(|(a, _), (b, _)| a.cmp(b));
for (name, expr) in exprs.iter() {
println!("{name}: {expr}")
}
continue;
@ -108,7 +114,7 @@ fn parse(document: &str) {
let mut parser = Parser::new(Lexer::new(document));
loop {
match parser.parse::<Expr>(0) {
// Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break,
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break,
Err(e) => {
println!("\x1b[31m{e}\x1b[0m");
break;

View File

@ -13,6 +13,7 @@ pub mod numeric;
pub enum ParseError {
FromLexer(LexError),
Expected(TKind, Span),
NotLiteral(TKind, Span),
NotPattern(TKind, Span),
NotPrefix(TKind, Span),
NotInfix(TKind, Span),
@ -24,6 +25,7 @@ impl Display for ParseError {
match self {
Self::FromLexer(e) => e.fmt(f),
Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."),
@ -57,7 +59,7 @@ impl<'t> Parser<'t> {
}
/// Parses a value that implements the [Parse] trait.
pub fn parse<T: Parse<'t>>(&mut self, level: usize) -> PResult<T> {
pub fn parse<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
Parse::parse(self, level)
}
@ -107,17 +109,18 @@ impl<'t> Parser<'t> {
}
/// Parses a list of P separated by `sep` tokens, ending in an `end` token.
/// ```nobnf
/// ```ignore
/// List<T> = (T `sep`)* T? `end` ;
/// ```
pub fn list<P: Parse<'t>>(
&mut self,
mut elems: Vec<P>,
level: P::Prec,
sep: TKind,
end: TKind,
) -> PResult<Vec<P>> {
while self.peek_if(end).is_none() {
elems.push(self.parse(0)?);
elems.push(self.parse(level)?);
if self.next_if(sep).is_err() {
break;
}
@ -126,8 +129,26 @@ impl<'t> Parser<'t> {
Ok(elems)
}
/// Parses one or more `P` at binding level `level`, separated by `sep` tokens.
///
/// Parsed items are appended to `elems`, so a caller may seed the list with an
/// already-parsed head. At least one item is always parsed; parsing stops after the
/// first item that is not followed by `sep`. There is no closing token.
/// ```ignore
/// UnterminatedList<P> = P (`sep` P)*
/// ```
pub fn list_bare<P: Parse<'t>>(
    &mut self,
    mut elems: Vec<P>,
    level: P::Prec,
    sep: TKind,
) -> PResult<Vec<P>> {
    // The first item is mandatory.
    elems.push(self.parse(level)?);
    // Every consumed separator must be followed by another item.
    while self.next_if(sep).is_ok() {
        elems.push(self.parse(level)?);
    }
    Ok(elems)
}
/// Parses into an [`Option<P>`] if the next token is `next`
pub fn opt_if<P: Parse<'t>>(&mut self, level: usize, next: TKind) -> PResult<Option<P>> {
pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
Ok(match self.next_if(next) {
Ok(_) => Some(self.parse(level)?),
Err(_) => None,
@ -135,7 +156,7 @@ impl<'t> Parser<'t> {
}
/// Parses an expression into a vec unless the next token is `end`
pub fn opt<P: Parse<'t>>(&mut self, level: usize, end: TKind) -> PResult<Option<P>> {
pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
let out = match self.peek_if(end) {
None => Some(self.parse(level)?),
Some(_) => None,
@ -152,19 +173,25 @@ impl<'t> Parser<'t> {
}
pub trait Parse<'t> {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
type Prec: Copy;
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
where Self: Sized;
}
impl<'t> Parse<'t> for Literal {
type Prec = usize;
fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
let tok = p.peek()?;
Ok(match tok.kind {
TKind::True => p.consume().then(Literal::Bool(true)),
TKind::False => p.consume().then(Literal::Bool(false)),
TKind::Character => {
Literal::Char(p.take_lexeme().expect("should have Token").remove(0))
}
TKind::Character => Literal::Char(
p.take_lexeme()
.expect("should have Token")
.chars()
.next()
.expect("should have one char in char literal"),
),
TKind::Integer => {
let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
// TODO: more complex int parsing
@ -179,78 +206,128 @@ impl<'t> Parse<'t> for Literal {
}
}
/// Binding levels used when parsing a [Pat].
///
/// The pattern parser compares the requested level against these variants using the
/// derived ordering (declaration order, `Min` lowest) to decide whether a trailing
/// `|` or `,` is folded into the pattern being parsed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PPrec {
/// Lowest level: both a bare `,` tuple and a `|` alternation are folded.
Min,
/// Level requested for tuple/slice elements: `|` is folded, `,` is left to the caller.
Tuple,
/// Level requested for each alternate: neither `|` nor `,` is folded.
Alt,
/// Level requested for `let` and `const` patterns. Compares above `Alt`, so neither
/// a top-level `|` nor a bare top-level `,` is folded.
NoTopAlt,
}
impl<'t> Parse<'t> for Pat {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
type Prec = PPrec;
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
while p.next_if(TKind::Comment).is_ok() {}
let tok = p.peek()?;
match tok.kind {
TKind::Comment => p.consume().parse(level),
// Prefix
let mut head = match tok.kind {
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
Ok(Pat::Lit(p.parse(0)?))
Pat::Lit(p.parse(0)?)
}
TKind::Identifier => match tok.lexeme.as_str() {
"_" => Ok(p.consume().then(Pat::Ignore)),
_ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))),
"_" => p.consume().then(Pat::Ignore),
_ => Pat::Name(p.take_lexeme().expect("should have Token")),
},
TKind::Grave => Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)),
TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
TKind::Grave => Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
Some(_) => Some(p.parse(level)?),
None => None,
})),
TKind::LParen => Ok(Pat::Tuple(p.consume().list(
vec![],
TKind::Comma,
TKind::RParen,
)?)),
TKind::LBrack => Ok(Pat::Slice(p.consume().list(
vec![],
TKind::Comma,
TKind::RBrack,
)?)),
_ => Err(ParseError::NotPattern(tok.kind, tok.span)),
}),
TKind::LParen => {
Pat::Tuple(
p.consume()
.list(vec![], PPrec::Tuple, TKind::Comma, TKind::RParen)?,
)
}
TKind::LBrack => {
Pat::Slice(
p.consume()
.list(vec![], PPrec::Tuple, TKind::Comma, TKind::RBrack)?,
)
}
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};
// Infix
while let Ok(tok) = p.peek() {
let kind = tok.kind;
head = match kind {
TKind::Bar if level < PPrec::Alt => {
Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt, kind)?)
}
TKind::Comma if level < PPrec::Tuple => {
Pat::Tuple(p.consume().list_bare(vec![head], PPrec::Tuple, kind)?)
}
_ => break,
}
}
Ok(head)
}
}
impl<'t> Parse<'t> for MatchArm {
type Prec = usize;
fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
p.next_if(TKind::Bar).ok();
Ok(MatchArm(
p.list(vec![], TKind::Bar, TKind::FatArrow)?,
p.list(vec![], PPrec::Min, TKind::Bar, TKind::FatArrow)?,
p.parse(0)?,
))
}
}
impl<'t> Parse<'t> for MakeArm {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
Ok(MakeArm(p.next_if(TKind::Identifier)?.lexeme, {
p.next_if(TKind::Colon)
.ok()
.map(|_| p.parse(level))
.map(|_| p.parse(Prec::Min.value()))
.transpose()?
}))
}
}
/// Organizes the precedence hierarchy for syntactic elements
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Prec {
Min,
/// The Semicolon Operator gets its own precedence level
Do,
/// An assignment
Assign,
/// Constructor for a tuple
Tuple,
/// Constructor for a struct
Make,
/// The body of a function, conditional, etc.
Body,
/// The short-circuiting logical operators [Prec::LogOr], [Prec::LogAnd]
Logical,
/// The short-circuiting "boolean or" operator
LogOr,
/// The short-circuiting "boolean and" operator
LogAnd,
/// Value comparison operators
Compare,
/// Constructor for a Range
Range,
/// Binary/bitwise operators
Binary,
/// Bit-shifting operators
Shift,
/// Addition and Subtraction operators
Factor,
/// Multiplication, Division, and Remainder operators
Term,
Project,
/// Negation, (De)reference, Try
Unary,
/// Place-projection operators
Project,
/// Array/Call subscripting and reference
Extend,
Max,
}
@ -274,96 +351,117 @@ impl Prec {
}
}
fn from_prefix(token: &Token) -> PResult<(Op, Prec)> {
/// PseudoOperator: fake operators used to give certain tokens special behavior.
///
/// This is the operator kind returned (paired with a `Prec`) by the token
/// classifiers `from_prefix`, `from_infix` and `from_postfix`. Special forms get
/// their own variant; everything else is wrapped in [`Ps::Op`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ps {
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Let, // let Pat = Expr
Const, // const Pat = Expr
Fn, // fn ( Pat,* ) Expr
Lambda0, // || Expr
Lambda, // | Pat,* | Expr
DoubleRef, // && Expr
Make, // Expr{ Expr,* }
Match, // match Expr { MatchArm,* }
End, // Produces an empty value.
Op(Op), // A normal [ast::Op]
}
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Op::Do, Prec::Do),
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
(Op::Lit, Prec::Max)
(Ps::Lit, Prec::Max)
}
TKind::Identifier => (Op::Id, Prec::Max),
TKind::Grave => (Op::Mid, Prec::Max),
TKind::Fn => (Op::Fn, Prec::Body),
TKind::Match => (Op::Match, Prec::Body),
TKind::Macro => (Op::Macro, Prec::Assign),
TKind::Let => (Op::Let, Prec::Body),
TKind::Const => (Op::Const, Prec::Body),
TKind::Loop => (Op::Loop, Prec::Body),
TKind::If => (Op::If, Prec::Body),
TKind::While => (Op::While, Prec::Body),
TKind::Break => (Op::Break, Prec::Body),
TKind::Return => (Op::Return, Prec::Body),
TKind::Identifier => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
TKind::Fn => (Ps::Fn, Prec::Body),
TKind::LBrack => (Op::Array, Prec::Min),
TKind::RBrack => (Op::End, Prec::Min),
TKind::LCurly => (Op::Block, Prec::Min),
TKind::RCurly => (Op::End, Prec::Min),
TKind::LParen => (Op::Group, Prec::Min),
TKind::RParen => (Op::End, Prec::Min),
TKind::Amp => (Op::Refer, Prec::Max),
// TKind::AmpAmp => todo!("addraddr"),
TKind::Bang => (Op::Not, Prec::Unary),
TKind::BangBang => (Op::Identity, Prec::Unary),
TKind::Bar => (Op::Lambda, Prec::Min),
TKind::BarBar => (Op::Lambda, Prec::Max),
TKind::DotDot => (Op::RangeEx, Prec::Range),
TKind::DotDotEq => (Op::RangeIn, Prec::Range),
TKind::Minus => (Op::Neg, Prec::Unary),
TKind::Plus => (Op::Identity, Prec::Unary),
TKind::Star => (Op::Deref, Prec::Unary),
TKind::Match => (Ps::Match, Prec::Body),
TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign),
TKind::Let => (Ps::Let, Prec::Body),
TKind::Const => (Ps::Const, Prec::Body),
TKind::Loop => (Ps::Op(Op::Loop), Prec::Body),
TKind::If => (Ps::Op(Op::If), Prec::Body),
TKind::While => (Ps::Op(Op::While), Prec::Body),
TKind::Break => (Ps::Op(Op::Break), Prec::Body),
TKind::Return => (Ps::Op(Op::Return), Prec::Body),
TKind::LCurly => (Ps::Op(Op::Block), Prec::Min),
TKind::RCurly => (Ps::End, Prec::Do),
TKind::LBrack => (Ps::Op(Op::Array), Prec::Min),
TKind::RBrack => (Ps::End, Prec::Tuple),
TKind::LParen => (Ps::Op(Op::Group), Prec::Min),
TKind::RParen => (Ps::End, Prec::Tuple),
TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend),
TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend),
TKind::Bang => (Ps::Op(Op::Not), Prec::Unary),
TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Bar => (Ps::Lambda, Prec::Body),
TKind::BarBar => (Ps::Lambda0, Prec::Body),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
kind => Err(ParseError::NotPrefix(kind, token.span))?,
})
}
fn from_infix(token: &Token) -> PResult<(Op, Prec)> {
fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Semi => (Op::Do, Prec::Do), // the inspiration
TKind::RParen => (Op::End, Prec::Do),
TKind::Comma => (Op::Tuple, Prec::Tuple),
TKind::Eq => (Op::Set, Prec::Assign),
TKind::XorXor => (Op::LogXor, Prec::Logical),
TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd),
TKind::BarBar => (Op::LogOr, Prec::LogOr),
TKind::Lt => (Op::Lt, Prec::Compare),
TKind::LtEq => (Op::Leq, Prec::Compare),
TKind::EqEq => (Op::Eq, Prec::Compare),
TKind::BangEq => (Op::Neq, Prec::Compare),
TKind::GtEq => (Op::Geq, Prec::Compare),
TKind::Gt => (Op::Gt, Prec::Compare),
TKind::DotDot => (Op::RangeEx, Prec::Range),
TKind::DotDotEq => (Op::RangeIn, Prec::Range),
TKind::Amp => (Op::And, Prec::Binary),
TKind::Xor => (Op::Xor, Prec::Binary),
TKind::Bar => (Op::Or, Prec::Binary),
TKind::LtLt => (Op::Shl, Prec::Shift),
TKind::GtGt => (Op::Shr, Prec::Shift),
TKind::Plus => (Op::Add, Prec::Factor),
TKind::Minus => (Op::Sub, Prec::Factor),
TKind::Star => (Op::Mul, Prec::Term),
TKind::Slash => (Op::Div, Prec::Term),
TKind::Rem => (Op::Rem, Prec::Term),
TKind::Dot => (Op::Dot, Prec::Project),
TKind::ColonColon => (Op::Path, Prec::Max),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare),
TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare),
TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Amp => (Ps::Op(Op::And), Prec::Binary),
TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary),
TKind::Bar => (Ps::Op(Op::Or), Prec::Binary),
TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift),
TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift),
TKind::Plus => (Ps::Op(Op::Add), Prec::Factor),
TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor),
TKind::Star => (Ps::Op(Op::Mul), Prec::Term),
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
TKind::Question => (Ps::End, Prec::Extend),
kind => Err(ParseError::NotInfix(kind, token.span))?,
})
}
fn from_postfix(token: &Token) -> PResult<(Op, Prec)> {
fn from_postfix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Question => (Op::Try, Prec::Unary),
TKind::LParen => (Op::Call, Prec::Extend),
TKind::LBrack => (Op::Index, Prec::Extend),
TKind::LCurly => (Op::Make, Prec::Make),
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::LCurly => (Ps::Make, Prec::Make),
kind => Err(ParseError::NotPostfix(kind, token.span))?,
})
}
#[rustfmt::skip]
fn should_coagulate(prev: Op, op: Op) -> bool {
prev == op && (match prev {
Op::Do => true,
Op::Tuple => true,
prev == op && match prev {
Op::LogAnd => true,
Op::LogOr => true,
Op::Dot => false,
Op::Path => true,
Op::Lt => false,
@ -373,10 +471,12 @@ fn should_coagulate(prev: Op, op: Op) -> bool {
Op::Geq => false,
Op::Gt => false,
_ => false,
})
}
}
impl<'t> Parse<'t> for Expr {
type Prec = usize;
/// Parses an [Expr]ession.
///
/// The `level` parameter indicates the operator binding level of the expression.
@ -391,38 +491,44 @@ impl<'t> Parse<'t> for Expr {
let mut head = match op {
// Empty is returned when a block finisher is an expr prefix.
// It's the only expr that doesn't consume.
Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]),
Op::End => Err(ParseError::NotPrefix(tok.kind, span))?,
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
Op::Lit => Expr::Lit(p.parse(MIN)?),
Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?),
Op::Const => Expr::Const(p.consume().parse(prec.next())?, {
Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
Ps::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
Ps::Lit => Expr::Lit(p.parse(MIN)?),
Ps::Let => Expr::Let(
p.consume().parse(PPrec::NoTopAlt)?,
p.opt_if(prec.next(), TKind::Eq)?,
),
Ps::Const => Expr::Const(p.consume().parse(PPrec::NoTopAlt)?, {
p.next_if(TKind::Eq)?;
p.parse(prec.next())?
}),
Op::Macro => Expr::Op(
op,
Ps::Op(Op::Macro) => Expr::Op(
Op::Macro,
vec![p.consume().parse(prec.next())?, {
p.next_if(TKind::FatArrow)?;
p.parse(prec.next())?
}],
),
Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, {
Ps::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, {
p.next_if(TKind::LCurly)?;
p.list(vec![], TKind::Comma, TKind::RCurly)?
p.list(vec![], 0, TKind::Comma, TKind::RCurly)?
}),
Op::Block => Expr::Op(
op,
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?),
Op::Group => match p.consume().opt(MIN, TKind::RParen)? {
Ps::Op(Op::Array) => Expr::Op(
Op::Array,
p.consume().list(vec![], 0, TKind::Comma, TKind::RBrack)?,
),
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
Op::If | Op::While => {
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
@ -430,50 +536,68 @@ impl<'t> Parse<'t> for Expr {
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
_ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())),
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
Op::Fn => {
Ps::Fn => {
// TODO: move this to 'item' parsing
p.consume().next_if(TKind::LParen)?;
Expr::Fn(
p.list(vec![], TKind::Comma, TKind::RParen)?,
Pat::Tuple(p.consume().list(
vec![],
PPrec::Tuple,
TKind::Comma,
TKind::RParen,
)?),
p.parse(prec.next())?,
)
}
// dirty hack: There are two closure operators, signaled by returned prec.
Op::Lambda if prec == Prec::Min => Expr::Fn(
p.consume().list(vec![], TKind::Comma, TKind::Bar)?,
Ps::Lambda => Expr::Fn(
Pat::Tuple(
p.consume()
.list(vec![], PPrec::Tuple, TKind::Comma, TKind::Bar)?,
),
p.parse(Prec::Body.next())?,
),
Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?),
Ps::Lambda0 => Expr::Fn(Pat::Tuple(vec![]), p.consume().parse(Prec::Body.next())?),
Ps::DoubleRef => Expr::Op(
Op::Refer,
vec![Expr::Op(Op::Refer, vec![p.consume().parse(prec.next())?]).anno(span)],
),
_ => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Postfix
while let Ok(tok) = p.peek()
&& let Ok((op, prec)) = from_postfix(tok)
&& level <= prec.prev()
&& op != Op::End
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
p.consume();
head = match op {
Op::Make => Expr::Make(
Ps::Make => Expr::Make(
head.anno(span).into(),
p.consume().list(vec![], TKind::Comma, TKind::RCurly)?,
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
),
Op::Index => Expr::Op(
op,
p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?,
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
Op::Call => Expr::Op(
op,
p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?,
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
p.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?,
),
_ => Expr::Op(op, vec![head.anno(span)]),
Ps::Op(op @ (Op::Do | Op::Tuple | Op::Dot | Op::Path | Op::LogAnd | Op::LogOr)) => {
Expr::Op(op, p.list_bare(vec![head.anno(span)], prec.next(), kind)?)
}
Ps::Op(op) => Expr::Op(op, vec![head.anno(span)]),
_ => unimplemented!("postfix {op:?}"),
};
}
@ -481,18 +605,19 @@ impl<'t> Parse<'t> for Expr {
while let Ok(tok) = p.peek()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Op::End
&& op != Ps::End
{
let span = span.merge(p.span());
p.consume();
head = match head {
head = match (op, head) {
// controls expression chaining vs coagulating
Expr::Op(prev, mut args) if should_coagulate(prev, op) => {
(Ps::Op(op), Expr::Op(prev, mut args)) if should_coagulate(prev, op) => {
args.push(p.parse(prec.next())?);
Expr::Op(op, args)
}
head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]),
(Ps::Op(op), head) => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]),
_ => unimplemented!("infix {op:?}"),
}
}
@ -501,15 +626,19 @@ impl<'t> Parse<'t> for Expr {
}
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
type Prec = P::Prec;
fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
where Self: Sized {
let start = p.span();
Ok(Anno(p.parse(level)?, start.merge(p.span())))
let anno = Anno(p.parse(level)?, start.merge(p.span()));
println!("{}:\t{anno}", anno.1);
Ok(anno)
}
}
impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
type Prec = P::Prec;
fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
where Self: Sized {
Ok(Box::new(p.parse(level)?))
}