From b6949147c41406f53c2f8424d6723445f82f145b Mon Sep 17 00:00:00 2001 From: John Date: Mon, 15 Sep 2025 10:28:08 -0400 Subject: [PATCH] parser: Refactor coagulated binops as postfix operators This allows them to intermingle more nicely with `Try` --- src/ast.rs | 82 +++----- src/ast/macro_matcher.rs | 3 + src/fmt.rs | 26 ++- src/lexer.rs | 4 +- src/main.rs | 16 +- src/parser.rs | 415 +++++++++++++++++++++++++-------------- 6 files changed, 338 insertions(+), 208 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 6838d42..6cb55e1 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,6 @@ //! The Abstract Syntax Tree defines an interface between the parser and type checker -pub mod matcher; +pub mod macro_matcher; /// A value with an annotation. #[derive(Clone, Debug, PartialEq, Eq)] @@ -10,6 +10,7 @@ pub struct Anno(pub T, pub A); pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {} impl Annotation for T {} +/// A literal value (boolean, character, integer, string) #[derive(Clone, Debug, PartialEq, Eq)] pub enum Literal { /// A boolean literal: true | false @@ -25,13 +26,22 @@ pub enum Literal { /// Binding patterns for each kind of matchable [Ty] #[derive(Clone, Debug, PartialEq, Eq)] pub enum Pat { + /// Matches anything without binding Ignore, + /// Matches nothing; used for macro substitution. MetId(String), + /// Matches anything, and binds it to a name Name(String), + /// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`). Rest(Option>), + /// Matches a literal value by equality comparison Lit(Literal), + /// Matches the elements of a tuple Tuple(Vec), + /// Matches the elements Slice(Vec), + /// Matches one of the provided alternates + Alt(Vec), } /// The arms of a make expression @@ -53,6 +63,8 @@ pub struct MatchArm(pub Vec, pub Anno, A>); pub enum Ty { /// `_` Infer, + /// `(Identifier :: )* Identifier` + Named(String), /// `(..Tys)` Tuple(Vec), /// `[Ty]` @@ -72,12 +84,12 @@ pub enum Expr { MetId(String), /// A literal bool, string, char, or int Lit(Literal), - /// let pattern = expr + /// let Pat = expr Let(Pat, Option>>), - /// `const Pat (= Expr)?` (Basically let rec) + /// `const Pat (= Expr)?` (Basically let rec) Const(Pat, Box>), - /// `| Pat,* | Expr` | `|| Expr` | `fn (Pat,*) Expr` - Fn(Vec, Box>), + /// `| Pat | Expr` | `|| Expr` | `fn (Pat,*) Expr` + Fn(Pat, Box>), /// Expr { (Ident (: Expr)?),* } Make(Box>, Vec>), /// match Expr { MatchArm,* } @@ -101,50 +113,22 @@ impl Expr { | Self::Op(Op::Deref, _) ) } - - // pub fn is_assignee(&self) -> bool { - // match self { - // Self::Id(_) => todo!(), - // Self::MetId(_) => todo!(), - // Self::Lit(literal) => todo!(), - // Self::Let(pat, anno) => todo!(), - // Self::Const(pat, anno) => todo!(), - // Self::Fn(pats, anno) => todo!(), - // Self::Make(anno, make_arms) => todo!(), - // Self::Match(anno, match_arms) => todo!(), - // Self::Op(Op::Add, annos) => todo!(), - // Self::Op(Op::And, _) => false, - // } - // } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Op { - // -- fake operators used to assign precedences to special forms - Id, // Identifier - Mid, // MetaIdentifier - Lit, // Literal - Let, // let Pat = Expr - Const, // const Pat = Expr - Fn, // fn ( Pat,* ) Expr - Make, // Expr{ Expr,* } - Macro, // macro Expr => Expr - Match, // match Expr { MatchArm,* } - End, // Produces an empty value. - // -- true operators Do, // Expr ; Expr + Macro, // macro Expr => Expr Block, // { Expr } Array, // [ Expr,* ] Group, // ( Expr ,?) - Tuple, // ( Expr,* ) + Tuple, // Expr (, Expr)* Try, // Expr '?' Index, // Expr [ Expr,* ] Call, // Expr ( Expr,* ) - Lambda, // |Pat?| Expr - Loop, // loop Expr If, // if Expr Expr (else Expr)? While, // while Expr Expr (else Expr)? @@ -220,20 +204,23 @@ impl Display for Expr { Self::Let(pat, None) => write!(f, "let {pat}"), Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"), Self::Make(expr, make_arms) => { - f.delimit(fmt!("make {expr} {{"), "}").list(make_arms, ", ") + f.delimit(fmt!("({expr} {{"), "})").list(make_arms, ", ") } Self::Match(expr, match_arms) => f - .delimit_indented(fmt!("match {expr} {{\n"), "\n}") - .list_end(match_arms, ",\n", ","), - Self::Fn(pats, expr) => f.delimit("fn (", fmt!(") {expr}")).list(pats, ", "), + .delimit_indented(fmt!("match {expr} {{\n"), "}") + .list_wrap("\n", match_arms, ",\n", ",\n"), + Self::Fn(pat, expr) => write!(f, "fn {pat} {expr}"), Self::Op(op @ (Op::If | Op::While), exprs) => match exprs.as_slice() { [cond, pass, fail] => write!(f, "{op}{cond} {pass} else {fail}"), other => f.delimit(fmt!("({op}, "), ")").list(other, ", "), }, Self::Op(Op::Array, exprs) => f.delimit("[", "]").list(exprs, ", "), - Self::Op(Op::Block, exprs) => f.delimit_indented("{\n", "\n}").list(exprs, ", "), + Self::Op(Op::Block, exprs) => f + .delimit_indented("{", "}") + .list_wrap("\n", exprs, "\n", "\n"), Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "), + Self::Op(Op::Group, exprs) => f.list(exprs, ", "), Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() { [callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "), @@ -246,9 +233,9 @@ impl Display for Expr { Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"), Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "), - Self::Op(op @ Op::Try, exprs) => f.delimit("", op).list(exprs, ", "), + Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "), Self::Op(op, exprs) => match exprs.as_slice() { - [_] => f.delimit(op, "").list(exprs, ", "), + [one] => write!(f, "{op}({one})"), many => f.delimit("(", ")").list(many, op), }, } @@ -259,15 +246,7 @@ impl Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Op::Do => "; ".fmt(f), - Op::Id => "_".fmt(f), - Op::Mid => "`".fmt(f), - Op::Lit => "##".fmt(f), - Op::Let => "let ".fmt(f), - Op::Const => "const ".fmt(f), - Op::Fn => "fn ".fmt(f), Op::Macro => "macro ".fmt(f), - Op::Match => "match ".fmt(f), - Op::End => "()".fmt(f), Op::Block => "{}".fmt(f), Op::Array => "[]".fmt(f), Op::Group => "()".fmt(f), @@ -275,8 +254,6 @@ impl Display for Op { Op::Try => "?".fmt(f), Op::Index => "".fmt(f), Op::Call => "".fmt(f), - Op::Make => "".fmt(f), - Op::Lambda => "".fmt(f), Op::Loop => "loop ".fmt(f), Op::If => "if ".fmt(f), Op::While => "while ".fmt(f), @@ -342,6 +319,7 @@ impl Display for Pat { Self::Rest(None) => write!(f, ".."), Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "), Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "), + Self::Alt(pats) => f.delimit("<", ">").list(pats, " | "), } } } diff --git a/src/ast/macro_matcher.rs b/src/ast/macro_matcher.rs index f289ea9..e6d7764 100644 --- a/src/ast/macro_matcher.rs +++ b/src/ast/macro_matcher.rs @@ -191,6 +191,8 @@ impl Match for Pat { (Pat::Tuple(_), _) => false, (Pat::Slice(pat), Pat::Slice(expr)) => Match::recurse(sub, pat, expr), (Pat::Slice(_), _) => false, + (Pat::Alt(pat), Pat::Alt(expr)) => Match::recurse(sub, pat, expr), + (Pat::Alt(_), _) => false, } } @@ -205,6 +207,7 @@ impl Match for Pat { Pat::Rest(pat) => pat.apply(sub), Pat::Tuple(pats) => pats.apply(sub), Pat::Slice(pats) => pats.apply(sub), + Pat::Alt(pats) => pats.apply(sub), } } } diff --git a/src/fmt.rs b/src/fmt.rs index 2839dc5..12bb311 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -31,7 +31,7 @@ pub trait FmtAdapter: Write { /// Formats bracketed lists of the kind (Item (Comma Item)*)? #[inline] fn list(&mut self, items: &[Item], sep: Sep) -> std::fmt::Result { - self.list_end(items, sep, "") + self.list_wrap("", items, sep, "") } fn list_end( @@ -40,15 +40,31 @@ pub trait FmtAdapter: Write { sep: Sep, end: End, ) -> std::fmt::Result { - let mut pats = items; - while let [pat, rest @ ..] = pats { + self.list_wrap("", items, sep, end) + } + + /// Wraps a list in `open` and `close`. + /// This differs from [`FmtAdapter::delimit`] because it prints nothing + /// if the list is empty. + fn list_wrap( + &mut self, + open: O, + mut items: &[Item], + sep: Sep, + close: E, + ) -> std::fmt::Result { + if items.is_empty() { + return Ok(()); + } + write!(self, "{open}")?; + while let [pat, rest @ ..] = items { write!(self, "{pat}")?; if !rest.is_empty() { write!(self, "{sep}")?; } - pats = rest + items = rest } - write!(self, "{end}") + write!(self, "{close}") } } diff --git a/src/lexer.rs b/src/lexer.rs index e54044c..9b22d8f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -46,9 +46,7 @@ impl<'t> Lexer<'t> { fn advance_tail(&mut self) { match self.iter.peek() { Some(&(idx, _)) => self.tail = idx as u32, - None => { - self.tail = self.text.len() as _; - } + None => self.tail = self.text.len() as _, } } diff --git a/src/main.rs b/src/main.rs index 03f7288..3efe9af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use doughlang::{ ast::{ Expr, - matcher::{Match, Subst}, + macro_matcher::{Match, Subst}, }, lexer::{LexError, Lexer}, parser::{ParseError, Parser}, @@ -32,6 +32,7 @@ fn main() -> Result<(), Box> { Ok(Response::Deny) } _ => { + lex(line); parse(line); Ok(Response::Accept) } @@ -82,14 +83,19 @@ fn subst() -> Result<(), Box> { continue; }; - if p.next_if(TKind::Colon).is_err() { + if p.next_if(TKind::Arrow).is_err() { let Some(Subst { exp, pat }) = exp.construct(&pat) else { + println!("Match failed: {exp} <- {pat}"); continue; }; - for (name, pat) in pat.iter() { + let mut pats: Vec<_> = pat.into_iter().collect(); + pats.sort_by(|(a, _), (b, _)| a.cmp(b)); + for (name, pat) in pats { println!("{name}: {pat}") } - for (name, expr) in exp.iter() { + let mut exprs: Vec<_> = exp.into_iter().collect(); + exprs.sort_by(|(a, _), (b, _)| a.cmp(b)); + for (name, expr) in exprs.iter() { println!("{name}: {expr}") } continue; @@ -108,7 +114,7 @@ fn parse(document: &str) { let mut parser = Parser::new(Lexer::new(document)); loop { match parser.parse::(0) { - // Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break, + Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break, Err(e) => { println!("\x1b[31m{e}\x1b[0m"); break; diff --git a/src/parser.rs b/src/parser.rs index a6085ec..cea44c1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -13,6 +13,7 @@ pub mod numeric; pub enum ParseError { FromLexer(LexError), Expected(TKind, Span), + NotLiteral(TKind, Span), NotPattern(TKind, Span), NotPrefix(TKind, Span), NotInfix(TKind, Span), @@ -24,6 +25,7 @@ impl Display for ParseError { match self { Self::FromLexer(e) => e.fmt(f), Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."), + Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."), Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."), @@ -57,7 +59,7 @@ impl<'t> Parser<'t> { } /// Parses a value that implements the [Parse] trait. - pub fn parse>(&mut self, level: usize) -> PResult { + pub fn parse>(&mut self, level: T::Prec) -> PResult { Parse::parse(self, level) } @@ -107,17 +109,18 @@ impl<'t> Parser<'t> { } /// Parses a list of P separated by `sep` tokens, ending in an `end` token. - /// ```nobnf + /// ```ignore /// List = (T `sep`)* T? `end` ; /// ``` pub fn list>( &mut self, mut elems: Vec

, + level: P::Prec, sep: TKind, end: TKind, ) -> PResult> { while self.peek_if(end).is_none() { - elems.push(self.parse(0)?); + elems.push(self.parse(level)?); if self.next_if(sep).is_err() { break; } @@ -126,8 +129,26 @@ impl<'t> Parser<'t> { Ok(elems) } + /// Parses a list of one or more P at level `level`, separated by `sep` tokens + /// ```ignore + /// UnterminatedList

= P (`sep` P)* + /// ``` + pub fn list_bare>( + &mut self, + mut elems: Vec

, + level: P::Prec, + sep: TKind, + ) -> PResult> { + loop { + elems.push(self.parse(level)?); + if self.next_if(sep).is_err() { + break Ok(elems); + } + } + } + /// Parses into an [`Option

`] if the next token is `next` - pub fn opt_if>(&mut self, level: usize, next: TKind) -> PResult> { + pub fn opt_if>(&mut self, level: P::Prec, next: TKind) -> PResult> { Ok(match self.next_if(next) { Ok(_) => Some(self.parse(level)?), Err(_) => None, @@ -135,7 +156,7 @@ impl<'t> Parser<'t> { } /// Parses an expression into a vec unless the next token is `end` - pub fn opt>(&mut self, level: usize, end: TKind) -> PResult> { + pub fn opt>(&mut self, level: P::Prec, end: TKind) -> PResult> { let out = match self.peek_if(end) { None => Some(self.parse(level)?), Some(_) => None, @@ -152,19 +173,25 @@ impl<'t> Parser<'t> { } pub trait Parse<'t> { - fn parse(p: &mut Parser<'t>, level: usize) -> PResult + type Prec: Copy; + fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult where Self: Sized; } impl<'t> Parse<'t> for Literal { + type Prec = usize; fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { let tok = p.peek()?; Ok(match tok.kind { TKind::True => p.consume().then(Literal::Bool(true)), TKind::False => p.consume().then(Literal::Bool(false)), - TKind::Character => { - Literal::Char(p.take_lexeme().expect("should have Token").remove(0)) - } + TKind::Character => Literal::Char( + p.take_lexeme() + .expect("should have Token") + .chars() + .next() + .expect("should have one char in char literal"), + ), TKind::Integer => { let Token { lexeme, kind: _, span } = p.take().expect("should have Token"); // TODO: more complex int parsing @@ -179,78 +206,128 @@ impl<'t> Parse<'t> for Literal { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum PPrec { + Min, + Tuple, + Alt, + NoTopAlt, +} + impl<'t> Parse<'t> for Pat { - fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + type Prec = PPrec; + fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult { + while p.next_if(TKind::Comment).is_ok() {} let tok = p.peek()?; - match tok.kind { - TKind::Comment => p.consume().parse(level), + + // Prefix + let mut head = match tok.kind { TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { - Ok(Pat::Lit(p.parse(0)?)) + Pat::Lit(p.parse(0)?) } TKind::Identifier => match tok.lexeme.as_str() { - "_" => Ok(p.consume().then(Pat::Ignore)), - _ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))), + "_" => p.consume().then(Pat::Ignore), + _ => Pat::Name(p.take_lexeme().expect("should have Token")), }, - TKind::Grave => Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)), - TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) { + TKind::Grave => Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme), + TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) { Some(_) => Some(p.parse(level)?), None => None, - })), - TKind::LParen => Ok(Pat::Tuple(p.consume().list( - vec![], - TKind::Comma, - TKind::RParen, - )?)), - TKind::LBrack => Ok(Pat::Slice(p.consume().list( - vec![], - TKind::Comma, - TKind::RBrack, - )?)), - _ => Err(ParseError::NotPattern(tok.kind, tok.span)), + }), + TKind::LParen => { + Pat::Tuple( + p.consume() + .list(vec![], PPrec::Tuple, TKind::Comma, TKind::RParen)?, + ) + } + TKind::LBrack => { + Pat::Slice( + p.consume() + .list(vec![], PPrec::Tuple, TKind::Comma, TKind::RBrack)?, + ) + } + _ => Err(ParseError::NotPattern(tok.kind, tok.span))?, + }; + + // Infix + while let Ok(tok) = p.peek() { + let kind = tok.kind; + + head = match kind { + TKind::Bar if level < PPrec::Alt => { + Pat::Alt(p.consume().list_bare(vec![head], PPrec::Alt, kind)?) + } + TKind::Comma if level < PPrec::Tuple => { + Pat::Tuple(p.consume().list_bare(vec![head], PPrec::Tuple, kind)?) + } + _ => break, + } } + + Ok(head) } } impl<'t> Parse<'t> for MatchArm { + type Prec = usize; fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { p.next_if(TKind::Bar).ok(); Ok(MatchArm( - p.list(vec![], TKind::Bar, TKind::FatArrow)?, + p.list(vec![], PPrec::Min, TKind::Bar, TKind::FatArrow)?, p.parse(0)?, )) } } impl<'t> Parse<'t> for MakeArm { - fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + type Prec = (); + fn parse(p: &mut Parser<'t>, _level: ()) -> PResult { Ok(MakeArm(p.next_if(TKind::Identifier)?.lexeme, { p.next_if(TKind::Colon) .ok() - .map(|_| p.parse(level)) + .map(|_| p.parse(Prec::Min.value())) .transpose()? })) } } +/// Organizes the precedence hierarchy for syntactic elements #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum Prec { Min, + /// The Semicolon Operator gets its own precedence level Do, + /// An assignment Assign, + /// Constructor for a tuple Tuple, + /// Constructor for a struct Make, + /// The body of a function, conditional, etc. Body, + /// The short-circuiting logical operators [Prec::LogOr], [Prec::LogAnd] Logical, + /// The short-circuiting "boolean or" operator LogOr, + /// The short-circuiting "boolean and" operator LogAnd, + /// Value comparison operators Compare, + /// Constructor for a Range Range, + /// Binary/bitwise operators Binary, + /// Bit-shifting operators Shift, + /// Addition and Subtraction operators Factor, + /// Multiplication, Division, and Remainder operators Term, - Project, + /// Negation, (De)reference, Try Unary, + /// Place-projection operators + Project, + /// Array/Call subscripting and reference Extend, Max, } @@ -274,96 +351,117 @@ impl Prec { } } -fn from_prefix(token: &Token) -> PResult<(Op, Prec)> { +/// PseudoOperator: fake operators used to give certain tokens special behavior. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Ps { + Id, // Identifier + Mid, // MetaIdentifier + Lit, // Literal + Let, // let Pat = Expr + Const, // const Pat = Expr + Fn, // fn ( Pat,* ) Expr + Lambda0, // || Expr + Lambda, // | Pat,* | Expr + DoubleRef, // && Expr + Make, // Expr{ Expr,* } + Match, // match Expr { MatchArm,* } + End, // Produces an empty value. + Op(Op), // A normal [ast::Op] +} + +fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { - TKind::Do => (Op::Do, Prec::Do), + TKind::Do => (Ps::Op(Op::Do), Prec::Do), TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { - (Op::Lit, Prec::Max) + (Ps::Lit, Prec::Max) } - TKind::Identifier => (Op::Id, Prec::Max), - TKind::Grave => (Op::Mid, Prec::Max), - TKind::Fn => (Op::Fn, Prec::Body), - TKind::Match => (Op::Match, Prec::Body), - TKind::Macro => (Op::Macro, Prec::Assign), - TKind::Let => (Op::Let, Prec::Body), - TKind::Const => (Op::Const, Prec::Body), - TKind::Loop => (Op::Loop, Prec::Body), - TKind::If => (Op::If, Prec::Body), - TKind::While => (Op::While, Prec::Body), - TKind::Break => (Op::Break, Prec::Body), - TKind::Return => (Op::Return, Prec::Body), + TKind::Identifier => (Ps::Id, Prec::Max), + TKind::Grave => (Ps::Mid, Prec::Max), + TKind::Fn => (Ps::Fn, Prec::Body), - TKind::LBrack => (Op::Array, Prec::Min), - TKind::RBrack => (Op::End, Prec::Min), - TKind::LCurly => (Op::Block, Prec::Min), - TKind::RCurly => (Op::End, Prec::Min), - TKind::LParen => (Op::Group, Prec::Min), - TKind::RParen => (Op::End, Prec::Min), - TKind::Amp => (Op::Refer, Prec::Max), - // TKind::AmpAmp => todo!("addraddr"), - TKind::Bang => (Op::Not, Prec::Unary), - TKind::BangBang => (Op::Identity, Prec::Unary), - TKind::Bar => (Op::Lambda, Prec::Min), - TKind::BarBar => (Op::Lambda, Prec::Max), - TKind::DotDot => (Op::RangeEx, Prec::Range), - TKind::DotDotEq => (Op::RangeIn, Prec::Range), - TKind::Minus => (Op::Neg, Prec::Unary), - TKind::Plus => (Op::Identity, Prec::Unary), - TKind::Star => (Op::Deref, Prec::Unary), + TKind::Match => (Ps::Match, Prec::Body), + TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign), + TKind::Let => (Ps::Let, Prec::Body), + TKind::Const => (Ps::Const, Prec::Body), + TKind::Loop => (Ps::Op(Op::Loop), Prec::Body), + TKind::If => (Ps::Op(Op::If), Prec::Body), + TKind::While => (Ps::Op(Op::While), Prec::Body), + TKind::Break => (Ps::Op(Op::Break), Prec::Body), + TKind::Return => (Ps::Op(Op::Return), Prec::Body), + + TKind::LCurly => (Ps::Op(Op::Block), Prec::Min), + TKind::RCurly => (Ps::End, Prec::Do), + TKind::LBrack => (Ps::Op(Op::Array), Prec::Min), + TKind::RBrack => (Ps::End, Prec::Tuple), + TKind::LParen => (Ps::Op(Op::Group), Prec::Min), + TKind::RParen => (Ps::End, Prec::Tuple), + TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend), + TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend), + TKind::Bang => (Ps::Op(Op::Not), Prec::Unary), + TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary), + TKind::Bar => (Ps::Lambda, Prec::Body), + TKind::BarBar => (Ps::Lambda0, Prec::Body), + TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), + TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), + TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary), + TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary), + TKind::Star => (Ps::Op(Op::Deref), Prec::Unary), kind => Err(ParseError::NotPrefix(kind, token.span))?, }) } -fn from_infix(token: &Token) -> PResult<(Op, Prec)> { +fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { - TKind::Semi => (Op::Do, Prec::Do), // the inspiration - TKind::RParen => (Op::End, Prec::Do), - TKind::Comma => (Op::Tuple, Prec::Tuple), - TKind::Eq => (Op::Set, Prec::Assign), - TKind::XorXor => (Op::LogXor, Prec::Logical), - TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd), - TKind::BarBar => (Op::LogOr, Prec::LogOr), - TKind::Lt => (Op::Lt, Prec::Compare), - TKind::LtEq => (Op::Leq, Prec::Compare), - TKind::EqEq => (Op::Eq, Prec::Compare), - TKind::BangEq => (Op::Neq, Prec::Compare), - TKind::GtEq => (Op::Geq, Prec::Compare), - TKind::Gt => (Op::Gt, Prec::Compare), - TKind::DotDot => (Op::RangeEx, Prec::Range), - TKind::DotDotEq => (Op::RangeIn, Prec::Range), - TKind::Amp => (Op::And, Prec::Binary), - TKind::Xor => (Op::Xor, Prec::Binary), - TKind::Bar => (Op::Or, Prec::Binary), - TKind::LtLt => (Op::Shl, Prec::Shift), - TKind::GtGt => (Op::Shr, Prec::Shift), - TKind::Plus => (Op::Add, Prec::Factor), - TKind::Minus => (Op::Sub, Prec::Factor), - TKind::Star => (Op::Mul, Prec::Term), - TKind::Slash => (Op::Div, Prec::Term), - TKind::Rem => (Op::Rem, Prec::Term), - TKind::Dot => (Op::Dot, Prec::Project), - TKind::ColonColon => (Op::Path, Prec::Max), + TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), + TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), + TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), + TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare), + TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare), + TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare), + TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare), + TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare), + TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare), + TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), + TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), + TKind::Amp => (Ps::Op(Op::And), Prec::Binary), + TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary), + TKind::Bar => (Ps::Op(Op::Or), Prec::Binary), + TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift), + TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift), + TKind::Plus => (Ps::Op(Op::Add), Prec::Factor), + TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor), + TKind::Star => (Ps::Op(Op::Mul), Prec::Term), + TKind::Slash => (Ps::Op(Op::Div), Prec::Term), + TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), + TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), + TKind::Question => (Ps::End, Prec::Extend), kind => Err(ParseError::NotInfix(kind, token.span))?, }) } -fn from_postfix(token: &Token) -> PResult<(Op, Prec)> { +fn from_postfix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { - TKind::Question => (Op::Try, Prec::Unary), - TKind::LParen => (Op::Call, Prec::Extend), - TKind::LBrack => (Op::Index, Prec::Extend), - TKind::LCurly => (Op::Make, Prec::Make), + TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration + TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), + TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), + TKind::ColonColon => (Ps::Op(Op::Path), Prec::Max), + TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), + TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), + TKind::Question => (Ps::Op(Op::Try), Prec::Unary), + TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), + TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), + TKind::LCurly => (Ps::Make, Prec::Make), kind => Err(ParseError::NotPostfix(kind, token.span))?, }) } #[rustfmt::skip] fn should_coagulate(prev: Op, op: Op) -> bool { - prev == op && (match prev { - Op::Do => true, - Op::Tuple => true, + prev == op && match prev { + Op::LogAnd => true, + Op::LogOr => true, Op::Dot => false, Op::Path => true, Op::Lt => false, @@ -373,10 +471,12 @@ fn should_coagulate(prev: Op, op: Op) -> bool { Op::Geq => false, Op::Gt => false, _ => false, - }) + } } impl<'t> Parse<'t> for Expr { + type Prec = usize; + /// Parses an [Expr]ession. /// /// The `level` parameter indicates the operator binding level of the expression. @@ -391,38 +491,44 @@ impl<'t> Parse<'t> for Expr { let mut head = match op { // Empty is returned when a block finisher is an expr prefix. // It's the only expr that doesn't consume. - Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]), - Op::End => Err(ParseError::NotPrefix(tok.kind, span))?, + Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]), + Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?, - Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")), - Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme), - Op::Lit => Expr::Lit(p.parse(MIN)?), - Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?), - Op::Const => Expr::Const(p.consume().parse(prec.next())?, { + Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident")), + Ps::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme), + Ps::Lit => Expr::Lit(p.parse(MIN)?), + Ps::Let => Expr::Let( + p.consume().parse(PPrec::NoTopAlt)?, + p.opt_if(prec.next(), TKind::Eq)?, + ), + Ps::Const => Expr::Const(p.consume().parse(PPrec::NoTopAlt)?, { p.next_if(TKind::Eq)?; p.parse(prec.next())? }), - Op::Macro => Expr::Op( - op, + Ps::Op(Op::Macro) => Expr::Op( + Op::Macro, vec![p.consume().parse(prec.next())?, { p.next_if(TKind::FatArrow)?; p.parse(prec.next())? }], ), - Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, { + Ps::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, { p.next_if(TKind::LCurly)?; - p.list(vec![], TKind::Comma, TKind::RCurly)? + p.list(vec![], 0, TKind::Comma, TKind::RCurly)? }), - Op::Block => Expr::Op( - op, + Ps::Op(Op::Block) => Expr::Op( + Op::Block, p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), ), - Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?), - Op::Group => match p.consume().opt(MIN, TKind::RParen)? { + Ps::Op(Op::Array) => Expr::Op( + Op::Array, + p.consume().list(vec![], 0, TKind::Comma, TKind::RBrack)?, + ), + Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? { Some(value) => Expr::Op(Op::Group, vec![value]), None => Expr::Op(Op::Tuple, vec![]), }, - Op::If | Op::While => { + Ps::Op(op @ (Op::If | Op::While)) => { p.consume(); let exprs = vec![ // conditional restricted to Logical operators or above @@ -430,50 +536,68 @@ impl<'t> Parse<'t> for Expr { p.parse(prec.next())?, match p.peek() { Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?, - _ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())), + _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), }, ]; Expr::Op(op, exprs) } - Op::Fn => { + Ps::Fn => { + // TODO: move this to 'item' parsing p.consume().next_if(TKind::LParen)?; Expr::Fn( - p.list(vec![], TKind::Comma, TKind::RParen)?, + Pat::Tuple(p.consume().list( + vec![], + PPrec::Tuple, + TKind::Comma, + TKind::RParen, + )?), p.parse(prec.next())?, ) } - // dirty hack: There are two closure operators, signaled by returned prec. - Op::Lambda if prec == Prec::Min => Expr::Fn( - p.consume().list(vec![], TKind::Comma, TKind::Bar)?, + Ps::Lambda => Expr::Fn( + Pat::Tuple( + p.consume() + .list(vec![], PPrec::Tuple, TKind::Comma, TKind::Bar)?, + ), p.parse(Prec::Body.next())?, ), - Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?), + Ps::Lambda0 => Expr::Fn(Pat::Tuple(vec![]), p.consume().parse(Prec::Body.next())?), + Ps::DoubleRef => Expr::Op( + Op::Refer, + vec![Expr::Op(Op::Refer, vec![p.consume().parse(prec.next())?]).anno(span)], + ), - _ => Expr::Op(op, vec![p.consume().parse(prec.next())?]), + Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]), + _ => unimplemented!("prefix {op:?}"), }; // Postfix while let Ok(tok) = p.peek() && let Ok((op, prec)) = from_postfix(tok) && level <= prec.prev() - && op != Op::End + && op != Ps::End { + let kind = tok.kind; let span = span.merge(p.span()); p.consume(); head = match op { - Op::Make => Expr::Make( + Ps::Make => Expr::Make( head.anno(span).into(), - p.consume().list(vec![], TKind::Comma, TKind::RCurly)?, + p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, ), - Op::Index => Expr::Op( - op, - p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?, + Ps::Op(Op::Index) => Expr::Op( + Op::Index, + p.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?, ), - Op::Call => Expr::Op( - op, - p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?, + Ps::Op(Op::Call) => Expr::Op( + Op::Call, + p.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?, ), - _ => Expr::Op(op, vec![head.anno(span)]), + Ps::Op(op @ (Op::Do | Op::Tuple | Op::Dot | Op::Path | Op::LogAnd | Op::LogOr)) => { + Expr::Op(op, p.list_bare(vec![head.anno(span)], prec.next(), kind)?) + } + Ps::Op(op) => Expr::Op(op, vec![head.anno(span)]), + _ => unimplemented!("postfix {op:?}"), }; } @@ -481,18 +605,19 @@ impl<'t> Parse<'t> for Expr { while let Ok(tok) = p.peek() && let Ok((op, prec)) = from_infix(tok) && level <= prec.prev() - && op != Op::End + && op != Ps::End { let span = span.merge(p.span()); p.consume(); - head = match head { + head = match (op, head) { // controls expression chaining vs coagulating - Expr::Op(prev, mut args) if should_coagulate(prev, op) => { + (Ps::Op(op), Expr::Op(prev, mut args)) if should_coagulate(prev, op) => { args.push(p.parse(prec.next())?); Expr::Op(op, args) } - head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]), + (Ps::Op(op), head) => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]), + _ => unimplemented!("infix {op:?}"), } } @@ -501,15 +626,19 @@ impl<'t> Parse<'t> for Expr { } impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno

{ - fn parse(p: &mut Parser<'t>, level: usize) -> PResult + type Prec = P::Prec; + fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult where Self: Sized { let start = p.span(); - Ok(Anno(p.parse(level)?, start.merge(p.span()))) + let anno = Anno(p.parse(level)?, start.merge(p.span())); + println!("{}:\t{anno}", anno.1); + Ok(anno) } } impl<'t, P: Parse<'t>> Parse<'t> for Box

{ - fn parse(p: &mut Parser<'t>, level: usize) -> PResult + type Prec = P::Prec; + fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult where Self: Sized { Ok(Box::new(p.parse(level)?)) }