Files
Doughlang/src/parser.rs
2025-10-20 04:52:52 -04:00

1034 lines
37 KiB
Rust

//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{
ast::*,
lexer::{LexError, LexFailure, Lexer},
span::Span,
token::{Lexeme, TKind, Token},
};
use std::{error::Error, fmt::Display, iter, vec};
/// The ways a parse can stop, either normally ([ParseError::EOF]) or with a failure.
///
/// Every variant other than [ParseError::FromLexer] carries the [Span] it is reported at;
/// the [Display] implementation prints that span first.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
/// Reached the expected end of input.
EOF(Span),
/// Unexpectedly reached end of input.
UnexpectedEOF(Span),
/// An error reported by the lexer, wrapped unchanged.
FromLexer(LexError),
/// Expected the first [TKind], but got the second.
Expected(TKind, TKind, Span),
/// The [TKind] is not valid in a literal.
NotLiteral(TKind, Span),
/// The [TKind] is not valid in a pattern.
NotPattern(TKind, Span),
/// The [TKind] is not valid in a type.
NotType(TKind, Span),
/// The [TKind] is not a prefix operator.
NotPrefix(TKind, Span),
/// The [TKind] is not an infix operator.
NotInfix(TKind, Span),
/// The [TKind] is not a postfix operator.
NotPostfix(TKind, Span),
}
// Re-exported so `EOF(..)` can be written without the `ParseError::` prefix.
pub use ParseError::EOF;
impl Error for ParseError {}
impl Display for ParseError {
    /// Writes a one-line, human-readable description of the error.
    ///
    /// Every message starts with the span the error is reported at, except
    /// [ParseError::FromLexer], which defers entirely to the wrapped lexer error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
            Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
            Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."),
            Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
            // Grammar fix: "an infix", not "a infix".
            Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not an infix operator."),
            Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
        }
    }
}
/// The result of a parsing step: a `T`, or the [ParseError] that stopped it.
pub type PResult<T> = Result<T, ParseError>;

/// Adapters that decide how a [PResult] treats [ParseError::EOF].
trait PResultExt<T> {
    /// Rewrites [ParseError::EOF] into [ParseError::UnexpectedEOF], keeping its span.
    /// Every other value passes through unchanged.
    fn no_eof(self) -> PResult<T>;
    /// Maps `Ok(t)` to `Ok(Some(t))` and [ParseError::EOF] to `Ok(None)`.
    /// Every other error passes through unchanged.
    fn allow_eof(self) -> PResult<Option<T>>;
}

impl<T> PResultExt<T> for PResult<T> {
    fn no_eof(self) -> Self {
        self.map_err(|err| match err {
            ParseError::EOF(span) => ParseError::UnexpectedEOF(span),
            other => other,
        })
    }

    fn allow_eof(self) -> PResult<Option<T>> {
        match self {
            Err(ParseError::EOF(_)) => Ok(None),
            other => other.map(Some),
        }
    }
}
/// Opens a scope where [ParseError::EOF] is unexpected (See [PResultExt::no_eof])
fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
f().no_eof()
}
/// A parser with one token of lookahead over a [Lexer].
#[derive(Debug)]
pub struct Parser<'t> {
/// The token source.
pub lexer: Lexer<'t>,
/// The lookahead slot: filled by [Parser::peek], emptied by [Parser::take] and [Parser::consume].
pub next_tok: Option<PResult<Token>>,
/// The span of the most recently successfully peeked token.
pub last_loc: Span,
/// Whether the last token removed with [Parser::take] was a `RCurly` or `Semi`.
/// The expression parser reads this to decide whether an implicit `Do` may be inserted.
pub elide_do: bool,
}
impl<'t> Parser<'t> {
/// Constructs a new Parser around `lexer`, with an empty lookahead slot,
/// a default [Span] as its location, and `elide_do` cleared.
pub fn new(lexer: Lexer<'t>) -> Self {
    let next_tok = None;
    let last_loc = Span::default();
    Self { lexer, next_tok, last_loc, elide_do: false }
}
/// The identity function. This exists to make production chaining easier.
///
/// Returns `t` unchanged and does not touch the parser; it lets a value be tacked onto a
/// method chain such as `p.consume().then(value)`.
pub fn then<T>(&self, t: T) -> T {
t
}
/// Returns the parser's current location: the span recorded by the most recent
/// successful [Parser::peek].
pub fn span(&self) -> Span {
self.last_loc
}
/// Parses a value of any type implementing the [Parse] trait, at precedence `level`.
pub fn parse<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
    <T as Parse<'t>>::parse(self, level)
}
/// Peeks the next [Token] without consuming it.
///
/// Comment tokens are skipped. A lexer end-of-input becomes [ParseError::EOF] and is not
/// stored in the lookahead slot; any other lexer error is stored and returned as
/// [ParseError::FromLexer]. A successful peek updates the parser's current span.
pub fn peek(&mut self) -> PResult<&Token> {
    if self.next_tok.is_none() {
        let scanned = loop {
            match self.lexer.scan() {
                Ok(Token { kind: TKind::Comment, .. }) => continue,
                Ok(tok) => break Ok(tok),
                Err(LexError { pos, res: LexFailure::EOF }) => {
                    return Err(ParseError::EOF(pos));
                }
                Err(e) => break Err(ParseError::FromLexer(e)),
            }
        };
        self.next_tok = Some(scanned);
    }
    match self.next_tok.as_ref().expect("should have Some lex result") {
        Ok(tok) => {
            self.last_loc = tok.span;
            Ok(tok)
        }
        Err(e) => Err(*e),
    }
}
/// Peeks the next token, yielding it only if its kind is `expected`.
///
/// Returns `Ok(None)` for a token of any other kind; errors from [Parser::peek] propagate.
pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
    let tok = self.peek()?;
    Ok((tok.kind == expected).then_some(tok))
}
/// Consumes and returns the currently-peeked [Token].
///
/// If nothing has been peeked, returns [ParseError::UnexpectedEOF] at the current span.
/// Also records in `elide_do` whether the taken token was a `RCurly` or `Semi`.
pub fn take(&mut self) -> PResult<Token> {
    let tok = match self.next_tok.take() {
        Some(peeked) => peeked,
        None => Err(ParseError::UnexpectedEOF(self.last_loc)),
    };
    self.elide_do = match &tok {
        Ok(t) => matches!(t.kind, TKind::RCurly | TKind::Semi),
        Err(_) => false,
    };
    tok
}
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
self.take().map(|tok| tok.lexeme)
}
/// Peeks and then consumes the next [Token]; end of input is reported as
/// [ParseError::UnexpectedEOF].
// The name mirrors `Iterator::next`, but this returns a `PResult`, hence the lint allowance.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> PResult<Token> {
    if let Err(e) = self.peek().no_eof() {
        return Err(e);
    }
    let tok = self.take();
    Ok(tok.expect("should have token here"))
}
/// Consumes and returns the next [Token] if its kind is `expected`.
///
/// The inner `Err` holds the kind that was found instead; that token is left unconsumed.
/// The outer `Err` carries errors from peeking.
pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
    let found = self.peek()?.kind;
    if found == expected {
        self.take().map(Ok)
    } else {
        Ok(Err(found))
    }
}
/// Parses `sep`-separated `P`s up to and including a closing `end` token,
/// appending them to `elems`.
/// ```ignore
/// List<T> = (T sep)* T? end ;
/// ```
/// A trailing separator is allowed. Reaching end of input before `end` is reported as
/// [ParseError::UnexpectedEOF]; an element not followed by `sep` or `end` is reported
/// as [ParseError::Expected].
pub fn list<P: Parse<'t>>(
    &mut self,
    mut elems: Vec<P>,
    level: P::Prec,
    sep: TKind,
    end: TKind,
) -> PResult<Vec<P>> {
    // TODO: This loses lexer errors
    loop {
        if self.peek_if(end).no_eof()?.is_some() {
            break;
        }
        let elem = self.parse(level.clone()).no_eof()?;
        elems.push(elem);
        if self.peek_if(sep)?.is_none() {
            break;
        }
        self.consume();
    }
    self.expect(end)?;
    Ok(elems)
}
/// Parses one or more `P` at `level`, separated by `sep` tokens, appending them to `elems`.
/// ```ignore
/// UnterminatedList<P> = P (sep P)*
/// ```
/// The list stops at the first token that is not `sep`, or at end of input.
/// End of input while an element is expected is reported as [ParseError::UnexpectedEOF].
pub fn list_bare<P: Parse<'t>>(
    &mut self,
    mut elems: Vec<P>,
    level: P::Prec,
    sep: TKind,
) -> PResult<Vec<P>> {
    loop {
        elems.push(self.parse(level.clone()).no_eof()?);
        let more = matches!(self.peek_if(sep).allow_eof()?, Some(Some(_)));
        if !more {
            return Ok(elems);
        }
        self.consume();
    }
}
/// Consumes a `next` token if present and parses the `P` that must follow it.
///
/// Returns `Ok(None)`, consuming nothing, when the next token is not `next`.
pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
    if self.next_if(next)?.is_err() {
        return Ok(None);
    }
    self.parse::<P>(level).no_eof().map(Some)
}
/// Parses a `P` unless the next token is already `end`, then requires and consumes `end`.
pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
    let out = if self.peek_if(end)?.is_some() {
        None
    } else {
        Some(self.parse::<P>(level).no_eof()?)
    };
    self.expect(end)?;
    Ok(out)
}
/// Consumes the next token, which must be of kind `next`.
///
/// Returns the parser for further chaining, or [ParseError::Expected] naming the kind
/// that was found instead.
pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
    match self.next_if(next)? {
        Ok(_) => Ok(self),
        Err(found) => Err(ParseError::Expected(next, found, self.span())),
    }
}
/// Consumes the currently peeked token without returning it.
///
/// This only clears the lookahead slot. Unlike [Parser::take], it leaves `elide_do` as it was.
/// Returns the parser so that another call can be chained.
pub fn consume(&mut self) -> &mut Self {
self.next_tok = None;
self
}
}
/// A syntactic element that can be parsed from a [Parser].
pub trait Parse<'t> {
/// The precedence (or other context) passed in to steer parsing; `()` when there is none.
type Prec: Clone;
/// Parses one `Self` from `p` at the given level.
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
where Self: Sized;
}
impl<'t> Parse<'t> for FqPath {
    // ugly hack: provide a partial path to parse()
    type Prec = ();

    /// Parses `::`-separated identifiers into a path.
    ///
    /// A leading `::` contributes an empty first part (the "root"). Parsing stops after an
    /// identifier that is not followed by `::`, or at end of input after an identifier.
    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let mut parts = Vec::new();
        if p.next_if(TKind::ColonColon)?.is_ok() {
            parts.push("".into()); // the "root"
        }
        loop {
            let Ok(id) = p.next_if(TKind::Identifier)? else {
                break;
            };
            parts.push(id.lexeme.string().expect("Identifier should have String"));
            match p.next_if(TKind::ColonColon).allow_eof()? {
                Some(Ok(_)) => continue,
                _ => break,
            }
        }
        Ok(FqPath { parts })
    }
}
impl<'t> Parse<'t> for Literal {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
let tok = p.peek()?;
Ok(match tok.kind {
TKind::True => p.consume().then(Literal::Bool(true)),
TKind::False => p.consume().then(Literal::Bool(false)),
TKind::Character | TKind::Integer | TKind::String => {
match p.take().expect("should have Token").lexeme {
Lexeme::String(str) => Literal::Str(str),
Lexeme::Integer(int, base) => Literal::Int(int, base),
Lexeme::Char(chr) => Literal::Char(chr),
}
}
other => Err(ParseError::NotLiteral(other, tok.span))?,
})
}
}
/// Precedence levels for pattern operators, declared from loosest to tightest binding.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PPrec {
    /// Lowest level.
    Min,
    /// Alternatives.
    Alt,
    /// Tuples.
    Tuple,
    /// Type-annotated patterns.
    Typed,
    /// Ranges.
    Range,
    /// Function patterns.
    Fn,
    /// Highest level.
    Max,
}

impl PPrec {
    /// Returns the level one step tighter than `self`; [PPrec::Max] maps to itself.
    fn next(self) -> Self {
        const LADDER: [PPrec; 7] = [
            PPrec::Min,
            PPrec::Alt,
            PPrec::Tuple,
            PPrec::Typed,
            PPrec::Range,
            PPrec::Fn,
            PPrec::Max,
        ];
        let above = (self as usize + 1).min(LADDER.len() - 1);
        LADDER[above]
    }
}
/// The operator classification returned by `pat_from_infix`.
/// Currently its only form is a wrapped [PatOp].
enum PatPs {
/// A normal pattern operator.
Op(PatOp),
}
/// Looks up the infix pattern operator and precedence that `token` stands for.
///
/// Returns `None` for tokens that are not infix pattern operators.
fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
    let (op, prec) = match token.kind {
        TKind::DotDot => (PatOp::RangeEx, PPrec::Range),
        TKind::DotDotEq => (PatOp::RangeIn, PPrec::Range),
        TKind::Colon => (PatOp::Typed, PPrec::Typed),
        TKind::Comma => (PatOp::Tuple, PPrec::Tuple),
        TKind::Arrow => (PatOp::Fn, PPrec::Fn),
        TKind::Bar => (PatOp::Alt, PPrec::Alt),
        _ => return None,
    };
    Some((PatPs::Op(op), prec))
}
impl<'t> Parse<'t> for Pat {
type Prec = PPrec;
/// Parses a pattern by precedence climbing.
///
/// First a prefix form is parsed based on the next token. Then, for as long as the following
/// token is an infix pattern operator (see `pat_from_infix`) whose precedence is at least
/// `level`, the pattern parsed so far is folded into that operator.
/// A token that cannot start a pattern is reported as [ParseError::NotPattern].
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
let tok = p.peek()?;
// Prefix
let mut head = match tok.kind {
// A leading `fn` is dropped and the rest is parsed at Fn level.
// This returns directly, skipping the infix loop below at this level.
TKind::Fn => return p.consume().parse(PPrec::Fn),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
Pat::Lit(p.parse(())?)
}
// A leading `|` is dropped; the pattern after it is parsed at the same level.
TKind::Bar => p.consume().parse(level)?,
TKind::Amp => Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?]),
// `&&` is one token, but produces two nested Ref patterns.
TKind::AmpAmp => Pat::Op(
PatOp::Ref,
vec![Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?])],
),
TKind::Identifier => match tok.lexeme.str() {
Some("_") => p.consume().then(Pat::Ignore),
_ => {
let mut path: FqPath = p.parse(())?;
// TODO: make these postfix.
match p.peek().map(|t| t.kind) {
// `Path ( ... )`: the parenthesized part is parsed as its own pattern.
Ok(TKind::LParen) => Pat::NamedTuple(path, p.parse(PPrec::Typed)?),
// `Path { ... }`: only recognized when parsing at Tuple level or looser.
// Empty braces become an empty tuple pattern.
Ok(TKind::LCurly) if level <= PPrec::Tuple => Pat::NamedStruct(
path,
p.consume()
.opt(PPrec::Alt, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
),
// Anything else (or end of input): a one-part path is a plain name.
Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
},
Err(e) => Err(e)?,
}
}
},
// A grave token followed by any token: the second token's lexeme names the MetId.
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
TKind::DotDot => Pat::Op(
PatOp::Rest,
// Identifier in Rest position always becomes binder
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Identifier) => vec![Pat::Name(
p.take_lexeme()
.expect("should have lexeme")
.string()
.expect("should be string"),
)],
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
// Prefix `..=`: an inclusive range with no lower bound; the upper bound is optional here.
TKind::DotDotEq => Pat::Op(
PatOp::RangeIn,
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
TKind::LParen => Pat::Op(
PatOp::Tuple,
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
),
TKind::LBrack => Pat::Op(
PatOp::Slice,
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RBrack)?,
),
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};
// Infix: stops at end of input, on a peek error, on a non-operator token,
// or on an operator that binds more loosely than `level`.
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Some((op, prec)) = pat_from_infix(tok)
&& level <= prec
{
let kind = tok.kind;
head = match op {
// The right-hand side of `:` is parsed at the tightest level.
PatPs::Op(PatOp::Typed) => {
Pat::Op(PatOp::Typed, vec![head, p.consume().parse(PPrec::Max)?])
}
PatPs::Op(PatOp::Fn) => {
Pat::Op(PatOp::Fn, vec![head, p.consume().parse(PPrec::Fn.next())?])
}
// Ranges: the upper bound is parsed only if the next token can start one.
PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op(
op,
match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
vec![head, p.parse(prec.next())?]
}
_ => vec![head],
},
),
// Remaining operators are n-ary: collect every operand separated by the same token.
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
}
}
Ok(head)
}
}
// impl<'t> Parse<'t> for Ty {
// type Prec = ();
// fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self>
// where Self: Sized {
// let &Token { kind, span, .. } = p.peek()?;
// // TODO: this is a kinda jank way of error reporting
// let head = match kind {
// TKind::Identifier => match p.peek()?.lexeme.str() {
// Some("_") => p.consume().then(Ty::Infer),
// _ => Ty::Named(p.parse(())?),
// },
// TKind::Amp => Ty::Ref(p.consume().parse(())?),
// TKind::AmpAmp => Ty::Ref(Box::new(Ty::Ref(p.consume().parse(())?))),
// TKind::LBrack => {
// let ty = p.consume().parse(level)?;
// match p.next()? {
// Token { kind: TKind::Semi, .. } => {
// let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?);
// p.expect(TKind::RBrack)?;
// ty
// }
// Token { kind: TKind::RBrack, .. } => Ty::Slice(ty),
// tok => Err(ParseError::NotType(tok.kind, tok.span))?,
// }
// }
// TKind::Fn => {
// p.consume();
// match p.parse(())? {
// Ty::Fn(args) => Ty::Fn(args),
// other @ Ty::Tuple(_) => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
// other => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
// }
// }
// TKind::LParen => {
// Ty::Tuple(p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?)
// }
// _ => Err(ParseError::NotType(kind, span))?,
// };
// Ok(match p.next_if(TKind::Arrow).allow_eof()? {
// Some(Ok(_)) => Ty::Fn(vec![
// match head {
// args @ Ty::Tuple(_) => args,
// arg => Ty::Tuple(vec![arg]),
// },
// p.parse(())?,
// ]),
// _ => head,
// })
// }
// }
/// The precedence hierarchy for expression syntax, declared from loosest to tightest binding.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Prec {
    /// The loosest level.
    Min,
    /// The Semicolon Operator gets its own precedence level
    Do,
    /// An assignment
    Assign,
    /// Constructor for a tuple
    Tuple,
    /// The body of a function, conditional, etc.
    Body,
    /// Constructor for a struct
    Make,
    /// The conditional of an `if` or `while` (which is really an `if`)
    Logical,
    /// The short-circuiting "boolean or" operator
    LogOr,
    /// The short-circuiting "boolean and" operator
    LogAnd,
    /// Value comparison operators
    Compare,
    /// Constructor for a Range
    Range,
    /// Binary/bitwise operators
    Binary,
    /// Bit-shifting operators
    Shift,
    /// Addition and Subtraction operators
    Factor,
    /// Multiplication, Division, and Remainder operators
    Term,
    /// Negation, (De)reference, Try
    Unary,
    /// Place-projection operators
    Project,
    /// Array/Call subscripting and reference
    Extend,
    /// The tightest level.
    Max,
}

impl Prec {
    /// The numeric binding level of [Prec::Min].
    pub const MIN: usize = Prec::Min.value();

    /// The numeric binding level: twice the variant's position, leaving an odd slot
    /// between neighbouring levels.
    pub const fn value(self) -> usize {
        2 * (self as usize)
    }

    /// The numeric level compared against the caller's level to decide whether an
    /// operator of this precedence may be applied.
    ///
    /// [Prec::Assign] sits one higher than its own value; every other level equals
    /// [Prec::value].
    pub const fn prev(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base + 1 } else { base }
    }

    /// The numeric level used to parse the operand following an operator of this precedence.
    ///
    /// [Prec::Assign] reuses its own value; every other level is one above [Prec::value].
    pub const fn next(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base } else { base + 1 }
    }
}
/// PseudoOperator: fake operators used to give certain tokens special behavior.
///
/// `from_prefix` and `from_infix` map tokens onto these; the [Expr] parser then dispatches on
/// them. Ordinary operators are wrapped in [Ps::Op].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ps {
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Let, // let Pat = Expr (else Expr)?
Const, // const Pat = Expr
Static, // static Pat = Expr (NOTE(review): no token in `from_prefix` maps to this)
Typedef, // struct { Pat } | struct ( Pat ); also produced for `enum`
For, // for Pat in Expr Expr else Expr
Fn, // fn ( Pat,* ) Expr
Lambda0, // || Expr
Lambda, // | Pat,* | Expr
DoubleRef, // && Expr
Make, // Expr{ Expr,* }
Mod, // mod Ty Expr
ImplicitDo, // An implicit semicolon
ExplicitDo, // An explicit leading semicolon
End, // Produces an empty value.
Op(Op), // A normal [ast::Op]
}
/// Classifies `token` as the start of an expression.
///
/// Returns the [Ps] the token introduces together with the [Prec] associated with it.
/// A token with no entry in the table is reported as [ParseError::NotPrefix].
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
// A semicolon where an expression should start stands for an empty expression.
TKind::Semi => (Ps::End, Prec::Body),
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
(Ps::Lit, Prec::Max)
}
TKind::Public => (Ps::Op(Op::Pub), Prec::Body),
TKind::For => (Ps::For, Prec::Body),
TKind::Fn => (Ps::Fn, Prec::Body),
TKind::Match => (Ps::Op(Op::Match), Prec::Body),
TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign),
TKind::Module => (Ps::Mod, Prec::Body),
TKind::Let => (Ps::Let, Prec::Tuple),
TKind::Const => (Ps::Const, Prec::Body),
TKind::Struct | TKind::Enum => (Ps::Typedef, Prec::Body),
TKind::Loop => (Ps::Op(Op::Loop), Prec::Body),
TKind::If => (Ps::Op(Op::If), Prec::Body),
TKind::While => (Ps::Op(Op::While), Prec::Body),
TKind::Break => (Ps::Op(Op::Break), Prec::Body),
TKind::Return => (Ps::Op(Op::Return), Prec::Body),
// Opening delimiters start a block, array, or group.
// A closing delimiter in prefix position stands for an empty expression.
TKind::LCurly => (Ps::Op(Op::Block), Prec::Min),
TKind::RCurly => (Ps::End, Prec::Do),
TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple),
TKind::RBrack => (Ps::End, Prec::Tuple),
TKind::LParen => (Ps::Op(Op::Group), Prec::Min),
TKind::RParen => (Ps::End, Prec::Tuple),
TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend),
TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend),
TKind::Bang => (Ps::Op(Op::Not), Prec::Unary),
TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary),
// In prefix position, `|` and `||` begin lambdas.
TKind::Bar => (Ps::Lambda, Prec::Body),
TKind::BarBar => (Ps::Lambda0, Prec::Body),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),
kind => Err(ParseError::NotPrefix(kind, token.span))?,
})
}
/// Classifies `token` as an infix or postfix operator following an expression.
///
/// Returns the [Ps] and [Prec] for the token. As written this never returns `Err`:
/// a token without its own entry falls through to [Ps::ImplicitDo] at [Prec::Do].
fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
TKind::In => (Ps::Op(Op::Do), Prec::Do),
// Assignment and compound assignment
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::LCurly => (Ps::Make, Prec::Make),
// Logical operators
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
// Comparisons
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare),
TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare),
TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
// Bitwise, shift, and arithmetic operators
TKind::Amp => (Ps::Op(Op::And), Prec::Binary),
TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary),
TKind::Bar => (Ps::Op(Op::Or), Prec::Binary),
TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift),
TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift),
TKind::Plus => (Ps::Op(Op::Add), Prec::Factor),
TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor),
TKind::Star => (Ps::Op(Op::Mul), Prec::Term),
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
// Postfix and projection operators
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
// Closing delimiters end the current expression.
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::As => (Ps::Op(Op::As), Prec::Max),
// Everything else is a candidate for an implicit semicolon.
_ => (Ps::ImplicitDo, Prec::Do),
})
}
impl<'t> Parse<'t> for Typedef {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let tok = p.next()?;
match tok.kind {
TKind::Enum => Ok(Self(TypedefKind::Enum, p.parse(PPrec::Tuple)?)),
TKind::Struct => Ok(Self(TypedefKind::Struct, p.parse(PPrec::Tuple)?)),
_ => Err(ParseError::NotType(tok.kind, tok.span)),
}
}
}
impl<'t> Parse<'t> for Bind {
    type Prec = BindKind;

    /// Parses a binding; the kind of binding is passed in as `level`.
    ///
    /// - `Match`: `|? Pat => Expr`. Nothing is consumed before the optional bar.
    /// - `Mod`, `Fn`: the current token (the keyword) is dropped, then `Pat Expr`.
    /// - anything else: the current token is dropped, then `Pat (= Expr (else Expr)?)?`.
    fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self> {
        match level {
            BindKind::Match => {
                // |? Pat => Expr
                let _ = p.next_if(TKind::Bar)?; // and discard
                let pat = p.parse(PPrec::Alt)?;
                let body = p.expect(TKind::FatArrow)?.parse(Prec::Body.next())?;
                Ok(Self(level, pat, vec![body]))
            }
            BindKind::Mod => {
                let pat = p.consume().parse(PPrec::Max)?;
                let body = p.parse(Prec::Body.next())?;
                Ok(Self(level, pat, vec![body]))
            }
            BindKind::Fn => {
                let pat = p.consume().parse(PPrec::Fn)?;
                let body = p.parse(Prec::Body.next())?;
                Ok(Self(level, pat, vec![body]))
            }
            _ => {
                // let Pat
                let pat = p.consume().parse(PPrec::Tuple)?;
                let has_init = matches!(p.next_if(TKind::Eq).allow_eof()?, Some(Ok(_)));
                if !has_init {
                    return Ok(Self(level, pat, vec![]));
                }
                // = Expr
                let mut exprs = vec![p.parse(Prec::Tuple.value())?];
                // else Expr
                if matches!(p.next_if(TKind::Else).allow_eof()?, Some(Ok(_))) {
                    exprs.push(p.parse(Prec::Body.next())?);
                }
                Ok(Self(level, pat, exprs))
            }
        }
    }
}
impl<'t> Parse<'t> for MakeArm {
    type Prec = ();

    /// Parses one arm of a structure expression: `Identifier (: Expr)?`.
    ///
    /// A non-identifier token is reported as [ParseError::Expected].
    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
        let name = match p.next_if(TKind::Identifier)? {
            Ok(tok) => tok.lexeme.string().expect("Identifier should have String"),
            Err(found) => {
                return Err(ParseError::Expected(TKind::Identifier, found, p.span()));
            }
        };
        let value = p.opt_if(Prec::Body.value(), TKind::Colon)?;
        Ok(MakeArm(name, value))
    }
}
/// Parses a `for` loop and desugars it into nested `match`/`loop` expressions.
///
/// The currently peeked token (the `for` keyword) is dropped first. The shape of the generated
/// tree is sketched in the block comment inside the body. When there is no `else` branch, an
/// empty tuple is used in its place. `_level` is unused.
fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
// for Pat
let pat = p.consume().parse(PPrec::Tuple)?;
// in Expr
let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
// Span of the iterated expression; reused for the generated `into_iter`/`next` calls.
let cspan = iter.1;
// Expr
let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
// Span of the loop body; reused for the generated loop and match scaffolding.
let pspan = pass.1;
// else Expr?
let fail = match p.next_if(TKind::Else).allow_eof()? {
Some(Ok(_)) => p.parse(Prec::Body.next())?,
_ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
};
let fspan = fail.1;
/*
for `pat in `iter `pass else `fail
==>
match (`iter).into_iter() {
#iter => loop match #iter.next() {
None => break `fail,
Some(`pat) => `pass,
},
}
*/
Ok(Expr::Op(
Op::Match,
vec![
// Scrutinee: (`iter).into_iter()
Expr::Op(
Op::Dot,
vec![
iter,
Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan),
],
)
.anno(cspan),
// Single arm: #iter => loop match #iter.next() { ... }
Expr::Bind(Box::new(Bind(
BindKind::Match,
Pat::Name("#iter".into()),
vec![
Expr::Op(
Op::Loop,
vec![
Expr::Op(
Op::Match,
vec![
// Scrutinee: #iter.next()
Expr::Op(
Op::Dot,
vec![
Expr::Id("#iter".into()).anno(cspan),
Expr::Op(
Op::Call,
vec![Expr::Id("next".into()).anno(cspan)],
)
.anno(cspan),
],
)
.anno(cspan),
// None => break `fail
Expr::Bind(Box::new(Bind(
BindKind::Match,
Pat::Name("None".into()),
vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)],
)))
.anno(fspan),
// Some(`pat) => `pass
Expr::Bind(Box::new(Bind(
BindKind::Match,
Pat::NamedTuple(
"Some".into(),
Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
),
vec![pass],
)))
.anno(pspan),
],
)
.anno(pspan),
],
)
.anno(pspan),
],
)))
.anno(pspan),
],
))
}
impl<'t> Parse<'t> for Expr {
type Prec = usize;
/// Parses an [Expr]ession.
///
/// The `level` parameter indicates the operator binding level of the expression.
///
/// Parsing proceeds in two phases: a prefix form chosen by `from_prefix`, followed by a loop
/// that folds in infix and postfix operators chosen by `from_infix` for as long as they bind
/// at least as tightly as `level`. Once the first token has been classified, running out of
/// input is reported as [ParseError::UnexpectedEOF].
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
// For now, doc tokens in front of an expression are consumed and discarded.
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix
let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, span);
no_eof(move || {
let mut head = match op {
// "End" is produced when an "empty" expression is syntactically required.
// This happens when a semi or closing delimiter begins an expression.
// The token which emitted "End" cannot be consumed, as it is expected elsewhere.
Ps::End if level <= prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Typedef => Expr::Struct(p.parse(())?),
Ps::Let => Expr::Bind(p.parse(BindKind::Let)?),
Ps::Const => Expr::Bind(p.parse(BindKind::Const)?),
Ps::Static => Expr::Bind(p.parse(BindKind::Static)?),
Ps::Mod => Expr::Bind(p.parse(BindKind::Mod)?),
Ps::Fn => Expr::Bind(p.parse(BindKind::Fn)?),
// Lambdas become Fn bindings: `| Pat |` or `||`, an optional `-> Pat`, then the body.
Ps::Lambda | Ps::Lambda0 => {
p.consume();
let args = if op == Ps::Lambda {
p.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![]))
} else {
Pat::Op(PatOp::Tuple, vec![])
};
let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore);
Expr::Bind(Box::new(Bind(
BindKind::Fn,
Pat::Op(PatOp::Fn, vec![args, rety]),
vec![p.parse(Prec::Body.next())?],
)))
}
Ps::For => parse_for(p, ())?,
Ps::Op(Op::Match) => parse_match(p)?,
// `#[ Expr? ]` followed by the expression it is attached to.
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
// `{ Expr? }`: an empty block has no operands.
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
// `( Expr )` is a group; `()` is the empty tuple.
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
// Always three operands: condition, body, and else branch.
// A missing else branch is filled in with an empty tuple.
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
// `&&` is one token, but produces two nested Refer operations sharing one span.
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
// Any other prefix operator takes a single operand.
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
// The loop ends at end of input, on a peek error, on an operator that binds more loosely
// than `level`, or on a closing delimiter (Ps::End).
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// As is ImplicitDo (semicolon elision)
// The token is not consumed here: it begins the next expression.
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?),
// `head[ a, b, ... ]`: head is the first operand, the indices follow.
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
// `head( args? )`: the arguments are parsed as one expression;
// an empty argument list becomes an empty tuple.
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
vec![
head.anno(span),
p.consume()
.opt(0, TKind::RParen)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
],
),
// These operators are n-ary: collect every operand separated by the same token.
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
// Postfix: no right-hand operand.
Ps::Op(op @ Op::Try) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
}
// Ordinary binary operator.
Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ok(head)
})
}
}
/// Parses an array with 0 or more elements, or an array-repetition.
///
/// The currently peeked token (the opening bracket) is dropped first. `[]` is an empty
/// array, `[item; count]` is a repetition, and a top-level tuple inside the brackets is
/// flattened into the array's elements.
fn parse_array<'t>(p: &mut Parser<'t>) -> PResult<Expr> {
    p.consume();
    if matches!(p.peek()?.kind, TKind::RBrack) {
        p.consume();
        return Ok(Expr::Op(Op::Array, vec![]));
    }
    let first = p.parse(Prec::Tuple.value())?;
    let count = p.opt_if(Prec::Tuple.next(), TKind::Semi)?;
    p.expect(TKind::RBrack)?;
    if let Some(count) = count {
        return Ok(Expr::Op(Op::ArRep, vec![first, count]));
    }
    let elems = match first {
        Anno(Expr::Op(Op::Tuple, items), _) => items,
        single => vec![single],
    };
    Ok(Expr::Op(Op::Array, elems))
}
/// Parses `match Expr { Arm,* }` into an `Op::Match`.
///
/// The currently peeked token (the `match` keyword) is dropped first. The scrutinee is
/// the first operand; each arm follows as a bind expression carrying the arm's annotation.
fn parse_match<'t>(p: &mut Parser<'t>) -> PResult<Expr> {
    p.consume();
    let scrutinee = p.parse(Prec::Logical.value())?;
    p.expect(TKind::LCurly)?;
    let arms = p.list(vec![], BindKind::Match, TKind::Comma, TKind::RCurly)?;
    let wrapped = arms
        .into_iter()
        .map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span));
    Ok(Expr::Op(Op::Match, iter::once(scrutinee).chain(wrapped).collect()))
}
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
    type Prec = P::Prec;

    /// Parses a `P` and pairs it with a span: the parser's span before parsing,
    /// merged with its span afterwards.
    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
    where Self: Sized {
        let start = p.span();
        let inner = p.parse::<P>(level)?;
        let end = p.span();
        Ok(Anno(inner, start.merge(end)))
    }
}
impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
    type Prec = P::Prec;

    /// Parses a `P` at `level` and boxes it.
    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
    where Self: Sized {
        p.parse::<P>(level).map(Box::new)
    }
}