src: Split parser into modules, reorganize AST

This commit is contained in:
2025-10-28 22:15:06 -04:00
parent a3cab92b35
commit 8e2bc5ad85
8 changed files with 1146 additions and 1075 deletions

592
src/parser/expr.rs Normal file
View File

@@ -0,0 +1,592 @@
use super::{PResult, PResultExt, Parse, ParseError, Parser, no_eof, pat::Prec as PPrec};
use crate::{
ast::*,
token::{TKind, Token},
};
use std::iter;
/// The operator-precedence ladder used by the expression parser.
///
/// Variants are declared from loosest-binding ([`Prec::Min`]) to
/// tightest-binding ([`Prec::Max`]). Both the derived ordering and the numeric
/// levels returned by [`Prec::value`] follow declaration order, so reordering
/// variants changes how expressions parse.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Prec {
    /// The loosest level
    Min,
    /// The semicolon operator, which sits on a level of its own
    Do,
    /// Assignment and compound assignment
    Assign,
    /// Tuple construction
    Tuple,
    /// Bodies of functions, conditionals, and similar constructs
    Body,
    /// Struct construction
    Make,
    /// The condition of an `if` or `while` (which is really an `if`)
    Logical,
    /// Short-circuiting "boolean or"
    LogOr,
    /// Short-circuiting "boolean and"
    LogAnd,
    /// Value comparisons
    Compare,
    /// Range construction
    Range,
    /// Binary/bitwise operators
    Binary,
    /// Bit shifts
    Shift,
    /// Addition and subtraction
    Factor,
    /// Multiplication, division, and remainder
    Term,
    /// Negation, (de)reference, try
    Unary,
    /// Place projection
    Project,
    /// Array/call subscripting and reference
    Extend,
    /// The tightest level
    Max,
}

impl Prec {
    /// The numeric level of [`Prec::Min`].
    pub const MIN: usize = Prec::Min.value();

    /// Whether operators on this level group to the right.
    ///
    /// Only [`Prec::Assign`] does; every other level groups to the left.
    const fn is_right_assoc(self) -> bool {
        matches!(self, Self::Assign)
    }

    /// The numeric level of this precedence.
    ///
    /// Levels are spaced two apart, which leaves an odd slot above each level
    /// for [`Prec::prev`] and [`Prec::next`] to use.
    pub const fn value(self) -> usize {
        2 * (self as usize)
    }

    /// The threshold an operator on this level is compared against.
    ///
    /// Equals [`Prec::value`], plus one for the right-associative
    /// [`Prec::Assign`] level.
    pub const fn prev(self) -> usize {
        self.value() + (self.is_right_assoc() as usize)
    }

    /// The level at which the operand following an operator is parsed.
    ///
    /// Equals [`Prec::value`] plus one, except for the right-associative
    /// [`Prec::Assign`] level, where it is exactly [`Prec::value`].
    pub const fn next(self) -> usize {
        self.value() + (!self.is_right_assoc() as usize)
    }
}
/// `PseudoOperator`: fake operators used to give certain tokens special behavior.
///
/// The prefix and infix lookup tables in this file map each token to one of
/// these, and the expression parser dispatches on the result.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ps {
/// Identifier
Id,
/// MetaIdentifier
Mid,
/// Literal
Lit,
/// `use Use`
Use,
/// Any definition (let, const, static, struct, enum, fn, ...)
Def,
/// `for Pat in Expr Expr else Expr`
For,
/// `|| Expr`
Lambda0,
/// `| Pat,* | Expr`
Lambda,
/// `&& Expr`
DoubleRef,
/// `Expr{ Expr,* }`
Make,
/// An implicit semicolon
ImplicitDo,
/// Produces an empty value.
End,
/// A normal [ast::Op]
Op(Op),
}
/// Tries to map the incoming [Token] to a prefix [expression operator](Op)
/// and its [precedence level](Prec)
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::Semi => (Ps::End, Prec::Body),
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
(Ps::Lit, Prec::Max)
}
TKind::Use => (Ps::Use, Prec::Max),
TKind::Pub => (Ps::Op(Op::Pub), Prec::Body),
TKind::For => (Ps::For, Prec::Body),
TKind::Match => (Ps::Op(Op::Match), Prec::Body),
TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign),
TKind::Fn
| TKind::Mod
| TKind::Impl
| TKind::Let
| TKind::Const
| TKind::Static
| TKind::Type
| TKind::Struct
| TKind::Enum => (Ps::Def, Prec::Body),
TKind::Loop => (Ps::Op(Op::Loop), Prec::Body),
TKind::If => (Ps::Op(Op::If), Prec::Body),
TKind::While => (Ps::Op(Op::While), Prec::Body),
TKind::Break => (Ps::Op(Op::Break), Prec::Body),
TKind::Return => (Ps::Op(Op::Return), Prec::Body),
TKind::LCurly => (Ps::Op(Op::Block), Prec::Min),
TKind::RCurly => (Ps::End, Prec::Do),
TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple),
TKind::RBrack => (Ps::End, Prec::Tuple),
TKind::LParen => (Ps::Op(Op::Group), Prec::Min),
TKind::RParen => (Ps::End, Prec::Tuple),
TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend),
TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend),
TKind::Bang => (Ps::Op(Op::Not), Prec::Unary),
TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Bar => (Ps::Lambda, Prec::Body),
TKind::BarBar => (Ps::Lambda0, Prec::Body),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),
kind => Err(ParseError::NotPrefix(kind, token.span))?,
})
}
/// Tries to map the incoming [Token] to an infix [expression operator](Op)
/// and its [precedence level](Prec)
const fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
TKind::In => (Ps::Op(Op::Do), Prec::Do),
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::LCurly => (Ps::Make, Prec::Make),
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare),
TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare),
TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Amp => (Ps::Op(Op::And), Prec::Binary),
TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary),
TKind::Bar => (Ps::Op(Op::Or), Prec::Binary),
TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift),
TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift),
TKind::Plus => (Ps::Op(Op::Add), Prec::Factor),
TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor),
TKind::Star => (Ps::Op(Op::Mul), Prec::Term),
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::As => (Ps::Op(Op::As), Prec::Max),
_ => (Ps::ImplicitDo, Prec::Do),
})
}
impl<'t> Parse<'t> for Expr {
/// Numeric binding level; callers in this file pass values derived from the
/// precedence enum's `value()`, `prev()` and `next()`.
type Prec = usize;
/// Parses an [Expr]ession.
///
/// Parsing happens in two phases. First the next token is looked up with
/// [`from_prefix`] and the head of the expression is built from it. Then, for
/// as long as the following token maps (via [`from_infix`]) to an operator whose
/// `prev()` threshold is at least `level`, that operator is folded onto the head.
///
/// The `level` parameter indicates the operator binding level of the expression.
///
/// # Errors
/// Propagates parser errors. Reports [`ParseError::NotPrefix`] when the next
/// token cannot begin an expression at this `level`, and
/// [`ParseError::NotInfix`] when an infix pseudo-operator has no handler.
///
/// # Panics
/// Hits `unimplemented!` if [`from_prefix`] yields a pseudo-operator that has
/// no prefix handler below.
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
// Until then, leading Doc tokens are consumed and discarded.
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix
// The token is only peeked here; each arm below decides whether to consume it.
let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, span);
// NOTE(review): `no_eof` is defined elsewhere; presumably it turns an
// end-of-file hit inside the closure into a hard error -- confirm.
no_eof(move || {
let mut head = match op {
// "End" is produced when an "empty" expression is syntactically required.
// This happens when a semi or closing delimiter begins an expression.
// The token which emitted "End" cannot be consumed, as it is expected
// elsewhere.
Ps::End if level <= prec.next() => Expr::Omitted,
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
Ps::Id => Expr::Id(p.parse(())?),
// The introducing token is dropped; the lexeme of the token after it is the name.
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Use => Expr::Use(p.consume().parse(())?),
Ps::Def => Expr::Bind(p.parse(None)?),
// Lambdas become anonymous `fn` bindings with no generics.
Ps::Lambda | Ps::Lambda0 => {
p.consume();
// `||` has no parameter list at all, and `| |` may have an empty one;
// both produce an empty tuple pattern.
let args = if op == Ps::Lambda {
p.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![]))
} else {
Pat::Op(PatOp::Tuple, vec![])
};
// The return-type pattern defaults to `Pat::Ignore` when no arrow follows.
let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore);
Expr::Bind(Box::new(Bind(
BindOp::Fn,
vec![],
Pat::Op(PatOp::Fn, vec![args, rety]),
vec![p.parse(Prec::Body.next())?],
)))
}
Ps::For => parse_for(p, ())?,
Ps::Op(Op::Match) => parse_match(p)?,
// `#[ meta ] expr`: operands are the bracketed metadata (an empty tuple
// when the brackets are empty) and the annotated expression.
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
// `{ }` yields a block with no operands; otherwise exactly one.
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
// `( )` is the empty tuple; `( expr )` is a group.
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
// `if`/`while` always carry three operands: condition, body, else-branch.
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
// A missing `else` is filled in with an empty tuple.
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
// `&&x` is a single token but means `&(&x)`; both references reuse the
// operand's span.
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
// Every remaining prefix operator takes a single operand.
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
// The loop stops at end of input, on a peek error, at an operator that
// binds more loosely than `level`, or at a closing delimiter (`Ps::End`).
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
// Span used to annotate `head` when it becomes an operand.
// NOTE(review): presumably covers everything parsed so far -- confirm
// against `Parser::span`.
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
// A `{` only starts one directly after an identifier or meta-identifier.
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// ImplicitDo (semicolon elision) is context-sensitive too: it only
// applies while the parser's `elide_do` flag is set. No token is consumed.
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
// Allow `;` at end of file
// In that case the right-hand side is a default-valued expression.
Ps::Op(Op::Do) => head.and_do(
span,
match p.consume().peek().allow_eof()? {
Some(_) => p.parse(prec.next())?,
None => Anno(Default::default(), span),
},
),
// `head[a, b, ...]`: operands are the head followed by each index.
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
// `head(args)`: operands are the callee and its arguments; `()` is
// recorded as an empty tuple.
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
vec![
head.anno(span),
p.consume()
.opt(0, TKind::RParen)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
],
),
// These operators collect all their operands into one flat node.
// NOTE(review): `list_bare` is assumed to keep parsing operands separated
// by the same token `kind` -- confirm.
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
// `?` is postfix: there is no right-hand operand to parse.
Ps::Op(op @ Op::Try) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
}
// Ordinary binary operator.
Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ok(head)
})
}
}
/// Parses an array with 0 or more elements, or an array-repetition
fn parse_array(p: &mut Parser<'_>) -> PResult<Expr> {
if p.consume().peek()?.kind == TKind::RBrack {
p.consume();
return Ok(Expr::Op(Op::Array, vec![]));
}
let prec = Prec::Tuple;
let item = p.parse(prec.value())?;
let repeat = p.opt_if(prec.next(), TKind::Semi)?;
p.expect(TKind::RBrack)?;
Ok(match (repeat, item) {
(Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),
(None, Anno(Expr::Op(Op::Tuple, items), _)) => Expr::Op(Op::Array, items),
(None, item) => Expr::Op(Op::Array, vec![item]),
})
}
/// Parses a `match` expression
///
/// ```ignore
/// match scrutinee {
/// (Pat => Expr),*
/// }
/// ```
fn parse_match(p: &mut Parser<'_>) -> PResult<Expr> {
let scrutinee = p.consume().parse(Prec::Logical.value())?;
let arms = p
.expect(TKind::LCurly)?
.list(vec![], Some(BindOp::Match), TKind::Comma, TKind::RCurly)?
.into_iter()
.map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span));
let expr = Expr::Op(Op::Match, iter::once(scrutinee).chain(arms).collect());
Ok(expr)
}
/// Parses and desugars a `for` loop expression
///
/// Assumes the existence of the following items:
///
/// 1. `enum<T> Option { None, Some(T) }`
/// 2. `fn T::into_iter(&mut self) -> U`
/// 3. `U::next() -> Option<V>`
fn parse_for(p: &mut Parser<'_>, _level: ()) -> PResult<Expr> {
// for Pat
let pat = p.consume().parse(PPrec::Tuple)?;
// in Expr
let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
let cspan = iter.1;
// Expr
let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
let pspan = pass.1;
// else Expr?
let fail = match p.next_if(TKind::Else).allow_eof()? {
Some(Ok(_)) => p.parse(Prec::Body.next())?,
_ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
};
let fspan = fail.1;
/*
for `pat in `iter `pass else `fail
==>
match (`iter).into_iter() {
#iter => loop match #iter.next() {
None => break `fail,
Some(`pat) => `pass,
},
}
*/
// TODO: A better way to do this kind of substitution desugaring
// without losing span information!
Ok(Expr::Op(
Op::Match,
vec![
Expr::Op(
Op::Dot,
vec![
iter,
Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan),
],
)
.anno(cspan),
Expr::Bind(Box::new(Bind(
BindOp::Match,
vec![],
Pat::Name("#iter".into()),
vec![
Expr::Op(
Op::Loop,
vec![
Expr::Op(
Op::Match,
vec![
Expr::Op(
Op::Dot,
vec![
Expr::Id("#iter".into()).anno(cspan),
Expr::Op(
Op::Call,
vec![Expr::Id("next".into()).anno(cspan)],
)
.anno(cspan),
],
)
.anno(cspan),
Expr::Bind(Box::new(Bind(
BindOp::Match,
vec![],
Pat::Name("None".into()),
vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)],
)))
.anno(fspan),
Expr::Bind(Box::new(Bind(
BindOp::Match,
vec![],
Pat::NamedTuple(
"Some".into(),
Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
),
vec![pass],
)))
.anno(pspan),
],
)
.anno(pspan),
],
)
.anno(pspan),
],
)))
.anno(pspan),
],
))
}
/// Selects the parsing recipe for a binding, based on the next token.
///
/// The returned tuple holds, in order: the [BindOp], the
/// [pattern precedence](PPrec), the [token](TKind) separating pattern from body
/// (if any), the [body precedence](Prec) (if the binding has a body), and the
/// [else precedence](Prec) (if the binding may have an `else` branch).
///
/// A recognized leading token is consumed. Any other token selects the
/// match-arm recipe and is left in place.
///
/// The returned expression [Prec]edences are expected to be [`Prec::next`]ed, so they may
/// be one level of precedence lower than would be intuitive (i.e. [Prec::Assign] instead of [Prec::Tuple])
#[rustfmt::skip]
#[allow(clippy::type_complexity)]
fn from_bind(p: &mut Parser<'_>) -> PResult<(BindOp, PPrec, Option<TKind>, Option<Prec>, Option<Prec>)> {
    let lead = p.peek()?.kind;
    let recipe = match lead {
        // Token            Operator        Pat prec      Body Token             Body prec            Else prec
        TKind::Let    => (BindOp::Let,    PPrec::Tuple, Some(TKind::Eq),       Some(Prec::Compare), Some(Prec::Body)),
        TKind::Const  => (BindOp::Const,  PPrec::Tuple, Some(TKind::Eq),       Some(Prec::Assign),  None),
        TKind::Static => (BindOp::Static, PPrec::Tuple, Some(TKind::Eq),       Some(Prec::Assign),  None),
        TKind::Type   => (BindOp::Type,   PPrec::Tuple, Some(TKind::Eq),       Some(Prec::Project), None),
        TKind::Struct => (BindOp::Struct, PPrec::Tuple, None,                  None,                None),
        TKind::Enum   => (BindOp::Enum,   PPrec::Tuple, None,                  None,                None),
        TKind::Fn     => (BindOp::Fn,     PPrec::Fn,    None,                  Some(Prec::Body),    None),
        TKind::Mod    => (BindOp::Mod,    PPrec::Max,   None,                  Some(Prec::Body),    None),
        TKind::Impl   => (BindOp::Impl,   PPrec::Max,   None,                  Some(Prec::Body),    None),
        // Match arms, with or without a leading `|`
        _             => (BindOp::Match,  PPrec::Alt,   Some(TKind::FatArrow), Some(Prec::Body),    None),
    };
    // Only a match arm that does not begin with `|` has no introducing token to eat.
    let introduced = lead == TKind::Bar || !matches!(recipe.0, BindOp::Match);
    if introduced {
        p.consume();
    }
    Ok(recipe)
}
impl<'t> Parse<'t> for Bind {
type Prec = Option<BindOp>;
fn parse(p: &mut Parser<'t>, expected_level: Self::Prec) -> PResult<Self> {
// let
let (level, patp, arrow, bodyp, failp) = from_bind(p)?;
if let Some(expected) = expected_level
&& level != expected
{
Err(ParseError::NotMatch(level, expected, p.span()))?
}
// <T,*>
let generics = match p.next_if(TKind::Lt)? {
Ok(_) => p.list(vec![], (), TKind::Comma, TKind::Gt)?,
Err(_) => vec![],
};
// Pat
let pat = p.parse(patp)?;
let Some(bodyp) = bodyp else {
return Ok(Self(level, generics, pat, vec![]));
};
// `=>` for match, `=`? for everything else
if let Some(arrow) = arrow
&& p.next_if(arrow).allow_eof()?.is_none_or(|v| v.is_err())
{
return Ok(Self(level, generics, pat, vec![]));
}
// `=` Expr
let body = p.parse(bodyp.next())?;
let Some(failp) = failp else {
return Ok(Self(level, generics, pat, vec![body]));
};
// `else` Expr
if p.next_if(TKind::Else)
.allow_eof()?
.is_none_or(|v| v.is_err())
{
return Ok(Self(level, generics, pat, vec![body]));
}
let fail = p.parse(failp.next())?;
Ok(Self(level, generics, pat, vec![body, fail]))
}
}
impl<'t> Parse<'t> for MakeArm {
    type Prec = ();

    /// Parses one field of a struct-construction expression: `name` or
    /// `name: Expr`.
    ///
    /// # Errors
    /// Reports [`ParseError::Expected`] when the next token is not an
    /// identifier, and propagates parser errors.
    ///
    /// # Panics
    /// Panics if an identifier token carries no string lexeme.
    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
        let ident = match p.next_if(TKind::Identifier)? {
            Ok(token) => token,
            Err(found) => Err(ParseError::Expected(TKind::Identifier, found, p.span()))?,
        };
        let field = ident.lexeme.string().expect("Identifier should have String");
        // The value is optional: it is only parsed when a `:` follows the name.
        let value = p.opt_if(Prec::Body.value(), TKind::Colon)?;
        Ok(MakeArm(field, value))
    }
}