Initial Commit
This commit is contained in:
516
src/parser.rs
Normal file
516
src/parser.rs
Normal file
@@ -0,0 +1,516 @@
|
||||
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
|
||||
use crate::{
|
||||
ast::*,
|
||||
lexer::{LexError, Lexer},
|
||||
span::Span,
|
||||
token::{TKind, Token},
|
||||
};
|
||||
use std::{error::Error, fmt::Display, vec};
|
||||
|
||||
pub mod numeric;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum ParseError {
|
||||
FromLexer(LexError),
|
||||
Expected(TKind, Span),
|
||||
NotPattern(TKind, Span),
|
||||
NotPrefix(TKind, Span),
|
||||
NotInfix(TKind, Span),
|
||||
NotPostfix(TKind, Span),
|
||||
}
|
||||
impl Error for ParseError {}
|
||||
impl Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::FromLexer(e) => e.fmt(f),
|
||||
Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
|
||||
Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
|
||||
Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
|
||||
Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."),
|
||||
Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type PResult<T> = Result<T, ParseError>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Parser<'t> {
|
||||
pub lexer: Lexer<'t>,
|
||||
pub next_tok: Option<Token>,
|
||||
pub last_loc: Span,
|
||||
}
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
/// Constructs a new Parser
|
||||
pub fn new(lexer: Lexer<'t>) -> Self {
|
||||
Self { lexer, next_tok: None, last_loc: Span::default() }
|
||||
}
|
||||
|
||||
/// The identity function. This exists to make production chaining easier.
|
||||
pub fn then<T>(&self, t: T) -> T {
|
||||
t
|
||||
}
|
||||
|
||||
pub fn span(&self) -> Span {
|
||||
self.last_loc
|
||||
}
|
||||
|
||||
/// Parses a value that implements the [Parse] trait.
|
||||
pub fn parse<T: Parse<'t>>(&mut self, level: usize) -> PResult<T> {
|
||||
Parse::parse(self, level)
|
||||
}
|
||||
|
||||
/// Peeks the next [Token]. Returns [ParseError::FromLexer] on lexer error.
|
||||
pub fn peek(&mut self) -> PResult<&Token> {
|
||||
let next_tok = match self.next_tok.take() {
|
||||
Some(tok) => tok,
|
||||
None => match self.lexer.scan() {
|
||||
Ok(tok) => tok,
|
||||
Err(e) => Err(ParseError::FromLexer(e))?,
|
||||
},
|
||||
};
|
||||
self.last_loc = next_tok.span;
|
||||
self.next_tok = Some(next_tok);
|
||||
Ok(self.next_tok.as_ref().expect("should have token"))
|
||||
}
|
||||
|
||||
/// Peeks the next token if it matches the `expected` [TKind]
|
||||
pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> {
|
||||
self.peek().into_iter().find(|tok| tok.kind == expected)
|
||||
}
|
||||
|
||||
/// Consumes and returns the currently-peeked [Token].
|
||||
pub fn take(&mut self) -> Option<Token> {
|
||||
self.next_tok.take()
|
||||
}
|
||||
|
||||
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
|
||||
pub fn take_lexeme(&mut self) -> Option<String> {
|
||||
self.take().map(|tok| tok.lexeme)
|
||||
}
|
||||
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> PResult<Token> {
|
||||
self.peek()?;
|
||||
Ok(self.take().expect("should have token here"))
|
||||
}
|
||||
|
||||
/// Consumes and returns the next [Token] if it matches the `expected` [TKind]
|
||||
pub fn next_if(&mut self, expected: TKind) -> PResult<Token> {
|
||||
let token = self.peek()?;
|
||||
if token.kind == expected {
|
||||
Ok(self.take().expect("should have token here"))
|
||||
} else {
|
||||
Err(ParseError::Expected(expected, token.span))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a list of P separated by `sep` tokens, ending in an `end` token.
|
||||
/// ```nobnf
|
||||
/// List<T> = (T `sep`)* T? `end` ;
|
||||
/// ```
|
||||
pub fn list<P: Parse<'t>>(
|
||||
&mut self,
|
||||
mut elems: Vec<P>,
|
||||
sep: TKind,
|
||||
end: TKind,
|
||||
) -> PResult<Vec<P>> {
|
||||
while self.peek_if(end).is_none() {
|
||||
elems.push(self.parse(0)?);
|
||||
if self.next_if(sep).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.next_if(end)?;
|
||||
Ok(elems)
|
||||
}
|
||||
|
||||
/// Parses into an [`Option<P>`] if the next token is `next`
|
||||
pub fn opt_if<P: Parse<'t>>(&mut self, level: usize, next: TKind) -> PResult<Option<P>> {
|
||||
Ok(match self.next_if(next) {
|
||||
Ok(_) => Some(self.parse(level)?),
|
||||
Err(_) => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses an expression into a vec unless the next token is `end`
|
||||
pub fn opt<P: Parse<'t>>(&mut self, level: usize, end: TKind) -> PResult<Option<P>> {
|
||||
let out = match self.peek_if(end) {
|
||||
None => Some(self.parse(level)?),
|
||||
Some(_) => None,
|
||||
};
|
||||
self.next_if(end)?;
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Consumes the currently peeked token without returning it.
|
||||
pub fn consume(&mut self) -> &mut Self {
|
||||
self.next_tok = None;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Parse<'t> {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
|
||||
where Self: Sized;
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for Literal {
|
||||
fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
|
||||
let tok = p.peek()?;
|
||||
Ok(match tok.kind {
|
||||
TKind::True => p.consume().then(Literal::Bool(true)),
|
||||
TKind::False => p.consume().then(Literal::Bool(false)),
|
||||
TKind::Character => {
|
||||
Literal::Char(p.take_lexeme().expect("should have Token").remove(0))
|
||||
}
|
||||
TKind::Integer => {
|
||||
let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
|
||||
// TODO: more complex int parsing
|
||||
let int = lexeme
|
||||
.parse()
|
||||
.map_err(|_| ParseError::Expected(TKind::Integer, span))?;
|
||||
Literal::Int(int)
|
||||
}
|
||||
TKind::String => Literal::Str(p.take_lexeme().expect("should have Token")),
|
||||
_ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for Pat {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
|
||||
let tok = p.peek()?;
|
||||
match tok.kind {
|
||||
TKind::Comment => p.consume().parse(level),
|
||||
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
||||
Ok(Pat::Lit(p.parse(0)?))
|
||||
}
|
||||
TKind::Identifier => match tok.lexeme.as_str() {
|
||||
"_" => Ok(p.consume().then(Pat::Ignore)),
|
||||
_ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))),
|
||||
},
|
||||
TKind::Grave => Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)),
|
||||
TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
|
||||
Some(_) => Some(p.parse(level)?),
|
||||
None => None,
|
||||
})),
|
||||
TKind::LParen => Ok(Pat::Tuple(p.consume().list(
|
||||
vec![],
|
||||
TKind::Comma,
|
||||
TKind::RParen,
|
||||
)?)),
|
||||
TKind::LBrack => Ok(Pat::Slice(p.consume().list(
|
||||
vec![],
|
||||
TKind::Comma,
|
||||
TKind::RBrack,
|
||||
)?)),
|
||||
_ => Err(ParseError::NotPattern(tok.kind, tok.span)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for MatchArm {
|
||||
fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
|
||||
p.next_if(TKind::Bar).ok();
|
||||
Ok(MatchArm(
|
||||
p.list(vec![], TKind::Bar, TKind::FatArrow)?,
|
||||
p.parse(0)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for MakeArm {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
|
||||
Ok(MakeArm(p.next_if(TKind::Identifier)?.lexeme, {
|
||||
p.next_if(TKind::Colon)
|
||||
.ok()
|
||||
.map(|_| p.parse(level))
|
||||
.transpose()?
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Operator precedence classes, declared from loosest-binding to tightest-binding.
///
/// Each class occupies two adjacent binding levels (see [`Prec::value`]), so the
/// levels returned by [`Prec::prev`] and [`Prec::next`] can differ by one.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Prec {
    Min,
    Do,
    Assign,
    Tuple,
    Make,
    Body,
    Logical,
    LogOr,
    LogAnd,
    Compare,
    Range,
    Binary,
    Shift,
    Factor,
    Term,
    Project,
    Unary,
    Extend,
    Max,
}

impl Prec {
    /// The binding level of [`Prec::Min`].
    pub const MIN: usize = Prec::Min.value();

    /// The base binding level of this class: twice its position in the enum.
    pub const fn value(self) -> usize {
        2 * (self as usize)
    }

    /// The level this class is compared against when the parser decides whether
    /// to accept an operator of this class.
    ///
    /// [`Prec::Assign`] is the one class whose `prev` is above its `next`.
    pub const fn prev(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base + 1 } else { base }
    }

    /// The level at which the operand following an operator of this class is parsed.
    pub const fn next(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base } else { base + 1 }
    }
}
|
||||
|
||||
fn from_prefix(token: &Token) -> PResult<(Op, Prec)> {
|
||||
Ok(match token.kind {
|
||||
TKind::Do => (Op::Do, Prec::Do),
|
||||
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
||||
(Op::Lit, Prec::Max)
|
||||
}
|
||||
TKind::Identifier => (Op::Id, Prec::Max),
|
||||
TKind::Grave => (Op::Mid, Prec::Max),
|
||||
TKind::Fn => (Op::Fn, Prec::Body),
|
||||
|
||||
TKind::Match => (Op::Match, Prec::Body),
|
||||
TKind::Macro => (Op::Macro, Prec::Assign),
|
||||
TKind::Let => (Op::Let, Prec::Body),
|
||||
TKind::Const => (Op::Const, Prec::Body),
|
||||
TKind::Loop => (Op::Loop, Prec::Body),
|
||||
TKind::If => (Op::If, Prec::Body),
|
||||
TKind::While => (Op::While, Prec::Body),
|
||||
TKind::Break => (Op::Break, Prec::Body),
|
||||
TKind::Return => (Op::Return, Prec::Body),
|
||||
|
||||
TKind::LBrack => (Op::Array, Prec::Min),
|
||||
TKind::RBrack => (Op::End, Prec::Min),
|
||||
TKind::LCurly => (Op::Block, Prec::Min),
|
||||
TKind::RCurly => (Op::End, Prec::Min),
|
||||
TKind::LParen => (Op::Group, Prec::Min),
|
||||
TKind::RParen => (Op::End, Prec::Min),
|
||||
TKind::Amp => (Op::Refer, Prec::Max),
|
||||
// TKind::AmpAmp => todo!("addraddr"),
|
||||
TKind::Bang => (Op::Not, Prec::Unary),
|
||||
TKind::BangBang => (Op::Identity, Prec::Unary),
|
||||
TKind::Bar => (Op::Lambda, Prec::Min),
|
||||
TKind::BarBar => (Op::Lambda, Prec::Max),
|
||||
TKind::DotDot => (Op::RangeEx, Prec::Range),
|
||||
TKind::DotDotEq => (Op::RangeIn, Prec::Range),
|
||||
TKind::Minus => (Op::Neg, Prec::Unary),
|
||||
TKind::Plus => (Op::Identity, Prec::Unary),
|
||||
TKind::Star => (Op::Deref, Prec::Unary),
|
||||
|
||||
kind => Err(ParseError::NotPrefix(kind, token.span))?,
|
||||
})
|
||||
}
|
||||
|
||||
fn from_infix(token: &Token) -> PResult<(Op, Prec)> {
|
||||
Ok(match token.kind {
|
||||
TKind::Semi => (Op::Do, Prec::Do), // the inspiration
|
||||
TKind::RParen => (Op::End, Prec::Do),
|
||||
TKind::Comma => (Op::Tuple, Prec::Tuple),
|
||||
TKind::Eq => (Op::Set, Prec::Assign),
|
||||
TKind::XorXor => (Op::LogXor, Prec::Logical),
|
||||
TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd),
|
||||
TKind::BarBar => (Op::LogOr, Prec::LogOr),
|
||||
TKind::Lt => (Op::Lt, Prec::Compare),
|
||||
TKind::LtEq => (Op::Leq, Prec::Compare),
|
||||
TKind::EqEq => (Op::Eq, Prec::Compare),
|
||||
TKind::BangEq => (Op::Neq, Prec::Compare),
|
||||
TKind::GtEq => (Op::Geq, Prec::Compare),
|
||||
TKind::Gt => (Op::Gt, Prec::Compare),
|
||||
TKind::DotDot => (Op::RangeEx, Prec::Range),
|
||||
TKind::DotDotEq => (Op::RangeIn, Prec::Range),
|
||||
TKind::Amp => (Op::And, Prec::Binary),
|
||||
TKind::Xor => (Op::Xor, Prec::Binary),
|
||||
TKind::Bar => (Op::Or, Prec::Binary),
|
||||
TKind::LtLt => (Op::Shl, Prec::Shift),
|
||||
TKind::GtGt => (Op::Shr, Prec::Shift),
|
||||
TKind::Plus => (Op::Add, Prec::Factor),
|
||||
TKind::Minus => (Op::Sub, Prec::Factor),
|
||||
TKind::Star => (Op::Mul, Prec::Term),
|
||||
TKind::Slash => (Op::Div, Prec::Term),
|
||||
TKind::Rem => (Op::Rem, Prec::Term),
|
||||
TKind::Dot => (Op::Dot, Prec::Project),
|
||||
TKind::ColonColon => (Op::Path, Prec::Max),
|
||||
kind => Err(ParseError::NotInfix(kind, token.span))?,
|
||||
})
|
||||
}
|
||||
|
||||
fn from_postfix(token: &Token) -> PResult<(Op, Prec)> {
|
||||
Ok(match token.kind {
|
||||
TKind::Question => (Op::Try, Prec::Unary),
|
||||
TKind::LParen => (Op::Call, Prec::Extend),
|
||||
TKind::LBrack => (Op::Index, Prec::Extend),
|
||||
TKind::LCurly => (Op::Make, Prec::Make),
|
||||
kind => Err(ParseError::NotPostfix(kind, token.span))?,
|
||||
})
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
fn should_coagulate(prev: Op, op: Op) -> bool {
|
||||
prev == op && (match prev {
|
||||
Op::Do => true,
|
||||
Op::Tuple => true,
|
||||
Op::Dot => false,
|
||||
Op::Path => true,
|
||||
Op::Lt => false,
|
||||
Op::Leq => false,
|
||||
Op::Eq => false,
|
||||
Op::Neq => false,
|
||||
Op::Geq => false,
|
||||
Op::Gt => false,
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
impl<'t> Parse<'t> for Expr {
|
||||
/// Parses an [Expr]ession.
|
||||
///
|
||||
/// The `level` parameter indicates the operator binding level of the expression.
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
|
||||
const MIN: usize = Prec::MIN;
|
||||
while p.next_if(TKind::Comment).is_ok() {}
|
||||
|
||||
// Prefix
|
||||
let tok = p.peek()?;
|
||||
let ((op, prec), span) = (from_prefix(tok)?, tok.span);
|
||||
|
||||
let mut head = match op {
|
||||
// Empty is returned when a block finisher is an expr prefix.
|
||||
// It's the only expr that doesn't consume.
|
||||
Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]),
|
||||
Op::End => Err(ParseError::NotPrefix(tok.kind, span))?,
|
||||
|
||||
Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
|
||||
Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
|
||||
Op::Lit => Expr::Lit(p.parse(MIN)?),
|
||||
Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?),
|
||||
Op::Const => Expr::Const(p.consume().parse(prec.next())?, {
|
||||
p.next_if(TKind::Eq)?;
|
||||
p.parse(prec.next())?
|
||||
}),
|
||||
Op::Macro => Expr::Op(
|
||||
op,
|
||||
vec![p.consume().parse(prec.next())?, {
|
||||
p.next_if(TKind::FatArrow)?;
|
||||
p.parse(prec.next())?
|
||||
}],
|
||||
),
|
||||
Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, {
|
||||
p.next_if(TKind::LCurly)?;
|
||||
p.list(vec![], TKind::Comma, TKind::RCurly)?
|
||||
}),
|
||||
Op::Block => Expr::Op(
|
||||
op,
|
||||
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
|
||||
),
|
||||
Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?),
|
||||
Op::Group => match p.consume().opt(MIN, TKind::RParen)? {
|
||||
Some(value) => Expr::Op(Op::Group, vec![value]),
|
||||
None => Expr::Op(Op::Tuple, vec![]),
|
||||
},
|
||||
Op::If | Op::While => {
|
||||
p.consume();
|
||||
let exprs = vec![
|
||||
// conditional restricted to Logical operators or above
|
||||
p.parse(Prec::Logical.value())?,
|
||||
p.parse(prec.next())?,
|
||||
match p.peek() {
|
||||
Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
|
||||
_ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())),
|
||||
},
|
||||
];
|
||||
Expr::Op(op, exprs)
|
||||
}
|
||||
Op::Fn => {
|
||||
p.consume().next_if(TKind::LParen)?;
|
||||
Expr::Fn(
|
||||
p.list(vec![], TKind::Comma, TKind::RParen)?,
|
||||
p.parse(prec.next())?,
|
||||
)
|
||||
}
|
||||
// dirty hack: There are two closure operators, signaled by returned prec.
|
||||
Op::Lambda if prec == Prec::Min => Expr::Fn(
|
||||
p.consume().list(vec![], TKind::Comma, TKind::Bar)?,
|
||||
p.parse(Prec::Body.next())?,
|
||||
),
|
||||
Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?),
|
||||
|
||||
_ => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
|
||||
};
|
||||
|
||||
// Postfix
|
||||
while let Ok(tok) = p.peek()
|
||||
&& let Ok((op, prec)) = from_postfix(tok)
|
||||
&& level <= prec.prev()
|
||||
&& op != Op::End
|
||||
{
|
||||
let span = span.merge(p.span());
|
||||
p.consume();
|
||||
head = match op {
|
||||
Op::Make => Expr::Make(
|
||||
head.anno(span).into(),
|
||||
p.consume().list(vec![], TKind::Comma, TKind::RCurly)?,
|
||||
),
|
||||
Op::Index => Expr::Op(
|
||||
op,
|
||||
p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?,
|
||||
),
|
||||
Op::Call => Expr::Op(
|
||||
op,
|
||||
p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?,
|
||||
),
|
||||
_ => Expr::Op(op, vec![head.anno(span)]),
|
||||
};
|
||||
}
|
||||
|
||||
// Infix
|
||||
while let Ok(tok) = p.peek()
|
||||
&& let Ok((op, prec)) = from_infix(tok)
|
||||
&& level <= prec.prev()
|
||||
&& op != Op::End
|
||||
{
|
||||
let span = span.merge(p.span());
|
||||
p.consume();
|
||||
|
||||
head = match head {
|
||||
// controls expression chaining vs coagulating
|
||||
Expr::Op(prev, mut args) if should_coagulate(prev, op) => {
|
||||
args.push(p.parse(prec.next())?);
|
||||
Expr::Op(op, args)
|
||||
}
|
||||
head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(head)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
|
||||
where Self: Sized {
|
||||
let start = p.span();
|
||||
Ok(Anno(p.parse(level)?, start.merge(p.span())))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
|
||||
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
|
||||
where Self: Sized {
|
||||
Ok(Box::new(p.parse(level)?))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user