doughlang: Preserve errors through entire pipeline

lexer:
- Un-stringify errors into structured LexFailure variants (see the sketch below)
- Reserve more keywords (impl, static)
- Lex doc comments (TKind::Doc) separately from ordinary comments

parser:
- MASSIVE changes to peek, peek_if, and next_if; rename consume_if => expect
- Keep track of when EOF is allowable
- TKind is stupidly cheap with >100 niches, so we can fit like 4 of them in a single ParseError lmao
- TODO: make sure EOF/UnexpectedEOF propagation is correct. It seems... Kinda Not correct.
- Add meta-expressions
2025-10-17 06:25:11 -04:00
parent c8f1f082c4
commit 6368e68941
6 changed files with 543 additions and 351 deletions
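
A minimal sketch of what the un-stringified errors look like to a caller, modeled on the updated parse driver further down; the doughlang::ast module path and the parse_all helper are assumptions, not part of this commit:

use doughlang::{
    ast::Expr, // assumed module path for Expr
    lexer::Lexer,
    parser::{ParseError, Parser},
};

// Drive the parser to the end of input, stopping on the structured
// ParseError::EOF variant instead of comparing error strings.
fn parse_all(document: &str) -> Result<Vec<Expr>, ParseError> {
    let mut parser = Parser::new(Lexer::new(document));
    let mut exprs = Vec::new();
    loop {
        match parser.parse::<Expr>(0) {
            // A clean end of input is now a plain enum match.
            Err(ParseError::EOF(_)) => break Ok(exprs),
            // Any other error (lexer failures included) is forwarded intact.
            Err(e) => break Err(e),
            Ok(expr) => exprs.push(expr),
        }
    }
}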


@@ -23,7 +23,7 @@ impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation
pub struct FqPath {
// TODO: Identifier interning
pub parts: Vec<String>,
// TODO:
// TODO: generic parameters
}
impl From<&str> for FqPath {
@@ -251,6 +251,7 @@ pub enum Op {
ArRep, // [ Expr ; Expr ]
Group, // ( Expr ,?)
Tuple, // Expr (, Expr)*
Meta, // #[ Expr ]
Try, // Expr '?'
Index, // Expr [ Expr,* ]
@@ -298,7 +299,17 @@ pub enum Op {
LogXor, // Expr ^^ Expr
LogOr, // Expr || Expr
Set, // Expr = Expr
Set, // Expr = Expr
MulSet, // Expr *= Expr
DivSet, // Expr /= Expr
RemSet, // Expr %= Expr
AddSet, // Expr += Expr
SubSet, // Expr -= Expr
ShlSet, // Expr <<= Expr
ShrSet, // Expr >>= Expr
AndSet, // Expr &= Expr
XorSet, // Expr ^= Expr
OrSet, // Expr |= Expr
}
use crate::{fmt::FmtAdapter, span::Span};
@@ -381,19 +392,19 @@ impl<A: Annotation> Display for Mod<A> {
impl Display for Typedef {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self(kind, pat) = self;
let kind = match kind {
TypedefKind::Alias => "type",
TypedefKind::Struct => "struct",
TypedefKind::Enum => "enum",
};
f.write_str(match kind {
TypedefKind::Alias => "type ",
TypedefKind::Struct => "struct ",
TypedefKind::Enum => "enum ",
})?;
match pat {
Pat::Struct(name, bind) => match bind.as_ref() {
Pat::Op(PatOp::Tuple, parts) => f
.delimit_indented(fmt!("{kind} {name} {{"), "}")
.delimit_indented(fmt!("{name} {{"), "}")
.list_wrap("\n", parts, ",\n", ",\n"),
other => write!(f, "{name} {{ {other} }}"),
},
_ => write!(f, "{kind} {pat}"),
_ => pat.fmt(f),
}
}
}
@@ -426,6 +437,10 @@ impl<A: Annotation> Display for Expr<A> {
.list_wrap("\n", exprs, "\n", "\n"),
Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
Self::Op(Op::Group, exprs) => f.list(exprs, ", "),
Self::Op(Op::Meta, exprs) => match exprs.as_slice() {
[meta, expr @ ..] => f.delimit(fmt!("#[{meta}]\n"), "").list(expr, ","),
[] => write!(f, "#[]"),
},
Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
[callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
@@ -436,7 +451,7 @@ impl<A: Annotation> Display for Expr<A> {
[] => write!(f, "{op}"),
},
Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"),
Self::Op(op @ Op::Do, exprs) => f.list(exprs, op),
Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "),
Self::Op(op, exprs) => match exprs.as_slice() {
@@ -449,53 +464,64 @@ impl<A: Annotation> Display for Expr<A> {
impl Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Op::Do => "; ".fmt(f),
Op::As => " as ".fmt(f),
Op::Macro => "macro ".fmt(f),
Op::Block => "{}".fmt(f),
Op::Array => "[]".fmt(f),
Op::ArRep => "[;]".fmt(f),
Op::Group => "()".fmt(f),
Op::Tuple => "()".fmt(f),
Op::Try => "?".fmt(f),
Op::Index => "".fmt(f),
Op::Call => "".fmt(f),
Op::Pub => "pub ".fmt(f),
Op::Loop => "loop ".fmt(f),
Op::If => "if ".fmt(f),
Op::While => "while ".fmt(f),
Op::Break => "break ".fmt(f),
Op::Return => "return ".fmt(f),
Op::Dot => ".".fmt(f),
Op::RangeEx => "..".fmt(f),
Op::RangeIn => "..=".fmt(f),
Op::Neg => "-".fmt(f),
Op::Not => "!".fmt(f),
Op::Identity => "!!".fmt(f),
Op::Refer => "&".fmt(f),
Op::Deref => "*".fmt(f),
Op::Mul => " * ".fmt(f),
Op::Div => " / ".fmt(f),
Op::Rem => " % ".fmt(f),
Op::Add => " + ".fmt(f),
Op::Sub => " - ".fmt(f),
Op::Shl => " << ".fmt(f),
Op::Shr => " >> ".fmt(f),
Op::And => " & ".fmt(f),
Op::Xor => " ^ ".fmt(f),
Op::Or => " | ".fmt(f),
Op::Lt => " < ".fmt(f),
Op::Leq => " <= ".fmt(f),
Op::Eq => " == ".fmt(f),
Op::Neq => " != ".fmt(f),
Op::Geq => " >= ".fmt(f),
Op::Gt => " > ".fmt(f),
Op::LogAnd => " && ".fmt(f),
Op::LogXor => " ^^ ".fmt(f),
Op::LogOr => " || ".fmt(f),
Op::Set => " = ".fmt(f),
}
f.write_str(match self {
Op::Do => "; ",
Op::As => " as ",
Op::Macro => "macro ",
Op::Block => "{}",
Op::Array => "[]",
Op::ArRep => "[;]",
Op::Group => "()",
Op::Tuple => "()",
Op::Meta => "#[]",
Op::Try => "?",
Op::Index => "",
Op::Call => "",
Op::Pub => "pub ",
Op::Loop => "loop ",
Op::If => "if ",
Op::While => "while ",
Op::Break => "break ",
Op::Return => "return ",
Op::Dot => ".",
Op::RangeEx => "..",
Op::RangeIn => "..=",
Op::Neg => "-",
Op::Not => "!",
Op::Identity => "!!",
Op::Refer => "&",
Op::Deref => "*",
Op::Mul => " * ",
Op::Div => " / ",
Op::Rem => " % ",
Op::Add => " + ",
Op::Sub => " - ",
Op::Shl => " << ",
Op::Shr => " >> ",
Op::And => " & ",
Op::Xor => " ^ ",
Op::Or => " | ",
Op::Lt => " < ",
Op::Leq => " <= ",
Op::Eq => " == ",
Op::Neq => " != ",
Op::Geq => " >= ",
Op::Gt => " > ",
Op::LogAnd => " && ",
Op::LogXor => " ^^ ",
Op::LogOr => " || ",
Op::Set => " = ",
Op::MulSet => " *= ",
Op::DivSet => " /= ",
Op::RemSet => " %= ",
Op::AddSet => " += ",
Op::SubSet => " -= ",
Op::ShlSet => " <<= ",
Op::ShrSet => " >>= ",
Op::AndSet => " &= ",
Op::XorSet => " ^= ",
Op::OrSet => " |= ",
})
}
}


@@ -79,6 +79,11 @@ impl<'f, F: Write + ?Sized> Indent<'f, F> {
pub fn new(f: &'f mut F, indent: &'static str) -> Self {
Indent { f, needs_indent: false, indent }
}
/// Gets mutable access to the inner [Write]-adapter
pub fn inner(&mut self) -> &mut F {
self.f
}
}
impl<F: Write + ?Sized> Write for Indent<'_, F> {
@@ -103,10 +108,18 @@ impl<F: Write + ?Sized> Write for Indent<'_, F> {
/// Prints delimiters around anything formatted with this. Implies [Indent]
pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> {
f: &'f mut F,
/// The formatter
pub f: &'f mut F,
close: E,
}
impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
/// Gets mutable access to the inner [Write]-adapter
pub fn inner(&mut self) -> &mut F {
self.f
}
}
impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
let _ = write!(f, "{open}");
@@ -133,6 +146,13 @@ pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> {
close: E,
}
impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
/// Gets mutable access to the inner [Write]-adapter
pub fn inner(&mut self) -> &mut F {
self.f.inner()
}
}
impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
let mut f = f.indent();


@@ -8,9 +8,10 @@ use crate::{span::Span, token::*};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LexError {
pub pos: u32,
pub res: &'static str,
pub pos: Span,
pub res: LexFailure,
}
impl std::error::Error for LexError {}
impl std::fmt::Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -19,6 +20,44 @@ impl std::fmt::Display for LexError {
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LexFailure {
/// Reached end of file
EOF,
UnexpectedEOF,
Unexpected(char),
UnterminatedBlockComment,
UnterminatedCharacter,
UnterminatedString,
UnterminatedUnicodeEscape,
InvalidUnicodeEscape(u32),
InvalidDigitForBase(char, u32),
IntegerOverflow,
}
use LexFailure::*;
pub use LexFailure::{EOF, UnexpectedEOF};
impl std::fmt::Display for LexFailure {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::EOF => "EOF".fmt(f),
Self::UnexpectedEOF => "Unexpected EOF".fmt(f),
Self::Unexpected(c) => write!(f, "Character '{c:?}'"),
Self::UnterminatedBlockComment => "Unterminated Block Comment".fmt(f),
Self::UnterminatedCharacter => "Unterminated Character".fmt(f),
Self::UnterminatedString => "Unterminated String".fmt(f),
Self::UnterminatedUnicodeEscape => "Unterminated Unicode Escape".fmt(f),
Self::InvalidUnicodeEscape(hex) => {
write!(f, "'\\u{{{hex:x}}}' is not a valid UTF-8 codepoint")
}
Self::InvalidDigitForBase(digit, base) => {
write!(f, "Invalid digit {digit} for base {base}")
}
Self::IntegerOverflow => "Integer literal does not fit in 128 bits".fmt(f),
}
}
}
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
/// The source text
@@ -72,8 +111,8 @@ impl<'t> Lexer<'t> {
}
/// Produces a LexError at the start of the current token
fn error(&self, res: &'static str) -> LexError {
LexError { pos: self.head, res }
fn error(&self, res: LexFailure) -> LexError {
LexError { pos: Span(self.head, self.tail), res }
}
/// Gets the Lexer's current &[str] lexeme and [Span]
@@ -118,7 +157,7 @@ impl<'t> Lexer<'t> {
.skip_whitespace()
.start_token()
.peek()
.ok_or_else(|| self.error("EOF"))?
.ok_or_else(|| self.error(EOF))?
{
'!' => Bang,
'"' => return self.string(),
@@ -154,7 +193,7 @@ impl<'t> Lexer<'t> {
'~' => Tilde,
'_' => return self.identifier(),
c if is_xid_start(c) => return self.identifier(),
_ => Err(self.error("Invalid"))?,
c => Err(self.error(Unexpected(c)))?,
};
// Handle digraphs
@@ -217,8 +256,12 @@ impl<'t> Lexer<'t> {
/// Consumes characters until the lexer reaches a newline `'\n'`
pub fn line_comment(&mut self) -> Result<Token, LexError> {
let kind = match self.consume().peek() {
Some('!' | '/') => TKind::Doc,
_ => TKind::Comment,
};
while self.consume().peek().is_some_and(|c| c != '\n') {}
Ok(self.produce(TKind::Comment))
Ok(self.produce(kind))
}
/// Consumes characters until the lexer reaches the end of a *nested* block comment.
@@ -232,7 +275,7 @@ impl<'t> Lexer<'t> {
_ => continue,
};
}
Err(self.error("Unterminated block comment"))
Err(self.error(UnterminatedBlockComment))
}
/// Consumes characters until it reaches a character not in [is_xid_continue].
@@ -257,6 +300,7 @@ impl<'t> Lexer<'t> {
"fn" => TKind::Fn,
"for" => TKind::For,
"if" => TKind::If,
"impl" => TKind::Impl,
"in" => TKind::In,
"let" => TKind::Let,
"loop" => TKind::Loop,
@@ -266,6 +310,7 @@ impl<'t> Lexer<'t> {
"or" => TKind::Or,
"pub" => TKind::Public,
"return" => TKind::Return,
"static" => TKind::Const, // TODO: Static
"struct" => TKind::Struct,
"then" => TKind::Do,
"true" => TKind::True,
@@ -286,7 +331,7 @@ impl<'t> Lexer<'t> {
if self.take().is_some_and(|c| c == '\'') {
Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c)))
} else {
Err(self.error("Unterminated character"))
Err(self.error(UnterminatedCharacter))
}
}
@@ -296,7 +341,7 @@ impl<'t> Lexer<'t> {
self.consume();
loop {
lexeme.push(match self.take() {
None => Err(self.error("Unterminated string"))?,
None => Err(self.error(UnterminatedString))?,
Some('\\') => self.escape()?,
Some('"') => break,
Some(c) => c,
@@ -308,40 +353,44 @@ impl<'t> Lexer<'t> {
/// Parses a single escape sequence into its resulting char value.
pub fn escape(&mut self) -> Result<char, LexError> {
Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
' ' => '\u{a0}', // Non-breaking space
'0' => '\0', // C0 Null Character
'a' => '\x07', // C0 Acknowledge
'b' => '\x08', // C0 Bell
'e' => '\x1b', // C0 Escape
'f' => '\x0c', // Form Feed
'n' => '\n', // New Line
'r' => '\r', // Carriage Return
't' => '\t', // Tab
'u' => self.unicode_escape()?,
'x' => self.hex_escape()?,
c => c,
})
Ok(
match self.take().ok_or_else(|| self.error(UnexpectedEOF))? {
' ' => '\u{a0}', // Non-breaking space
'0' => '\0', // C0 Null Character
'a' => '\x07', // C0 Acknowledge
'b' => '\x08', // C0 Bell
'e' => '\x1b', // C0 Escape
'f' => '\x0c', // Form Feed
'n' => '\n', // New Line
'r' => '\r', // Carriage Return
't' => '\t', // Tab
'u' => self.unicode_escape()?,
'x' => self.hex_escape()?,
c => c,
},
)
}
/// Parses two hex-digits and constructs a [char] out of them.
pub fn hex_escape(&mut self) -> Result<char, LexError> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(self.error("Invalid digit"))
char::from_u32(out).ok_or(self.error(InvalidUnicodeEscape(out)))
}
/// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
pub fn unicode_escape(&mut self) -> Result<char, LexError> {
self.next_if('{')
.ok_or_else(|| self.error("No unicode escape opener"))?;
.ok_or_else(|| self.error(UnterminatedUnicodeEscape))?;
let mut out = 0;
while let Some(c) = self.take() {
if c == '}' {
return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value"));
return char::from_u32(out).ok_or_else(|| self.error(InvalidUnicodeEscape(out)));
}
out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?;
out = out * 16
+ c.to_digit(16)
.ok_or_else(|| self.error(InvalidDigitForBase(c, 16)))?;
}
Err(self.error("Unterminated unicode escape"))
Err(self.error(UnterminatedUnicodeEscape))
}
/// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
@@ -353,7 +402,10 @@ impl<'t> Lexer<'t> {
while let Some(c) = self.peek() {
int = match c.to_digit(BASE).ok_or(c) {
Err('_') => int,
Ok(c) => int.wrapping_mul(BASE as _).wrapping_add(c as _),
Ok(c) => int
.checked_mul(BASE as _)
.and_then(|int| int.checked_add(c as _))
.ok_or_else(|| self.error(IntegerOverflow))?,
_ => break,
};
self.consume();
@@ -362,12 +414,13 @@ impl<'t> Lexer<'t> {
Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
}
/// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36
/// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36.
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
let digit = self.take().ok_or_else(|| self.error(UnexpectedEOF))?;
if let Some(digit) = digit.to_digit(BASE) {
Ok(digit)
} else {
Err(self.error("Invalid digit"))
Err(self.error(InvalidDigitForBase(digit, BASE)))
}
}
}
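
A corresponding sketch one layer down, assuming the Lexer::new/scan API exercised by the test harness below; the counting helper itself is hypothetical:

use doughlang::lexer::{LexError, LexFailure, Lexer};

// Scan a source string to completion, distinguishing the ordinary
// end-of-input marker from real lexing failures.
fn count_tokens(src: &str) -> Result<usize, LexError> {
    let mut lexer = Lexer::new(src);
    let mut count = 0;
    loop {
        match lexer.scan() {
            Ok(_) => count += 1,
            // EOF is the normal way a scan loop terminates, not a failure.
            Err(LexError { res: LexFailure::EOF, .. }) => break Ok(count),
            // Every other failure keeps its Span and structured reason.
            Err(e) => break Err(e),
        }
    }
}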


@@ -9,7 +9,7 @@ use doughlang::{
Expr,
macro_matcher::{Match, Subst},
},
lexer::{LexError, Lexer},
lexer::{EOF, LexError, Lexer},
parser::{ParseError, Parser},
span::Span,
token::{TKind, Token},
@@ -73,7 +73,7 @@ fn lex() -> Result<(), Box<dyn Error>> {
}
loop {
match lexer.scan() {
Err(LexError { res: "EOF", .. }) => {
Err(LexError { res: EOF, .. }) => {
break Ok(Response::Accept);
}
Err(e) => {
@@ -97,7 +97,7 @@ fn exprs() -> Result<(), Box<dyn Error>> {
}
for idx in 0.. {
match parser.parse::<Anno<Expr>>(0) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
return Ok(Response::Accept);
}
Err(e) => {
@@ -120,7 +120,7 @@ fn pats() -> Result<(), Box<dyn Error>> {
}
loop {
match parser.parse::<Pat>(PPrec::Min) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
break Ok(Response::Accept);
}
Err(e) => {
@@ -142,7 +142,7 @@ fn tys() -> Result<(), Box<dyn Error>> {
}
loop {
match parser.parse::<Ty>(()) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
break Ok(Response::Accept);
}
Err(e) => {
@@ -205,16 +205,25 @@ fn subst() -> Result<(), Box<dyn Error>> {
fn parse(document: &str) {
let mut parser = Parser::new(Lexer::new(document));
let isatty = std::io::stdin().is_terminal();
for idx in 0.. {
match parser.parse::<Expr>(0) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break,
Err(e) => {
println!("\x1b[31m{e}\x1b[0m");
Err(e @ ParseError::EOF(s)) if s.tail == document.len() as _ => {
println!("\x1b[92m{e} (total {} bytes)\x1b[0m", document.len());
break;
}
Ok(v) => {
Err(e @ ParseError::EOF(_)) => {
println!("\x1b[93m{e} (total {} bytes)\x1b[0m", document.len());
break;
}
Err(e) => {
println!("\x1b[91m{e}\x1b[0m");
break;
}
Ok(v) if isatty => {
println!("\x1b[{}m{v}", (idx + 5) % 6 + 31);
}
_ => {}
}
}
}


@@ -1,7 +1,7 @@
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{
ast::*,
lexer::{LexError, Lexer},
lexer::{LexError, LexFailure, Lexer},
span::Span,
token::{Lexeme, TKind, Token},
};
@@ -9,8 +9,12 @@ use std::{error::Error, fmt::Display, vec};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
/// Reached the expected end of input.
EOF(Span),
/// Unexpectedly reached end of input.
UnexpectedEOF(Span),
FromLexer(LexError),
Expected(TKind, Span),
Expected(TKind, TKind, Span),
NotLiteral(TKind, Span),
NotPattern(TKind, Span),
NotType(TKind, Span),
@@ -19,12 +23,16 @@ pub enum ParseError {
NotPostfix(TKind, Span),
}
pub use ParseError::EOF;
impl Error for ParseError {}
impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
Self::FromLexer(e) => e.fmt(f),
Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."),
@@ -37,10 +45,36 @@ impl Display for ParseError {
pub type PResult<T> = Result<T, ParseError>;
trait PResultExt<T> {
fn no_eof(self) -> PResult<T>;
fn allow_eof(self) -> PResult<Option<T>>;
}
impl<T> PResultExt<T> for PResult<T> {
fn no_eof(self) -> Self {
match self {
Err(ParseError::EOF(span)) => Err(ParseError::UnexpectedEOF(span)),
other => other,
}
}
fn allow_eof(self) -> PResult<Option<T>> {
match self {
Ok(t) => Ok(Some(t)),
Err(ParseError::EOF(_)) => Ok(None),
Err(e) => Err(e),
}
}
}
/// Opens a scope where [ParseError::EOF] is unexpected (See [PResultExt::no_eof])
fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
f().no_eof()
}
#[derive(Debug)]
pub struct Parser<'t> {
pub lexer: Lexer<'t>,
pub next_tok: Option<Token>,
pub next_tok: Option<PResult<Token>>,
pub last_loc: Span,
pub elide_do: bool,
}
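
A rough illustration of how the two EOF adapters above are meant to combine, as it might read inside the parser module; the parenthesized helper is hypothetical, not part of this commit:

// EOF before a rule begins is fine; EOF after its opening token has
// been consumed is not.
fn parenthesized(p: &mut Parser<'_>) -> PResult<Option<Anno<Expr>>> {
    // allow_eof: a clean end of input here just means "nothing to parse",
    // so ParseError::EOF becomes Ok(None) for the caller.
    if p.next_if(TKind::LParen)
        .allow_eof()?
        .is_none_or(|t| t.is_err())
    {
        return Ok(None);
    }
    // no_eof: once '(' is consumed, running out of input is a real error,
    // so ParseError::EOF is rewritten into ParseError::UnexpectedEOF.
    let inner = p.parse(0).no_eof()?;
    p.expect(TKind::RParen)?;
    Ok(Some(inner))
}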
@@ -72,46 +106,59 @@ impl<'t> Parser<'t> {
None => loop {
match self.lexer.scan() {
Ok(Token { kind: TKind::Comment, .. }) => {}
Ok(tok) => break tok,
Err(e) => Err(ParseError::FromLexer(e))?,
Ok(tok) => break Ok(tok),
Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
Err(e) => break Err(ParseError::FromLexer(e)),
}
},
};
self.last_loc = next_tok.span;
self.next_tok = Some(next_tok);
Ok(self.next_tok.as_ref().expect("should have token"))
let next_tok = self.next_tok.as_ref().expect("should have Some lex result");
if let Ok(tok) = next_tok {
self.last_loc = tok.span;
}
next_tok.as_ref().map_err(|e| *e)
}
/// Peeks the next token if it matches the `expected` [TKind]
pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> {
self.peek().into_iter().find(|tok| tok.kind == expected)
pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
match self.peek() {
Ok(tok) if tok.kind == expected => Ok(Some(tok)),
Ok(_) => Ok(None),
Err(e) => Err(e),
}
}
/// Consumes and returns the currently-peeked [Token].
pub fn take(&mut self) -> Option<Token> {
let tok = self.next_tok.take();
self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. }));
pub fn take(&mut self) -> PResult<Token> {
let tok = self
.next_tok
.take()
.unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. }));
tok
}
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
pub fn take_lexeme(&mut self) -> Option<Lexeme> {
pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
self.take().map(|tok| tok.lexeme)
}
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> PResult<Token> {
self.peek()?;
self.peek().no_eof()?;
Ok(self.take().expect("should have token here"))
}
/// Consumes and returns the next [Token] if it matches the `expected` [TKind]
pub fn next_if(&mut self, expected: TKind) -> PResult<Token> {
let token = self.peek()?;
if token.kind == expected {
Ok(self.take().expect("should have token here"))
} else {
Err(ParseError::Expected(expected, token.span))
pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
match self.peek() {
Ok(t) if t.kind == expected => self.take().map(Ok),
Ok(t) => Ok(Err(t.kind)),
Err(e) => Err(e),
}
}
@@ -127,13 +174,15 @@ impl<'t> Parser<'t> {
end: TKind,
) -> PResult<Vec<P>> {
// TODO: This loses lexer errors
while self.peek_if(end).is_none() {
elems.push(self.parse(level.clone())?);
if self.next_if(sep).is_err() {
break;
}
while self.peek_if(end).no_eof()?.is_none() {
elems.push(self.parse(level.clone()).no_eof()?);
match self.peek_if(sep)? {
Some(_) => self.consume(),
None => break,
};
}
self.next_if(end)?;
self.next_if(end)?
.map_err(|tk| ParseError::Expected(end, tk, self.span()))?;
Ok(elems)
}
@@ -148,33 +197,37 @@ impl<'t> Parser<'t> {
sep: TKind,
) -> PResult<Vec<P>> {
loop {
elems.push(self.parse(level.clone())?);
if self.next_if(sep).is_err() {
break Ok(elems);
}
let elem = self.parse(level.clone()).no_eof()?;
elems.push(elem);
match self.peek_if(sep) {
Ok(Some(_)) => self.consume(),
Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
Err(e) => Err(e)?,
};
}
}
/// Parses into an [`Option<P>`] if the next token is `next`
pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
Ok(match self.next_if(next) {
Ok(_) => Some(self.parse(level)?),
Ok(match self.next_if(next)? {
Ok(_) => Some(self.parse(level).no_eof()?),
Err(_) => None,
})
}
/// Parses a P unless the next token is `end`
pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
let out = match self.peek_if(end) {
None => Some(self.parse(level)?),
let out = match self.peek_if(end)? {
None => Some(self.parse(level).no_eof()?),
Some(_) => None,
};
self.next_if(end)?;
self.expect(end)?;
Ok(out)
}
pub fn consume_if(&mut self, next: TKind) -> PResult<&mut Self> {
self.next_if(next)?;
pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
self.next_if(next)?
.map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
Ok(self)
}
@@ -197,17 +250,12 @@ impl<'t> Parse<'t> for FqPath {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let mut parts = vec![];
if p.next_if(TKind::ColonColon).is_ok() {
if p.next_if(TKind::ColonColon)?.is_ok() {
parts.push("".into()); // the "root"
}
loop {
parts.push(
p.next_if(TKind::Identifier)?
.lexeme
.string()
.expect("Identifier should have String"),
);
if p.next_if(TKind::ColonColon).is_err() {
while let Ok(id) = p.next_if(TKind::Identifier)? {
parts.push(id.lexeme.string().expect("Identifier should have String"));
if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
break;
}
}
@@ -223,26 +271,19 @@ impl<'t> Parse<'t> for Literal {
Ok(match tok.kind {
TKind::True => p.consume().then(Literal::Bool(true)),
TKind::False => p.consume().then(Literal::Bool(false)),
TKind::Character => Literal::Char(
p.take_lexeme()
.expect("should have Token")
.char()
.expect("should have one char in char literal"),
),
TKind::Integer => {
let Token { lexeme, span, .. } = p.take().expect("should have Token");
let Lexeme::Integer(int, _) = lexeme else {
Err(ParseError::Expected(TKind::Integer, span))?
};
Literal::Int(int)
}
TKind::String => Literal::Str({
let Token { lexeme, span, .. } = p.take().expect("should have Token");
lexeme
.string()
.ok_or(ParseError::Expected(TKind::String, span))?
TKind::Character => Literal::Char({
let Token { lexeme, .. } = p.take().expect("should have Token");
lexeme.char().expect("char token should have char")
}),
_ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
TKind::Integer => Literal::Int({
let Token { lexeme, .. } = p.take().expect("should have Token");
lexeme.int().expect("integer token should have int")
}),
TKind::String => Literal::Str({
let Token { lexeme, .. } = p.take().expect("should have Token");
lexeme.string().expect("string token should have string")
}),
other => Err(ParseError::NotLiteral(other, tok.span))?,
})
}
}
@@ -288,6 +329,7 @@ fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
impl<'t> Parse<'t> for Pat {
type Prec = PPrec;
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
let tok = p.peek()?;
@@ -315,12 +357,10 @@ impl<'t> Parse<'t> for Pat {
.opt(PPrec::Alt, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
),
Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => {
match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
}
}
Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
},
Err(e) => Err(e)?,
}
}
@@ -329,21 +369,21 @@ impl<'t> Parse<'t> for Pat {
TKind::DotDot => Pat::Op(
PatOp::Rest,
// Identifier in Rest position always becomes binder
match p.consume().peek()?.kind {
TKind::Identifier => vec![Pat::Name(
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Identifier) => vec![Pat::Name(
p.take_lexeme()
.expect("should have lexeme")
.string()
.expect("should be string"),
)],
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
TKind::DotDotEq => Pat::Op(
PatOp::RangeIn,
match p.consume().peek()?.kind {
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
@@ -360,14 +400,14 @@ impl<'t> Parse<'t> for Pat {
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};
while let Ok(tok) = p.peek()
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Some((op, prec)) = pat_from_infix(tok)
&& level <= prec
{
let kind = tok.kind;
head = match op {
PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?),
PatPs::Op(op @ PatOp::RangeEx) => Pat::Op(
PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op(
op,
match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
@@ -379,7 +419,6 @@ impl<'t> Parse<'t> for Pat {
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
}
}
Ok(head)
}
}
@@ -389,10 +428,11 @@ impl<'t> Parse<'t> for Ty {
fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self>
where Self: Sized {
let tok = p.peek()?;
let &Token { kind, span, .. } = p.peek()?;
let head = match tok.kind {
TKind::Identifier => match tok.lexeme.str() {
// TODO: this is a kinda jank way of error reporting
let head = match kind {
TKind::Identifier => match p.peek()?.lexeme.str() {
Some("_") => p.consume().then(Ty::Infer),
_ => Ty::Named(p.parse(())?),
},
@@ -403,7 +443,7 @@ impl<'t> Parse<'t> for Ty {
match p.next()? {
Token { kind: TKind::Semi, .. } => {
let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?);
p.next_if(TKind::RBrack)?;
p.expect(TKind::RBrack)?;
ty
}
Token { kind: TKind::RBrack, .. } => Ty::Slice(ty),
@@ -411,30 +451,27 @@ impl<'t> Parse<'t> for Ty {
}
}
TKind::Fn => {
p.consume().consume_if(TKind::LParen)?;
let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?;
tys.push(match p.next_if(TKind::Arrow) {
Ok(_) => p.parse(())?,
_ => Ty::Tuple(vec![]),
});
Ty::Fn(tys)
}
TKind::LParen => {
let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?;
match p.next_if(TKind::Arrow) {
Ok(_) => {
tys.push(p.parse(())?);
Ty::Fn(tys)
}
_ => Ty::Tuple(tys),
p.consume();
match p.parse(())? {
Ty::Fn(args) => Ty::Fn(args),
other @ Ty::Tuple(_) => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
other => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
}
}
_ => Err(ParseError::NotType(tok.kind, tok.span))?,
TKind::LParen => {
Ty::Tuple(p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?)
}
_ => Err(ParseError::NotType(kind, span))?,
};
Ok(match p.next_if(TKind::Arrow) {
Ok(_) => Ty::Fn(vec![head, p.parse(())?]),
Ok(match p.next_if(TKind::Arrow).allow_eof()? {
Some(Ok(_)) => Ty::Fn(vec![
match head {
args @ Ty::Tuple(_) => args,
arg => Ty::Tuple(vec![arg]),
},
p.parse(())?,
]),
_ => head,
})
}
@@ -483,15 +520,18 @@ pub enum Prec {
impl Prec {
pub const MIN: usize = Prec::Min.value();
pub const fn value(self) -> usize {
self as usize * 2
}
pub const fn prev(self) -> usize {
match self {
Self::Assign => self.value() + 1,
_ => self.value(),
}
}
pub const fn next(self) -> usize {
match self {
Self::Assign => self.value(),
@@ -526,7 +566,7 @@ pub enum Ps {
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::Semi => (Ps::ExplicitDo, Prec::Do),
TKind::Semi => (Ps::End, Prec::Body),
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
@@ -566,6 +606,7 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),
kind => Err(ParseError::NotPrefix(kind, token.span))?,
})
@@ -574,18 +615,24 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
TKind::As => (Ps::Op(Op::As), Prec::Max),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::LCurly => (Ps::Make, Prec::Make),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::In => (Ps::Op(Op::Do), Prec::Do),
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::LCurly => (Ps::Make, Prec::Make),
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
@@ -605,6 +652,13 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::As => (Ps::Op(Op::As), Prec::Max),
_ => (Ps::ImplicitDo, Prec::Do),
})
}
@@ -615,7 +669,7 @@ impl<'t> Parse<'t> for Const {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
Ok(Self(
p.consume().parse(PPrec::Tuple)?,
p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?,
p.expect(TKind::Eq)?.parse(Prec::Tuple.value())?,
))
}
}
@@ -637,7 +691,7 @@ impl<'t> Parse<'t> for Fn {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
match p.consume().next_if(TKind::Identifier) {
match p.consume().next_if(TKind::Identifier)? {
Ok(Token { lexeme, .. }) => Ok(Self(
lexeme.string(),
p.parse(PPrec::Tuple)?,
@@ -648,7 +702,7 @@ impl<'t> Parse<'t> for Fn {
None,
Pat::Op(
PatOp::Tuple,
p.consume_if(TKind::LParen)?.list(
p.expect(TKind::LParen)?.list(
vec![],
PPrec::Tuple,
TKind::Comma,
@@ -667,12 +721,15 @@ impl<'t> Parse<'t> for Let {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let pat = p.consume().parse(PPrec::Tuple)?;
if p.next_if(TKind::Eq).is_err() {
if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) {
return Ok(Self(pat, vec![]));
}
let body = p.parse(Prec::Tuple.value())?;
if p.next_if(TKind::Else).is_err() {
if p.next_if(TKind::Else)
.allow_eof()?
.is_none_or(|v| v.is_err())
{
return Ok(Self(pat, vec![body]));
}
@@ -685,42 +742,41 @@ impl<'t> Parse<'t> for Match {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
Ok(Self(p.consume().parse(Prec::Logical.value())?, {
p.next_if(TKind::LCurly)?;
p.expect(TKind::LCurly)?;
p.list(vec![], Prec::Body.next(), TKind::Comma, TKind::RCurly)?
}))
}
}
impl<'t> Parse<'t> for MatchArm {
type Prec = usize;
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
p.next_if(TKind::Bar).ok();
p.next_if(TKind::Bar)?.ok(); // and discard
Ok(MatchArm(
p.parse(PPrec::Min)?,
p.consume_if(TKind::FatArrow)?.parse(level)?,
p.expect(TKind::FatArrow)?.parse(level)?,
))
}
}
impl<'t> Parse<'t> for MakeArm {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
let name = p
.next_if(TKind::Identifier)?
.map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?;
Ok(MakeArm(
p.next_if(TKind::Identifier)?
.lexeme
.string()
.expect("Identifier should have String"),
{
p.next_if(TKind::Colon)
.ok()
.map(|_| p.parse(Prec::Body.value()))
.transpose()?
},
name.lexeme.string().expect("Identifier should have String"),
p.opt_if(Prec::Body.value(), TKind::Colon)?,
))
}
}
impl<'t> Parse<'t> for Mod {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let ty = p.consume().parse(())?;
let body = p.parse(Prec::Body.value())?;
@@ -732,14 +788,14 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
// for Pat
let pat = p.consume().parse(PPrec::Tuple)?;
// in Expr
let iter: Anno<Expr> = p.consume_if(TKind::In)?.parse(Prec::Logical.next())?;
let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
let cspan = iter.1;
// Expr
let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
let pspan = pass.1;
// else Expr?
let fail = match p.next_if(TKind::Else) {
Ok(_) => p.parse(Prec::Body.next())?,
let fail = match p.next_if(TKind::Else).allow_eof()? {
Some(Ok(_)) => p.parse(Prec::Body.next())?,
_ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
};
let fspan = fail.1;
@@ -834,125 +890,143 @@ impl<'t> Parse<'t> for Expr {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix
let tok = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, tok.span);
let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, span);
no_eof(move || {
let mut head = match op {
// "End" is produced when an "empty" expression is syntactically required.
// This happens when a semi or closing delimiter begins an expression.
// The token which emitted "End" cannot be consumed, as it is expected elsewhere.
Ps::End if level <= prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
let mut head = match op {
// Empty is returned when a block finisher is an expr prefix.
// It's the only expr that doesn't consume.
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
Ps::ExplicitDo => {
p.consume();
Expr::Op(Op::Tuple, vec![])
}
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Let => Expr::Let(p.parse(())?),
Ps::For => parse_for(p, ())?,
Ps::Const => Expr::Const(p.parse(())?),
Ps::Typedef => Expr::Struct(p.parse(())?),
Ps::Match => Expr::Match(p.parse(())?),
Ps::Mod => Expr::Mod(p.parse(())?),
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
Ps::Fn => Expr::Fn(p.parse(())?),
Ps::Lambda => Expr::Fn(Box::new(Fn(
None,
p.consume()
.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![])),
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::Lambda0 => Expr::Fn(Box::new(Fn(
None,
Pat::Op(PatOp::Tuple, vec![]),
p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
while let Ok(tok) = p.peek()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Let => Expr::Let(p.parse(())?),
Ps::For => parse_for(p, ())?,
Ps::Const => Expr::Const(p.parse(())?),
Ps::Typedef => Expr::Struct(p.parse(())?),
Ps::Match => Expr::Match(p.parse(())?),
Ps::Mod => Expr::Mod(p.parse(())?),
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
// As is ImplicitDo (semicolon elision)
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?),
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?,
),
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
Ps::Op(op @ Op::Try) => {
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]),
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ps::Fn => Expr::Fn(p.parse(())?),
Ps::Lambda => Expr::Fn(Box::new(Fn(
None,
p.consume()
.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![])),
p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::Lambda0 => Expr::Fn(Box::new(Fn(
None,
Pat::Op(PatOp::Tuple, vec![]),
p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer),
p.parse(Prec::Body.next())?,
))),
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
Ok(head)
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// As is ImplicitDo (semicolon elision)
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?),
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
vec![
head.anno(span),
p.consume()
.opt(0, TKind::RParen)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
],
),
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
Ps::Op(op @ Op::Try) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
}
Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ok(head)
})
}
}
@@ -966,7 +1040,7 @@ fn parse_array<'t>(p: &mut Parser<'t>) -> PResult<Expr> {
let prec = Prec::Tuple;
let item = p.parse(prec.value())?;
let repeat = p.opt_if(prec.next(), TKind::Semi)?;
p.next_if(TKind::RBrack)?;
p.expect(TKind::RBrack)?;
Ok(match (repeat, item) {
(Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),


@@ -9,6 +9,12 @@ pub struct Token {
pub span: Span,
}
impl Token {
pub fn kind(&self) -> TKind {
self.kind
}
}
#[derive(Clone, Debug)]
pub enum Lexeme {
String(String),
@@ -55,7 +61,9 @@ impl std::fmt::Display for Lexeme {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TKind {
Comment,
Comment, // Line or block comment
Doc, // Doc comment
And,
As,
Break,
@@ -67,6 +75,7 @@ pub enum TKind {
Fn,
For,
If,
Impl,
In,
Let,
Loop,
@@ -76,6 +85,7 @@ pub enum TKind {
Or,
Public,
Return,
Static,
Struct,
True,
While,