From 6368e68941edce17da941a15d0558605742068b0 Mon Sep 17 00:00:00 2001 From: John Date: Fri, 17 Oct 2025 06:25:11 -0400 Subject: [PATCH] doughlang: Preserve errors through entire pipeline lexer: - Un-stringify errors - Reserve more words - Doc the comments parser: - MASSIVE changes to peek, peek_if, next_if, consume_if=>expect. - Keep track of when EOF is allowable - TKind is stupidly cheap with >100 niches, so we can fit like 4 of them in a single ParseError lmao - TODO: make sure EOF/UnexpectedEOF propagation is correct. It seems... Kinda Not correct. - Add meta-expressions --- src/ast.rs | 140 +++++++----- src/fmt.rs | 22 +- src/lexer.rs | 119 ++++++++--- src/main.rs | 27 ++- src/parser.rs | 574 ++++++++++++++++++++++++++++---------------------- src/token.rs | 12 +- 6 files changed, 543 insertions(+), 351 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 103e637..395ec8c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -23,7 +23,7 @@ impl Annotation pub struct FqPath { // TODO: Identifier interning pub parts: Vec, - // TODO: + // TODO: generic parameters } impl From<&str> for FqPath { @@ -251,6 +251,7 @@ pub enum Op { ArRep, // [ Expr ; Expr ] Group, // ( Expr ,?) Tuple, // Expr (, Expr)* + Meta, // #[ Expr ] Try, // Expr '?' 
Index, // Expr [ Expr,* ] @@ -298,7 +299,17 @@ pub enum Op { LogXor, // Expr ^^ Expr LogOr, // Expr || Expr - Set, // Expr = Expr + Set, // Expr = Expr + MulSet, // Expr *= Expr + DivSet, // Expr /= Expr + RemSet, // Expr %= Expr + AddSet, // Expr += Expr + SubSet, // Expr -= Expr + ShlSet, // Expr <<= Expr + ShrSet, // Expr >>= Expr + AndSet, // Expr &= Expr + XorSet, // Expr ^= Expr + OrSet, // Expr |= Expr } use crate::{fmt::FmtAdapter, span::Span}; @@ -381,19 +392,19 @@ impl Display for Mod { impl Display for Typedef { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self(kind, pat) = self; - let kind = match kind { - TypedefKind::Alias => "type", - TypedefKind::Struct => "struct", - TypedefKind::Enum => "enum", - }; + f.write_str(match kind { + TypedefKind::Alias => "type ", + TypedefKind::Struct => "struct ", + TypedefKind::Enum => "enum ", + })?; match pat { Pat::Struct(name, bind) => match bind.as_ref() { Pat::Op(PatOp::Tuple, parts) => f - .delimit_indented(fmt!("{kind} {name} {{"), "}") + .delimit_indented(fmt!("{name} {{"), "}") .list_wrap("\n", parts, ",\n", ",\n"), other => write!(f, "{name} {{ {other} }}"), }, - _ => write!(f, "{kind} {pat}"), + _ => pat.fmt(f), } } } @@ -426,6 +437,10 @@ impl Display for Expr { .list_wrap("\n", exprs, "\n", "\n"), Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "), Self::Op(Op::Group, exprs) => f.list(exprs, ", "), + Self::Op(Op::Meta, exprs) => match exprs.as_slice() { + [meta, expr @ ..] => f.delimit(fmt!("#[{meta}]\n"), "").list(expr, ","), + [] => write!(f, "#[]"), + }, Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() { [callee, args @ ..] 
=> f.delimit(fmt!("{callee}("), ")").list(args, ", "), @@ -436,7 +451,7 @@ impl Display for Expr { [] => write!(f, "{op}"), }, - Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"), + Self::Op(op @ Op::Do, exprs) => f.list(exprs, op), Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "), Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "), Self::Op(op, exprs) => match exprs.as_slice() { @@ -449,53 +464,64 @@ impl Display for Expr { impl Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Op::Do => "; ".fmt(f), - Op::As => " as ".fmt(f), - Op::Macro => "macro ".fmt(f), - Op::Block => "{}".fmt(f), - Op::Array => "[]".fmt(f), - Op::ArRep => "[;]".fmt(f), - Op::Group => "()".fmt(f), - Op::Tuple => "()".fmt(f), - Op::Try => "?".fmt(f), - Op::Index => "".fmt(f), - Op::Call => "".fmt(f), - Op::Pub => "pub ".fmt(f), - Op::Loop => "loop ".fmt(f), - Op::If => "if ".fmt(f), - Op::While => "while ".fmt(f), - Op::Break => "break ".fmt(f), - Op::Return => "return ".fmt(f), - Op::Dot => ".".fmt(f), - Op::RangeEx => "..".fmt(f), - Op::RangeIn => "..=".fmt(f), - Op::Neg => "-".fmt(f), - Op::Not => "!".fmt(f), - Op::Identity => "!!".fmt(f), - Op::Refer => "&".fmt(f), - Op::Deref => "*".fmt(f), - Op::Mul => " * ".fmt(f), - Op::Div => " / ".fmt(f), - Op::Rem => " % ".fmt(f), - Op::Add => " + ".fmt(f), - Op::Sub => " - ".fmt(f), - Op::Shl => " << ".fmt(f), - Op::Shr => " >> ".fmt(f), - Op::And => " & ".fmt(f), - Op::Xor => " ^ ".fmt(f), - Op::Or => " | ".fmt(f), - Op::Lt => " < ".fmt(f), - Op::Leq => " <= ".fmt(f), - Op::Eq => " == ".fmt(f), - Op::Neq => " != ".fmt(f), - Op::Geq => " >= ".fmt(f), - Op::Gt => " > ".fmt(f), - Op::LogAnd => " && ".fmt(f), - Op::LogXor => " ^^ ".fmt(f), - Op::LogOr => " || ".fmt(f), - Op::Set => " = ".fmt(f), - } + f.write_str(match self { + Op::Do => "; ", + Op::As => " as ", + Op::Macro => "macro ", + Op::Block => "{}", + Op::Array => "[]", + Op::ArRep => 
"[;]", + Op::Group => "()", + Op::Tuple => "()", + Op::Meta => "#[]", + Op::Try => "?", + Op::Index => "", + Op::Call => "", + Op::Pub => "pub ", + Op::Loop => "loop ", + Op::If => "if ", + Op::While => "while ", + Op::Break => "break ", + Op::Return => "return ", + Op::Dot => ".", + Op::RangeEx => "..", + Op::RangeIn => "..=", + Op::Neg => "-", + Op::Not => "!", + Op::Identity => "!!", + Op::Refer => "&", + Op::Deref => "*", + Op::Mul => " * ", + Op::Div => " / ", + Op::Rem => " % ", + Op::Add => " + ", + Op::Sub => " - ", + Op::Shl => " << ", + Op::Shr => " >> ", + Op::And => " & ", + Op::Xor => " ^ ", + Op::Or => " | ", + Op::Lt => " < ", + Op::Leq => " <= ", + Op::Eq => " == ", + Op::Neq => " != ", + Op::Geq => " >= ", + Op::Gt => " > ", + Op::LogAnd => " && ", + Op::LogXor => " ^^ ", + Op::LogOr => " || ", + Op::Set => " = ", + Op::MulSet => " *= ", + Op::DivSet => " /= ", + Op::RemSet => " %= ", + Op::AddSet => " += ", + Op::SubSet => " -= ", + Op::ShlSet => " <<= ", + Op::ShrSet => " >>= ", + Op::AndSet => " &= ", + Op::XorSet => " ^= ", + Op::OrSet => " |= ", + }) } } diff --git a/src/fmt.rs b/src/fmt.rs index 12bb311..482025c 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -79,6 +79,11 @@ impl<'f, F: Write + ?Sized> Indent<'f, F> { pub fn new(f: &'f mut F, indent: &'static str) -> Self { Indent { f, needs_indent: false, indent } } + + /// Gets mutable access to the inner [Write]-adapter + pub fn inner(&mut self) -> &mut F { + self.f + } } impl Write for Indent<'_, F> { @@ -103,10 +108,18 @@ impl Write for Indent<'_, F> { /// Prints delimiters around anything formatted with this. 
Implies [Indent] pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> { - f: &'f mut F, + /// The formatter + pub f: &'f mut F, close: E, } +impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> { + /// Gets mutable access to the inner [Write]-adapter + pub fn inner(&mut self) -> &mut F { + self.f + } +} + impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> { pub fn new(f: &'f mut F, open: O, close: E) -> Self { let _ = write!(f, "{open}"); @@ -133,6 +146,13 @@ pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> { close: E, } +impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> { + /// Gets mutable access to the inner [Write]-adapter + pub fn inner(&mut self) -> &mut F { + self.f.inner() + } +} + impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> { pub fn new(f: &'f mut F, open: O, close: E) -> Self { let mut f = f.indent(); diff --git a/src/lexer.rs b/src/lexer.rs index 8c424c5..2222ac8 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -8,9 +8,10 @@ use crate::{span::Span, token::*}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct LexError { - pub pos: u32, - pub res: &'static str, + pub pos: Span, + pub res: LexFailure, } + impl std::error::Error for LexError {} impl std::fmt::Display for LexError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -19,6 +20,44 @@ impl std::fmt::Display for LexError { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LexFailure { + /// Reached end of file + EOF, + UnexpectedEOF, + Unexpected(char), + UnterminatedBlockComment, + UnterminatedCharacter, + UnterminatedString, + UnterminatedUnicodeEscape, + InvalidUnicodeEscape(u32), + InvalidDigitForBase(char, u32), + IntegerOverflow, +} +use LexFailure::*; +pub use LexFailure::{EOF, UnexpectedEOF}; + +impl std::fmt::Display for LexFailure { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::EOF => "EOF".fmt(f), + 
Self::UnexpectedEOF => "Unexpected EOF".fmt(f), + Self::Unexpected(c) => write!(f, "Character '{c:?}'"), + Self::UnterminatedBlockComment => "Unterminated Block Comment".fmt(f), + Self::UnterminatedCharacter => "Unterminated Character".fmt(f), + Self::UnterminatedString => "Unterminated String".fmt(f), + Self::UnterminatedUnicodeEscape => "Unterminated Unicode Escape".fmt(f), + Self::InvalidUnicodeEscape(hex) => { + write!(f, "'\\u{{{hex:x}}}' is not a valid UTF-8 codepoint") + } + Self::InvalidDigitForBase(digit, base) => { + write!(f, "Invalid digit {digit} for base {base}") + } + Self::IntegerOverflow => "Integer literal does not fit in 128 bits".fmt(f), + } + } +} + #[derive(Clone, Debug)] pub struct Lexer<'t> { /// The source text @@ -72,8 +111,8 @@ impl<'t> Lexer<'t> { } /// Produces a LexError at the start of the current token - fn error(&self, res: &'static str) -> LexError { - LexError { pos: self.head, res } + fn error(&self, res: LexFailure) -> LexError { + LexError { pos: Span(self.head, self.tail), res } } /// Gets the Lexer's current &[str] lexeme and [Span] @@ -118,7 +157,7 @@ impl<'t> Lexer<'t> { .skip_whitespace() .start_token() .peek() - .ok_or_else(|| self.error("EOF"))? + .ok_or_else(|| self.error(EOF))? { '!' => Bang, '"' => return self.string(), @@ -154,7 +193,7 @@ impl<'t> Lexer<'t> { '~' => Tilde, '_' => return self.identifier(), c if is_xid_start(c) => return self.identifier(), - _ => Err(self.error("Invalid"))?, + c => Err(self.error(Unexpected(c)))?, }; // Handle digraphs @@ -217,8 +256,12 @@ impl<'t> Lexer<'t> { /// Consumes characters until the lexer reaches a newline `'\n'` pub fn line_comment(&mut self) -> Result { + let kind = match self.consume().peek() { + Some('!' | '/') => TKind::Doc, + _ => TKind::Comment, + }; while self.consume().peek().is_some_and(|c| c != '\n') {} - Ok(self.produce(TKind::Comment)) + Ok(self.produce(kind)) } /// Consumes characters until the lexer reaches the end of a *nested* block comment. 
@@ -232,7 +275,7 @@ impl<'t> Lexer<'t> { _ => continue, }; } - Err(self.error("Unterminated block comment")) + Err(self.error(UnterminatedBlockComment)) } /// Consumes characters until it reaches a character not in [is_xid_continue]. @@ -257,6 +300,7 @@ impl<'t> Lexer<'t> { "fn" => TKind::Fn, "for" => TKind::For, "if" => TKind::If, + "impl" => TKind::Impl, "in" => TKind::In, "let" => TKind::Let, "loop" => TKind::Loop, @@ -266,6 +310,7 @@ impl<'t> Lexer<'t> { "or" => TKind::Or, "pub" => TKind::Public, "return" => TKind::Return, + "static" => TKind::Const, // TODO: Static "struct" => TKind::Struct, "then" => TKind::Do, "true" => TKind::True, @@ -286,7 +331,7 @@ impl<'t> Lexer<'t> { if self.take().is_some_and(|c| c == '\'') { Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c))) } else { - Err(self.error("Unterminated character")) + Err(self.error(UnterminatedCharacter)) } } @@ -296,7 +341,7 @@ impl<'t> Lexer<'t> { self.consume(); loop { lexeme.push(match self.take() { - None => Err(self.error("Unterminated string"))?, + None => Err(self.error(UnterminatedString))?, Some('\\') => self.escape()?, Some('"') => break, Some(c) => c, @@ -308,40 +353,44 @@ impl<'t> Lexer<'t> { /// Parses a single escape sequence into its resulting char value. pub fn escape(&mut self) -> Result { - Ok(match self.take().ok_or_else(|| self.error("EOF"))? { - ' ' => '\u{a0}', // Non-breaking space - '0' => '\0', // C0 Null Character - 'a' => '\x07', // C0 Acknowledge - 'b' => '\x08', // C0 Bell - 'e' => '\x1b', // C0 Escape - 'f' => '\x0c', // Form Feed - 'n' => '\n', // New Line - 'r' => '\r', // Carriage Return - 't' => '\t', // Tab - 'u' => self.unicode_escape()?, - 'x' => self.hex_escape()?, - c => c, - }) + Ok( + match self.take().ok_or_else(|| self.error(UnexpectedEOF))? 
{ + ' ' => '\u{a0}', // Non-breaking space + '0' => '\0', // C0 Null Character + 'a' => '\x07', // C0 Bell + 'b' => '\x08', // C0 Backspace + 'e' => '\x1b', // C0 Escape + 'f' => '\x0c', // Form Feed + 'n' => '\n', // New Line + 'r' => '\r', // Carriage Return + 't' => '\t', // Tab + 'u' => self.unicode_escape()?, + 'x' => self.hex_escape()?, + c => c, + }, + ) } /// Parses two hex-digits and constructs a [char] out of them. pub fn hex_escape(&mut self) -> Result { let out = (self.digit::<16>()? << 4) + self.digit::<16>()?; - char::from_u32(out).ok_or(self.error("Invalid digit")) + char::from_u32(out).ok_or(self.error(InvalidUnicodeEscape(out))) } /// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them. pub fn unicode_escape(&mut self) -> Result { self.next_if('{') - .ok_or_else(|| self.error("No unicode escape opener"))?; + .ok_or_else(|| self.error(UnterminatedUnicodeEscape))?; let mut out = 0; while let Some(c) = self.take() { if c == '}' { - return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value")); + return char::from_u32(out).ok_or_else(|| self.error(InvalidUnicodeEscape(out))); } - out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?; + out = out * 16 + + c.to_digit(16) + .ok_or_else(|| self.error(InvalidDigitForBase(c, 16)))?; } - Err(self.error("Unterminated unicode escape")) + Err(self.error(UnterminatedUnicodeEscape)) } /// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36. 
@@ -353,7 +402,10 @@ impl<'t> Lexer<'t> { while let Some(c) = self.peek() { int = match c.to_digit(BASE).ok_or(c) { Err('_') => int, - Ok(c) => int.wrapping_mul(BASE as _).wrapping_add(c as _), + Ok(c) => int + .checked_mul(BASE as _) + .and_then(|int| int.checked_add(c as _)) + .ok_or_else(|| self.error(IntegerOverflow))?, _ => break, }; self.consume(); @@ -362,12 +414,13 @@ impl<'t> Lexer<'t> { Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE))) } - /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36 + /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36. pub fn digit(&mut self) -> Result { - if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) { + let digit = self.take().ok_or_else(|| self.error(UnexpectedEOF))?; + if let Some(digit) = digit.to_digit(BASE) { Ok(digit) } else { - Err(self.error("Invalid digit")) + Err(self.error(InvalidDigitForBase(digit, BASE))) } } } diff --git a/src/main.rs b/src/main.rs index 8f0a08a..114ac57 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,7 @@ use doughlang::{ Expr, macro_matcher::{Match, Subst}, }, - lexer::{LexError, Lexer}, + lexer::{EOF, LexError, Lexer}, parser::{ParseError, Parser}, span::Span, token::{TKind, Token}, @@ -73,7 +73,7 @@ fn lex() -> Result<(), Box> { } loop { match lexer.scan() { - Err(LexError { res: "EOF", .. }) => { + Err(LexError { res: EOF, .. }) => { break Ok(Response::Accept); } Err(e) => { @@ -97,7 +97,7 @@ fn exprs() -> Result<(), Box> { } for idx in 0.. { match parser.parse::>(0) { - Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { + Err(ParseError::FromLexer(LexError { res: EOF, .. })) => { return Ok(Response::Accept); } Err(e) => { @@ -120,7 +120,7 @@ fn pats() -> Result<(), Box> { } loop { match parser.parse::(PPrec::Min) { - Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { + Err(ParseError::FromLexer(LexError { res: EOF, .. 
})) => { break Ok(Response::Accept); } Err(e) => { @@ -142,7 +142,7 @@ fn tys() -> Result<(), Box> { } loop { match parser.parse::(()) { - Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { + Err(ParseError::FromLexer(LexError { res: EOF, .. })) => { break Ok(Response::Accept); } Err(e) => { @@ -205,16 +205,25 @@ fn subst() -> Result<(), Box> { fn parse(document: &str) { let mut parser = Parser::new(Lexer::new(document)); + let isatty = std::io::stdin().is_terminal(); for idx in 0.. { match parser.parse::(0) { - Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break, - Err(e) => { - println!("\x1b[31m{e}\x1b[0m"); + Err(e @ ParseError::EOF(s)) if s.tail == document.len() as _ => { + println!("\x1b[92m{e} (total {} bytes)\x1b[0m", document.len()); break; } - Ok(v) => { + Err(e @ ParseError::EOF(_)) => { + println!("\x1b[93m{e} (total {} bytes)\x1b[0m", document.len()); + break; + } + Err(e) => { + println!("\x1b[91m{e}\x1b[0m"); + break; + } + Ok(v) if isatty => { println!("\x1b[{}m{v}", (idx + 5) % 6 + 31); } + _ => {} } } } diff --git a/src/parser.rs b/src/parser.rs index 65f951f..62f17a6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ //! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes. use crate::{ ast::*, - lexer::{LexError, Lexer}, + lexer::{LexError, LexFailure, Lexer}, span::Span, token::{Lexeme, TKind, Token}, }; @@ -9,8 +9,12 @@ use std::{error::Error, fmt::Display, vec}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ParseError { + /// Reached the expected end of input. + EOF(Span), + /// Unexpectedly reached end of input. 
+ UnexpectedEOF(Span), FromLexer(LexError), - Expected(TKind, Span), + Expected(TKind, TKind, Span), NotLiteral(TKind, Span), NotPattern(TKind, Span), NotType(TKind, Span), @@ -19,12 +23,16 @@ pub enum ParseError { NotPostfix(TKind, Span), } +pub use ParseError::EOF; + impl Error for ParseError {} impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::EOF(loc) => write!(f, "{loc}: Reached end of input."), + Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."), Self::FromLexer(e) => e.fmt(f), - Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."), + Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."), Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."), @@ -37,10 +45,36 @@ impl Display for ParseError { pub type PResult = Result; +trait PResultExt { + fn no_eof(self) -> PResult; + fn allow_eof(self) -> PResult>; +} + +impl PResultExt for PResult { + fn no_eof(self) -> Self { + match self { + Err(ParseError::EOF(span)) => Err(ParseError::UnexpectedEOF(span)), + other => other, + } + } + fn allow_eof(self) -> PResult> { + match self { + Ok(t) => Ok(Some(t)), + Err(ParseError::EOF(_)) => Ok(None), + Err(e) => Err(e), + } + } +} + +/// Opens a scope where [ParseError::EOF] is unexpected (See [PResultExt::no_eof]) +fn no_eof(f: impl FnOnce() -> PResult) -> PResult { + f().no_eof() +} + #[derive(Debug)] pub struct Parser<'t> { pub lexer: Lexer<'t>, - pub next_tok: Option, + pub next_tok: Option>, pub last_loc: Span, pub elide_do: bool, } @@ -72,46 +106,59 @@ impl<'t> Parser<'t> { None => loop { match self.lexer.scan() { Ok(Token { kind: TKind::Comment, .. 
}) => {} - Ok(tok) => break tok, - Err(e) => Err(ParseError::FromLexer(e))?, + Ok(tok) => break Ok(tok), + Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?, + Err(e) => break Err(ParseError::FromLexer(e)), } }, }; - self.last_loc = next_tok.span; self.next_tok = Some(next_tok); - Ok(self.next_tok.as_ref().expect("should have token")) + + let next_tok = self.next_tok.as_ref().expect("should have Some lex result"); + + if let Ok(tok) = next_tok { + self.last_loc = tok.span; + } + + next_tok.as_ref().map_err(|e| *e) } /// Peeks the next token if it matches the `expected` [TKind] - pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> { - self.peek().into_iter().find(|tok| tok.kind == expected) + pub fn peek_if(&mut self, expected: TKind) -> PResult> { + match self.peek() { + Ok(tok) if tok.kind == expected => Ok(Some(tok)), + Ok(_) => Ok(None), + Err(e) => Err(e), + } } /// Consumes and returns the currently-peeked [Token]. - pub fn take(&mut self) -> Option { - let tok = self.next_tok.take(); - self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. })); + pub fn take(&mut self) -> PResult { + let tok = self + .next_tok + .take() + .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc))); + self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. })); tok } /// Consumes the currently-peeked [Token], returning its lexeme without cloning. 
- pub fn take_lexeme(&mut self) -> Option { + pub fn take_lexeme(&mut self) -> PResult { self.take().map(|tok| tok.lexeme) } #[allow(clippy::should_implement_trait)] pub fn next(&mut self) -> PResult { - self.peek()?; + self.peek().no_eof()?; Ok(self.take().expect("should have token here")) } /// Consumes and returns the next [Token] if it matches the `expected` [TKind] - pub fn next_if(&mut self, expected: TKind) -> PResult { - let token = self.peek()?; - if token.kind == expected { - Ok(self.take().expect("should have token here")) - } else { - Err(ParseError::Expected(expected, token.span)) + pub fn next_if(&mut self, expected: TKind) -> PResult> { + match self.peek() { + Ok(t) if t.kind == expected => self.take().map(Ok), + Ok(t) => Ok(Err(t.kind)), + Err(e) => Err(e), } } @@ -127,13 +174,15 @@ impl<'t> Parser<'t> { end: TKind, ) -> PResult> { // TODO: This loses lexer errors - while self.peek_if(end).is_none() { - elems.push(self.parse(level.clone())?); - if self.next_if(sep).is_err() { - break; - } + while self.peek_if(end).no_eof()?.is_none() { + elems.push(self.parse(level.clone()).no_eof()?); + match self.peek_if(sep)? { + Some(_) => self.consume(), + None => break, + }; } - self.next_if(end)?; + self.next_if(end)? + .map_err(|tk| ParseError::Expected(end, tk, self.span()))?; Ok(elems) } @@ -148,33 +197,37 @@ impl<'t> Parser<'t> { sep: TKind, ) -> PResult> { loop { - elems.push(self.parse(level.clone())?); - if self.next_if(sep).is_err() { - break Ok(elems); - } + let elem = self.parse(level.clone()).no_eof()?; + elems.push(elem); + match self.peek_if(sep) { + Ok(Some(_)) => self.consume(), + Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems), + Err(e) => Err(e)?, + }; } } /// Parses into an [`Option

`] if the next token is `next` pub fn opt_if>(&mut self, level: P::Prec, next: TKind) -> PResult> { - Ok(match self.next_if(next) { - Ok(_) => Some(self.parse(level)?), + Ok(match self.next_if(next)? { + Ok(_) => Some(self.parse(level).no_eof()?), Err(_) => None, }) } /// Parses a P unless the next token is `end` pub fn opt>(&mut self, level: P::Prec, end: TKind) -> PResult> { - let out = match self.peek_if(end) { - None => Some(self.parse(level)?), + let out = match self.peek_if(end)? { + None => Some(self.parse(level).no_eof()?), Some(_) => None, }; - self.next_if(end)?; + self.expect(end)?; Ok(out) } - pub fn consume_if(&mut self, next: TKind) -> PResult<&mut Self> { - self.next_if(next)?; + pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> { + self.next_if(next)? + .map_err(|tk| ParseError::Expected(next, tk, self.span()))?; Ok(self) } @@ -197,17 +250,12 @@ impl<'t> Parse<'t> for FqPath { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { let mut parts = vec![]; - if p.next_if(TKind::ColonColon).is_ok() { + if p.next_if(TKind::ColonColon)?.is_ok() { parts.push("".into()); // the "root" } - loop { - parts.push( - p.next_if(TKind::Identifier)? - .lexeme - .string() - .expect("Identifier should have String"), - ); - if p.next_if(TKind::ColonColon).is_err() { + while let Ok(id) = p.next_if(TKind::Identifier)? { + parts.push(id.lexeme.string().expect("Identifier should have String")); + if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? { break; } } @@ -223,26 +271,19 @@ impl<'t> Parse<'t> for Literal { Ok(match tok.kind { TKind::True => p.consume().then(Literal::Bool(true)), TKind::False => p.consume().then(Literal::Bool(false)), - TKind::Character => Literal::Char( - p.take_lexeme() - .expect("should have Token") - .char() - .expect("should have one char in char literal"), - ), - TKind::Integer => { - let Token { lexeme, span, .. 
} = p.take().expect("should have Token"); - let Lexeme::Integer(int, _) = lexeme else { - Err(ParseError::Expected(TKind::Integer, span))? - }; - Literal::Int(int) - } - TKind::String => Literal::Str({ - let Token { lexeme, span, .. } = p.take().expect("should have Token"); - lexeme - .string() - .ok_or(ParseError::Expected(TKind::String, span))? + TKind::Character => Literal::Char({ + let Token { lexeme, .. } = p.take().expect("should have Token"); + lexeme.char().expect("char token should have char") }), - _ => Err(ParseError::Expected(TKind::Integer, tok.span))?, + TKind::Integer => Literal::Int({ + let Token { lexeme, .. } = p.take().expect("should have Token"); + lexeme.int().expect("integer token should have int") + }), + TKind::String => Literal::Str({ + let Token { lexeme, .. } = p.take().expect("should have Token"); + lexeme.string().expect("string token should have string") + }), + other => Err(ParseError::NotLiteral(other, tok.span))?, }) } } @@ -288,6 +329,7 @@ fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> { impl<'t> Parse<'t> for Pat { type Prec = PPrec; + fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult { let tok = p.peek()?; @@ -315,12 +357,10 @@ impl<'t> Parse<'t> for Pat { .opt(PPrec::Alt, TKind::RCurly)? 
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))), ), - Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => { - match path.parts.len() { - 1 => Self::Name(path.parts.pop().expect("name has 1 part")), - _ => Self::Path(path), - } - } + Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() { + 1 => Self::Name(path.parts.pop().expect("name has 1 part")), + _ => Self::Path(path), + }, Err(e) => Err(e)?, } } @@ -329,21 +369,21 @@ impl<'t> Parse<'t> for Pat { TKind::DotDot => Pat::Op( PatOp::Rest, // Identifier in Rest position always becomes binder - match p.consume().peek()?.kind { - TKind::Identifier => vec![Pat::Name( + match p.consume().peek().allow_eof()?.map(Token::kind) { + Some(TKind::Identifier) => vec![Pat::Name( p.take_lexeme() .expect("should have lexeme") .string() .expect("should be string"), )], - TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?], + Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], _ => vec![], }, ), TKind::DotDotEq => Pat::Op( PatOp::RangeIn, - match p.consume().peek()?.kind { - TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?], + match p.consume().peek().allow_eof()?.map(Token::kind) { + Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], _ => vec![], }, ), @@ -360,14 +400,14 @@ impl<'t> Parse<'t> for Pat { _ => Err(ParseError::NotPattern(tok.kind, tok.span))?, }; - while let Ok(tok) = p.peek() + while let Ok(Some(tok)) = p.peek().allow_eof() && let Some((op, prec)) = pat_from_infix(tok) && level <= prec { let kind = tok.kind; head = match op { PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?), - PatPs::Op(op @ PatOp::RangeEx) => Pat::Op( + PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op( op, match p.consume().peek().map(|t| t.kind) { Ok(TKind::Integer | TKind::Character | TKind::Identifier) => { @@ -379,7 +419,6 @@ impl<'t> Parse<'t> for Pat { PatPs::Op(op) => 
Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?), } } - Ok(head) } } @@ -389,10 +428,11 @@ impl<'t> Parse<'t> for Ty { fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult where Self: Sized { - let tok = p.peek()?; + let &Token { kind, span, .. } = p.peek()?; - let head = match tok.kind { - TKind::Identifier => match tok.lexeme.str() { + // TODO: this is a kinda jank way of error reporting + let head = match kind { + TKind::Identifier => match p.peek()?.lexeme.str() { Some("_") => p.consume().then(Ty::Infer), _ => Ty::Named(p.parse(())?), }, @@ -403,7 +443,7 @@ impl<'t> Parse<'t> for Ty { match p.next()? { Token { kind: TKind::Semi, .. } => { let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?); - p.next_if(TKind::RBrack)?; + p.expect(TKind::RBrack)?; ty } Token { kind: TKind::RBrack, .. } => Ty::Slice(ty), @@ -411,30 +451,27 @@ impl<'t> Parse<'t> for Ty { } } TKind::Fn => { - p.consume().consume_if(TKind::LParen)?; - - let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?; - tys.push(match p.next_if(TKind::Arrow) { - Ok(_) => p.parse(())?, - _ => Ty::Tuple(vec![]), - }); - Ty::Fn(tys) - } - TKind::LParen => { - let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?; - match p.next_if(TKind::Arrow) { - Ok(_) => { - tys.push(p.parse(())?); - Ty::Fn(tys) - } - _ => Ty::Tuple(tys), + p.consume(); + match p.parse(())? { + Ty::Fn(args) => Ty::Fn(args), + other @ Ty::Tuple(_) => Ty::Fn(vec![other, Ty::Tuple(vec![])]), + other => Ty::Fn(vec![other, Ty::Tuple(vec![])]), } } - _ => Err(ParseError::NotType(tok.kind, tok.span))?, + TKind::LParen => { + Ty::Tuple(p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?) + } + _ => Err(ParseError::NotType(kind, span))?, }; - Ok(match p.next_if(TKind::Arrow) { - Ok(_) => Ty::Fn(vec![head, p.parse(())?]), + Ok(match p.next_if(TKind::Arrow).allow_eof()? 
{ + Some(Ok(_)) => Ty::Fn(vec![ + match head { + args @ Ty::Tuple(_) => args, + arg => Ty::Tuple(vec![arg]), + }, + p.parse(())?, + ]), _ => head, }) } @@ -483,15 +520,18 @@ pub enum Prec { impl Prec { pub const MIN: usize = Prec::Min.value(); + pub const fn value(self) -> usize { self as usize * 2 } + pub const fn prev(self) -> usize { match self { Self::Assign => self.value() + 1, _ => self.value(), } } + pub const fn next(self) -> usize { match self { Self::Assign => self.value(), @@ -526,7 +566,7 @@ pub enum Ps { fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { TKind::Do => (Ps::Op(Op::Do), Prec::Do), - TKind::Semi => (Ps::ExplicitDo, Prec::Do), + TKind::Semi => (Ps::End, Prec::Body), TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max), TKind::Grave => (Ps::Mid, Prec::Max), @@ -566,6 +606,7 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary), TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary), TKind::Star => (Ps::Op(Op::Deref), Prec::Unary), + TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary), kind => Err(ParseError::NotPrefix(kind, token.span))?, }) @@ -574,18 +615,24 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { Ok(match token.kind { TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration - TKind::As => (Ps::Op(Op::As), Prec::Max), - TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), - TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), - TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), - TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), - TKind::Question => (Ps::Op(Op::Try), Prec::Unary), - TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), - TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), - TKind::LCurly => (Ps::Make, Prec::Make), - TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), + TKind::In => (Ps::Op(Op::Do), Prec::Do), + TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), + 
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign), + TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign), + TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign), + TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign), + TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign), + TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign), + TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign), + TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign), + TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign), + TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign), + TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), + TKind::LCurly => (Ps::Make, Prec::Make), TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), + TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), + TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare), TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare), TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare), @@ -605,6 +652,13 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { TKind::Slash => (Ps::Op(Op::Div), Prec::Term), TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), + TKind::Question => (Ps::Op(Op::Try), Prec::Unary), + TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), + TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), + TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), + + TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), + TKind::As => (Ps::Op(Op::As), Prec::Max), _ => (Ps::ImplicitDo, Prec::Do), }) } @@ -615,7 +669,7 @@ impl<'t> Parse<'t> for Const { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { Ok(Self( p.consume().parse(PPrec::Tuple)?, - p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?, + p.expect(TKind::Eq)?.parse(Prec::Tuple.value())?, )) } } @@ -637,7 +691,7 @@ impl<'t> Parse<'t> for Fn { type Prec = (); fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { - match p.consume().next_if(TKind::Identifier) { + match p.consume().next_if(TKind::Identifier)? { Ok(Token { lexeme, .. 
}) => Ok(Self( lexeme.string(), p.parse(PPrec::Tuple)?, @@ -648,7 +702,7 @@ impl<'t> Parse<'t> for Fn { None, Pat::Op( PatOp::Tuple, - p.consume_if(TKind::LParen)?.list( + p.expect(TKind::LParen)?.list( vec![], PPrec::Tuple, TKind::Comma, @@ -667,12 +721,15 @@ impl<'t> Parse<'t> for Let { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { let pat = p.consume().parse(PPrec::Tuple)?; - if p.next_if(TKind::Eq).is_err() { + if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) { return Ok(Self(pat, vec![])); } let body = p.parse(Prec::Tuple.value())?; - if p.next_if(TKind::Else).is_err() { + if p.next_if(TKind::Else) + .allow_eof()? + .is_none_or(|v| v.is_err()) + { return Ok(Self(pat, vec![body])); } @@ -685,42 +742,41 @@ impl<'t> Parse<'t> for Match { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { Ok(Self(p.consume().parse(Prec::Logical.value())?, { - p.next_if(TKind::LCurly)?; + p.expect(TKind::LCurly)?; p.list(vec![], Prec::Body.next(), TKind::Comma, TKind::RCurly)? })) } } + impl<'t> Parse<'t> for MatchArm { type Prec = usize; + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { - p.next_if(TKind::Bar).ok(); + p.next_if(TKind::Bar)?.ok(); // and discard Ok(MatchArm( p.parse(PPrec::Min)?, - p.consume_if(TKind::FatArrow)?.parse(level)?, + p.expect(TKind::FatArrow)?.parse(level)?, )) } } impl<'t> Parse<'t> for MakeArm { type Prec = (); + fn parse(p: &mut Parser<'t>, _level: ()) -> PResult { + let name = p + .next_if(TKind::Identifier)? + .map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?; Ok(MakeArm( - p.next_if(TKind::Identifier)? - .lexeme - .string() - .expect("Identifier should have String"), - { - p.next_if(TKind::Colon) - .ok() - .map(|_| p.parse(Prec::Body.value())) - .transpose()? 
- }, + name.lexeme.string().expect("Identifier should have String"), + p.opt_if(Prec::Body.value(), TKind::Colon)?, )) } } impl<'t> Parse<'t> for Mod { type Prec = (); + fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { let ty = p.consume().parse(())?; let body = p.parse(Prec::Body.value())?; @@ -732,14 +788,14 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult { // for Pat let pat = p.consume().parse(PPrec::Tuple)?; // in Expr - let iter: Anno = p.consume_if(TKind::In)?.parse(Prec::Logical.next())?; + let iter: Anno = p.expect(TKind::In)?.parse(Prec::Logical.next())?; let cspan = iter.1; // Expr let pass: Anno = p.parse(Prec::Body.next())?; let pspan = pass.1; // else Expr? - let fail = match p.next_if(TKind::Else) { - Ok(_) => p.parse(Prec::Body.next())?, + let fail = match p.next_if(TKind::Else).allow_eof()? { + Some(Ok(_)) => p.parse(Prec::Body.next())?, _ => Expr::Op(Op::Tuple, vec![]).anno(pspan), }; let fspan = fail.1; @@ -834,125 +890,143 @@ impl<'t> Parse<'t> for Expr { fn parse(p: &mut Parser<'t>, level: usize) -> PResult { const MIN: usize = Prec::MIN; + // TODO: in-tree doc comments + while p.next_if(TKind::Doc)?.is_ok() {} + // Prefix - let tok = p.peek()?; - let ((op, prec), span) = (from_prefix(tok)?, tok.span); + let tok @ &Token { kind, span, .. } = p.peek()?; + let ((op, prec), span) = (from_prefix(tok)?, span); + no_eof(move || { + let mut head = match op { + // "End" is produced when an "empty" expression is syntactically required. + // This happens when a semi or closing delimiter begins an expression. + // The token which emitted "End" cannot be consumed, as it is expected elsewhere. + Ps::End if level <= prec.next() => Expr::Op(Op::Tuple, vec![]), + Ps::End => Err(ParseError::NotPrefix(kind, span))?, - let mut head = match op { - // Empty is returned when a block finisher is an expr prefix. - // It's the only expr that doesn't consume. 
- Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]), - Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?, - - Ps::ExplicitDo => { - p.consume(); - Expr::Op(Op::Tuple, vec![]) - } - - Ps::Id => Expr::Id(p.parse(())?), - Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()), - Ps::Lit => Expr::Lit(p.parse(())?), - Ps::Let => Expr::Let(p.parse(())?), - Ps::For => parse_for(p, ())?, - Ps::Const => Expr::Const(p.parse(())?), - Ps::Typedef => Expr::Struct(p.parse(())?), - Ps::Match => Expr::Match(p.parse(())?), - Ps::Mod => Expr::Mod(p.parse(())?), - Ps::Op(Op::Block) => Expr::Op( - Op::Block, - p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), - ), - Ps::Op(Op::Array) => parse_array(p)?, - Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? { - Some(value) => Expr::Op(Op::Group, vec![value]), - None => Expr::Op(Op::Tuple, vec![]), - }, - Ps::Op(op @ (Op::If | Op::While)) => { - p.consume(); - let exprs = vec![ - // conditional restricted to Logical operators or above - p.parse(Prec::Logical.value())?, - p.parse(prec.next())?, - match p.peek() { - Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?, - _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), - }, - ]; - Expr::Op(op, exprs) - } - Ps::Fn => Expr::Fn(p.parse(())?), - Ps::Lambda => Expr::Fn(Box::new(Fn( - None, - p.consume() - .opt(PPrec::Tuple, TKind::Bar)? 
- .unwrap_or(Pat::Op(PatOp::Tuple, vec![])), - p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer), - p.parse(Prec::Body.next())?, - ))), - Ps::Lambda0 => Expr::Fn(Box::new(Fn( - None, - Pat::Op(PatOp::Tuple, vec![]), - p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer), - p.parse(Prec::Body.next())?, - ))), - Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| { - Expr::Op( - Op::Refer, - vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)], - ) - })?, - - Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]), - _ => unimplemented!("prefix {op:?}"), - }; - - // Infix and Postfix - while let Ok(tok) = p.peek() - && let Ok((op, prec)) = from_infix(tok) - && level <= prec.prev() - && op != Ps::End - { - let kind = tok.kind; - let span = span.merge(p.span()); - - head = match op { - // Make (structor expressions) are context-sensitive - Ps::Make => match &head { - Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make( - head.anno(span), - p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, - ))), - _ => break, + Ps::Id => Expr::Id(p.parse(())?), + Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()), + Ps::Lit => Expr::Lit(p.parse(())?), + Ps::Let => Expr::Let(p.parse(())?), + Ps::For => parse_for(p, ())?, + Ps::Const => Expr::Const(p.parse(())?), + Ps::Typedef => Expr::Struct(p.parse(())?), + Ps::Match => Expr::Match(p.parse(())?), + Ps::Mod => Expr::Mod(p.parse(())?), + Ps::Op(Op::Meta) => Expr::Op( + Op::Meta, + vec![ + p.consume() + .expect(TKind::LBrack)? + .opt(MIN, TKind::RBrack)? + .unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)), + p.parse(level)?, + ], + ), + Ps::Op(Op::Block) => Expr::Op( + Op::Block, + p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), + ), + Ps::Op(Op::Array) => parse_array(p)?, + Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? 
{ + Some(value) => Expr::Op(Op::Group, vec![value]), + None => Expr::Op(Op::Tuple, vec![]), }, - // As is ImplicitDo (semicolon elision) - Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?), - Ps::ImplicitDo => break, - Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?), - Ps::Op(Op::Index) => Expr::Op( - Op::Index, - p.consume() - .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?, - ), - Ps::Op(Op::Call) => Expr::Op( - Op::Call, - p.consume() - .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?, - ), - Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op( - op, - p.consume() - .list_bare(vec![head.anno(span)], prec.next(), kind)?, - ), - Ps::Op(op @ Op::Try) => { + Ps::Op(op @ (Op::If | Op::While)) => { p.consume(); - Expr::Op(op, vec![head.anno(span)]) + let exprs = vec![ + // conditional restricted to Logical operators or above + p.parse(Prec::Logical.value())?, + p.parse(prec.next())?, + match p.peek() { + Ok(Token { kind: TKind::Else, .. }) => { + p.consume().parse(prec.next())? + } + _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), + }, + ]; + Expr::Op(op, exprs) } - Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]), - _ => Err(ParseError::NotInfix(kind, span))?, - } - } + Ps::Fn => Expr::Fn(p.parse(())?), + Ps::Lambda => Expr::Fn(Box::new(Fn( + None, + p.consume() + .opt(PPrec::Tuple, TKind::Bar)? 
+ .unwrap_or(Pat::Op(PatOp::Tuple, vec![])), + p.opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer), + p.parse(Prec::Body.next())?, + ))), + Ps::Lambda0 => Expr::Fn(Box::new(Fn( + None, + Pat::Op(PatOp::Tuple, vec![]), + p.consume().opt_if((), TKind::Arrow)?.unwrap_or(Ty::Infer), + p.parse(Prec::Body.next())?, + ))), + Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| { + Expr::Op( + Op::Refer, + vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)], + ) + })?, - Ok(head) + Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]), + _ => unimplemented!("prefix {op:?}"), + }; + + // Infix and Postfix + while let Ok(Some(tok)) = p.peek().allow_eof() + && let Ok((op, prec)) = from_infix(tok) + && level <= prec.prev() + && op != Ps::End + { + let kind = tok.kind; + let span = span.merge(p.span()); + + head = match op { + // Make (structor expressions) are context-sensitive + Ps::Make => match &head { + Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make( + head.anno(span), + p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, + ))), + _ => break, + }, + // As is ImplicitDo (semicolon elision) + Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?), + Ps::ImplicitDo => break, + Ps::Op(Op::Do) => head.and_do(span, p.consume().parse(prec.next())?), + Ps::Op(Op::Index) => Expr::Op( + Op::Index, + p.consume() + .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?, + ), + Ps::Op(Op::Call) => Expr::Op( + Op::Call, + vec![ + head.anno(span), + p.consume() + .opt(0, TKind::RParen)? 
+ .unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)), + ], + ), + Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op( + op, + p.consume() + .list_bare(vec![head.anno(span)], prec.next(), kind)?, + ), + Ps::Op(op @ Op::Try) => { + p.consume(); + Expr::Op(op, vec![head.anno(span)]) + } + Ps::Op(op) => { + Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]) + } + _ => Err(ParseError::NotInfix(kind, span))?, + } + } + + Ok(head) + }) } } @@ -966,7 +1040,7 @@ fn parse_array<'t>(p: &mut Parser<'t>) -> PResult { let prec = Prec::Tuple; let item = p.parse(prec.value())?; let repeat = p.opt_if(prec.next(), TKind::Semi)?; - p.next_if(TKind::RBrack)?; + p.expect(TKind::RBrack)?; Ok(match (repeat, item) { (Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]), diff --git a/src/token.rs b/src/token.rs index 00eed68..34e495a 100644 --- a/src/token.rs +++ b/src/token.rs @@ -9,6 +9,12 @@ pub struct Token { pub span: Span, } +impl Token { + pub fn kind(&self) -> TKind { + self.kind + } +} + #[derive(Clone, Debug)] pub enum Lexeme { String(String), @@ -55,7 +61,9 @@ impl std::fmt::Display for Lexeme { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TKind { - Comment, + Comment, // Line or block comment + Doc, // Doc comment + And, As, Break, @@ -67,6 +75,7 @@ pub enum TKind { Fn, For, If, + Impl, In, Let, Loop, @@ -76,6 +85,7 @@ pub enum TKind { Or, Public, Return, + Static, Struct, True, While,