doughlang: Preserve errors through entire pipeline

lexer:
- Un-stringify errors
- Reserve more words
- Doc the comments

parser:
- Major changes to `peek`, `peek_if`, and `next_if`; renamed `consume_if` to `expect`.
- Keep track of when EOF is allowable
- TKind is very cheap, with more than 100 niches, so several of them fit in a single ParseError.
- TODO: verify that EOF/UnexpectedEOF propagation is correct; it currently appears incorrect in some cases.
- Add meta-expressions
This commit is contained in:
2025-10-17 06:25:11 -04:00
parent c8f1f082c4
commit 6368e68941
6 changed files with 543 additions and 351 deletions

View File

@@ -23,7 +23,7 @@ impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation
pub struct FqPath { pub struct FqPath {
// TODO: Identifier interning // TODO: Identifier interning
pub parts: Vec<String>, pub parts: Vec<String>,
// TODO: // TODO: generic parameters
} }
impl From<&str> for FqPath { impl From<&str> for FqPath {
@@ -251,6 +251,7 @@ pub enum Op {
ArRep, // [ Expr ; Expr ] ArRep, // [ Expr ; Expr ]
Group, // ( Expr ,?) Group, // ( Expr ,?)
Tuple, // Expr (, Expr)* Tuple, // Expr (, Expr)*
Meta, // #[ Expr ]
Try, // Expr '?' Try, // Expr '?'
Index, // Expr [ Expr,* ] Index, // Expr [ Expr,* ]
@@ -299,6 +300,16 @@ pub enum Op {
LogOr, // Expr || Expr LogOr, // Expr || Expr
Set, // Expr = Expr Set, // Expr = Expr
MulSet, // Expr *= Expr
DivSet, // Expr /= Expr
RemSet, // Expr %= Expr
AddSet, // Expr += Expr
SubSet, // Expr -= Expr
ShlSet, // Expr <<= Expr
ShrSet, // Expr >>= Expr
AndSet, // Expr &= Expr
XorSet, // Expr ^= Expr
OrSet, // Expr |= Expr
} }
use crate::{fmt::FmtAdapter, span::Span}; use crate::{fmt::FmtAdapter, span::Span};
@@ -381,19 +392,19 @@ impl<A: Annotation> Display for Mod<A> {
impl Display for Typedef { impl Display for Typedef {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self(kind, pat) = self; let Self(kind, pat) = self;
let kind = match kind { f.write_str(match kind {
TypedefKind::Alias => "type", TypedefKind::Alias => "type ",
TypedefKind::Struct => "struct", TypedefKind::Struct => "struct ",
TypedefKind::Enum => "enum", TypedefKind::Enum => "enum ",
}; })?;
match pat { match pat {
Pat::Struct(name, bind) => match bind.as_ref() { Pat::Struct(name, bind) => match bind.as_ref() {
Pat::Op(PatOp::Tuple, parts) => f Pat::Op(PatOp::Tuple, parts) => f
.delimit_indented(fmt!("{kind} {name} {{"), "}") .delimit_indented(fmt!("{name} {{"), "}")
.list_wrap("\n", parts, ",\n", ",\n"), .list_wrap("\n", parts, ",\n", ",\n"),
other => write!(f, "{name} {{ {other} }}"), other => write!(f, "{name} {{ {other} }}"),
}, },
_ => write!(f, "{kind} {pat}"), _ => pat.fmt(f),
} }
} }
} }
@@ -426,6 +437,10 @@ impl<A: Annotation> Display for Expr<A> {
.list_wrap("\n", exprs, "\n", "\n"), .list_wrap("\n", exprs, "\n", "\n"),
Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "), Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
Self::Op(Op::Group, exprs) => f.list(exprs, ", "), Self::Op(Op::Group, exprs) => f.list(exprs, ", "),
Self::Op(Op::Meta, exprs) => match exprs.as_slice() {
[meta, expr @ ..] => f.delimit(fmt!("#[{meta}]\n"), "").list(expr, ","),
[] => write!(f, "#[]"),
},
Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() { Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
[callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "), [callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
@@ -436,7 +451,7 @@ impl<A: Annotation> Display for Expr<A> {
[] => write!(f, "{op}"), [] => write!(f, "{op}"),
}, },
Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"), Self::Op(op @ Op::Do, exprs) => f.list(exprs, op),
Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "), Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "), Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "),
Self::Op(op, exprs) => match exprs.as_slice() { Self::Op(op, exprs) => match exprs.as_slice() {
@@ -449,53 +464,64 @@ impl<A: Annotation> Display for Expr<A> {
impl Display for Op { impl Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { f.write_str(match self {
Op::Do => "; ".fmt(f), Op::Do => "; ",
Op::As => " as ".fmt(f), Op::As => " as ",
Op::Macro => "macro ".fmt(f), Op::Macro => "macro ",
Op::Block => "{}".fmt(f), Op::Block => "{}",
Op::Array => "[]".fmt(f), Op::Array => "[]",
Op::ArRep => "[;]".fmt(f), Op::ArRep => "[;]",
Op::Group => "()".fmt(f), Op::Group => "()",
Op::Tuple => "()".fmt(f), Op::Tuple => "()",
Op::Try => "?".fmt(f), Op::Meta => "#[]",
Op::Index => "".fmt(f), Op::Try => "?",
Op::Call => "".fmt(f), Op::Index => "",
Op::Pub => "pub ".fmt(f), Op::Call => "",
Op::Loop => "loop ".fmt(f), Op::Pub => "pub ",
Op::If => "if ".fmt(f), Op::Loop => "loop ",
Op::While => "while ".fmt(f), Op::If => "if ",
Op::Break => "break ".fmt(f), Op::While => "while ",
Op::Return => "return ".fmt(f), Op::Break => "break ",
Op::Dot => ".".fmt(f), Op::Return => "return ",
Op::RangeEx => "..".fmt(f), Op::Dot => ".",
Op::RangeIn => "..=".fmt(f), Op::RangeEx => "..",
Op::Neg => "-".fmt(f), Op::RangeIn => "..=",
Op::Not => "!".fmt(f), Op::Neg => "-",
Op::Identity => "!!".fmt(f), Op::Not => "!",
Op::Refer => "&".fmt(f), Op::Identity => "!!",
Op::Deref => "*".fmt(f), Op::Refer => "&",
Op::Mul => " * ".fmt(f), Op::Deref => "*",
Op::Div => " / ".fmt(f), Op::Mul => " * ",
Op::Rem => " % ".fmt(f), Op::Div => " / ",
Op::Add => " + ".fmt(f), Op::Rem => " % ",
Op::Sub => " - ".fmt(f), Op::Add => " + ",
Op::Shl => " << ".fmt(f), Op::Sub => " - ",
Op::Shr => " >> ".fmt(f), Op::Shl => " << ",
Op::And => " & ".fmt(f), Op::Shr => " >> ",
Op::Xor => " ^ ".fmt(f), Op::And => " & ",
Op::Or => " | ".fmt(f), Op::Xor => " ^ ",
Op::Lt => " < ".fmt(f), Op::Or => " | ",
Op::Leq => " <= ".fmt(f), Op::Lt => " < ",
Op::Eq => " == ".fmt(f), Op::Leq => " <= ",
Op::Neq => " != ".fmt(f), Op::Eq => " == ",
Op::Geq => " >= ".fmt(f), Op::Neq => " != ",
Op::Gt => " > ".fmt(f), Op::Geq => " >= ",
Op::LogAnd => " && ".fmt(f), Op::Gt => " > ",
Op::LogXor => " ^^ ".fmt(f), Op::LogAnd => " && ",
Op::LogOr => " || ".fmt(f), Op::LogXor => " ^^ ",
Op::Set => " = ".fmt(f), Op::LogOr => " || ",
} Op::Set => " = ",
Op::MulSet => " *= ",
Op::DivSet => " /= ",
Op::RemSet => " %= ",
Op::AddSet => " += ",
Op::SubSet => " -= ",
Op::ShlSet => " <<= ",
Op::ShrSet => " >>= ",
Op::AndSet => " &= ",
Op::XorSet => " ^= ",
Op::OrSet => " |= ",
})
} }
} }

View File

@@ -79,6 +79,11 @@ impl<'f, F: Write + ?Sized> Indent<'f, F> {
pub fn new(f: &'f mut F, indent: &'static str) -> Self { pub fn new(f: &'f mut F, indent: &'static str) -> Self {
Indent { f, needs_indent: false, indent } Indent { f, needs_indent: false, indent }
} }
/// Gets mutable access to the inner [Write]-adapter.
///
/// NOTE(review): writes made directly through this reference bypass the
/// indentation tracking of the surrounding wrapper's own [Write] impl —
/// confirm callers do not expect indentation on such writes.
pub fn inner(&mut self) -> &mut F {
self.f
}
} }
impl<F: Write + ?Sized> Write for Indent<'_, F> { impl<F: Write + ?Sized> Write for Indent<'_, F> {
@@ -103,10 +108,18 @@ impl<F: Write + ?Sized> Write for Indent<'_, F> {
/// Prints delimiters around anything formatted with this. Implies [Indent] /// Prints delimiters around anything formatted with this. Implies [Indent]
pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> { pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> {
f: &'f mut F, /// The formatter
pub f: &'f mut F,
close: E, close: E,
} }
impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
/// Gets mutable access to the inner [Write]-adapter.
///
/// NOTE(review): the `f` field appears to be `pub` as well, so this
/// accessor duplicates direct field access — consider keeping only one.
pub fn inner(&mut self) -> &mut F {
self.f
}
}
impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> { impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self { pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
let _ = write!(f, "{open}"); let _ = write!(f, "{open}");
@@ -133,6 +146,13 @@ pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> {
close: E, close: E,
} }
impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
/// Gets mutable access to the inner [Write]-adapter.
// Delegates through the wrapped Indent adapter's own `inner()`,
// unwrapping both layers of adaptation in one call.
pub fn inner(&mut self) -> &mut F {
self.f.inner()
}
}
impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> { impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self { pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
let mut f = f.indent(); let mut f = f.indent();

View File

@@ -8,9 +8,10 @@ use crate::{span::Span, token::*};
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LexError { pub struct LexError {
pub pos: u32, pub pos: Span,
pub res: &'static str, pub res: LexFailure,
} }
impl std::error::Error for LexError {} impl std::error::Error for LexError {}
impl std::fmt::Display for LexError { impl std::fmt::Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -19,6 +20,44 @@ impl std::fmt::Display for LexError {
} }
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The reason a [LexError] was produced, structured so callers can match on
/// it instead of comparing strings.
pub enum LexFailure {
    /// Reached end of file
    EOF,
    /// Reached end of file in a position where more input was required
    UnexpectedEOF,
    /// Encountered a character that cannot begin any token
    Unexpected(char),
    UnterminatedBlockComment,
    UnterminatedCharacter,
    UnterminatedString,
    UnterminatedUnicodeEscape,
    /// The escaped value is not a valid Unicode scalar value
    InvalidUnicodeEscape(u32),
    /// The character is not a valid digit in the given base
    InvalidDigitForBase(char, u32),
    /// The integer literal overflowed the 128-bit accumulator
    IntegerOverflow,
}
use LexFailure::*;
pub use LexFailure::{EOF, UnexpectedEOF};
impl std::fmt::Display for LexFailure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF => "EOF".fmt(f),
            Self::UnexpectedEOF => "Unexpected EOF".fmt(f),
            // Bug fix: `{:?}` on a char already prints it in single quotes,
            // so the original `'{c:?}'` produced doubled quotes (`''x''`).
            Self::Unexpected(c) => write!(f, "Character {c:?}"),
            Self::UnterminatedBlockComment => "Unterminated Block Comment".fmt(f),
            Self::UnterminatedCharacter => "Unterminated Character".fmt(f),
            Self::UnterminatedString => "Unterminated String".fmt(f),
            Self::UnterminatedUnicodeEscape => "Unterminated Unicode Escape".fmt(f),
            Self::InvalidUnicodeEscape(hex) => {
                write!(f, "'\\u{{{hex:x}}}' is not a valid UTF-8 codepoint")
            }
            Self::InvalidDigitForBase(digit, base) => {
                write!(f, "Invalid digit {digit} for base {base}")
            }
            Self::IntegerOverflow => "Integer literal does not fit in 128 bits".fmt(f),
        }
    }
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Lexer<'t> { pub struct Lexer<'t> {
/// The source text /// The source text
@@ -72,8 +111,8 @@ impl<'t> Lexer<'t> {
} }
/// Produces a LexError at the start of the current token /// Produces a LexError at the start of the current token
fn error(&self, res: &'static str) -> LexError { fn error(&self, res: LexFailure) -> LexError {
LexError { pos: self.head, res } LexError { pos: Span(self.head, self.tail), res }
} }
/// Gets the Lexer's current &[str] lexeme and [Span] /// Gets the Lexer's current &[str] lexeme and [Span]
@@ -118,7 +157,7 @@ impl<'t> Lexer<'t> {
.skip_whitespace() .skip_whitespace()
.start_token() .start_token()
.peek() .peek()
.ok_or_else(|| self.error("EOF"))? .ok_or_else(|| self.error(EOF))?
{ {
'!' => Bang, '!' => Bang,
'"' => return self.string(), '"' => return self.string(),
@@ -154,7 +193,7 @@ impl<'t> Lexer<'t> {
'~' => Tilde, '~' => Tilde,
'_' => return self.identifier(), '_' => return self.identifier(),
c if is_xid_start(c) => return self.identifier(), c if is_xid_start(c) => return self.identifier(),
_ => Err(self.error("Invalid"))?, c => Err(self.error(Unexpected(c)))?,
}; };
// Handle digraphs // Handle digraphs
@@ -217,8 +256,12 @@ impl<'t> Lexer<'t> {
/// Consumes characters until the lexer reaches a newline `'\n'` /// Consumes characters until the lexer reaches a newline `'\n'`
pub fn line_comment(&mut self) -> Result<Token, LexError> { pub fn line_comment(&mut self) -> Result<Token, LexError> {
let kind = match self.consume().peek() {
Some('!' | '/') => TKind::Doc,
_ => TKind::Comment,
};
while self.consume().peek().is_some_and(|c| c != '\n') {} while self.consume().peek().is_some_and(|c| c != '\n') {}
Ok(self.produce(TKind::Comment)) Ok(self.produce(kind))
} }
/// Consumes characters until the lexer reaches the end of a *nested* block comment. /// Consumes characters until the lexer reaches the end of a *nested* block comment.
@@ -232,7 +275,7 @@ impl<'t> Lexer<'t> {
_ => continue, _ => continue,
}; };
} }
Err(self.error("Unterminated block comment")) Err(self.error(UnterminatedBlockComment))
} }
/// Consumes characters until it reaches a character not in [is_xid_continue]. /// Consumes characters until it reaches a character not in [is_xid_continue].
@@ -257,6 +300,7 @@ impl<'t> Lexer<'t> {
"fn" => TKind::Fn, "fn" => TKind::Fn,
"for" => TKind::For, "for" => TKind::For,
"if" => TKind::If, "if" => TKind::If,
"impl" => TKind::Impl,
"in" => TKind::In, "in" => TKind::In,
"let" => TKind::Let, "let" => TKind::Let,
"loop" => TKind::Loop, "loop" => TKind::Loop,
@@ -266,6 +310,7 @@ impl<'t> Lexer<'t> {
"or" => TKind::Or, "or" => TKind::Or,
"pub" => TKind::Public, "pub" => TKind::Public,
"return" => TKind::Return, "return" => TKind::Return,
"static" => TKind::Const, // TODO: Static
"struct" => TKind::Struct, "struct" => TKind::Struct,
"then" => TKind::Do, "then" => TKind::Do,
"true" => TKind::True, "true" => TKind::True,
@@ -286,7 +331,7 @@ impl<'t> Lexer<'t> {
if self.take().is_some_and(|c| c == '\'') { if self.take().is_some_and(|c| c == '\'') {
Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c))) Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c)))
} else { } else {
Err(self.error("Unterminated character")) Err(self.error(UnterminatedCharacter))
} }
} }
@@ -296,7 +341,7 @@ impl<'t> Lexer<'t> {
self.consume(); self.consume();
loop { loop {
lexeme.push(match self.take() { lexeme.push(match self.take() {
None => Err(self.error("Unterminated string"))?, None => Err(self.error(UnterminatedString))?,
Some('\\') => self.escape()?, Some('\\') => self.escape()?,
Some('"') => break, Some('"') => break,
Some(c) => c, Some(c) => c,
@@ -308,7 +353,8 @@ impl<'t> Lexer<'t> {
/// Parses a single escape sequence into its resulting char value. /// Parses a single escape sequence into its resulting char value.
pub fn escape(&mut self) -> Result<char, LexError> { pub fn escape(&mut self) -> Result<char, LexError> {
Ok(match self.take().ok_or_else(|| self.error("EOF"))? { Ok(
match self.take().ok_or_else(|| self.error(UnexpectedEOF))? {
' ' => '\u{a0}', // Non-breaking space ' ' => '\u{a0}', // Non-breaking space
'0' => '\0', // C0 Null Character '0' => '\0', // C0 Null Character
'a' => '\x07', // C0 Acknowledge 'a' => '\x07', // C0 Acknowledge
@@ -321,27 +367,30 @@ impl<'t> Lexer<'t> {
'u' => self.unicode_escape()?, 'u' => self.unicode_escape()?,
'x' => self.hex_escape()?, 'x' => self.hex_escape()?,
c => c, c => c,
}) },
)
} }
/// Parses two hex-digits and constructs a [char] out of them. /// Parses two hex-digits and constructs a [char] out of them.
pub fn hex_escape(&mut self) -> Result<char, LexError> { pub fn hex_escape(&mut self) -> Result<char, LexError> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?; let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(self.error("Invalid digit")) char::from_u32(out).ok_or(self.error(InvalidUnicodeEscape(out)))
} }
/// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them. /// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
pub fn unicode_escape(&mut self) -> Result<char, LexError> { pub fn unicode_escape(&mut self) -> Result<char, LexError> {
self.next_if('{') self.next_if('{')
.ok_or_else(|| self.error("No unicode escape opener"))?; .ok_or_else(|| self.error(UnterminatedUnicodeEscape))?;
let mut out = 0; let mut out = 0;
while let Some(c) = self.take() { while let Some(c) = self.take() {
if c == '}' { if c == '}' {
return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value")); return char::from_u32(out).ok_or_else(|| self.error(InvalidUnicodeEscape(out)));
} }
out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?; out = out * 16
+ c.to_digit(16)
.ok_or_else(|| self.error(InvalidDigitForBase(c, 16)))?;
} }
Err(self.error("Unterminated unicode escape")) Err(self.error(UnterminatedUnicodeEscape))
} }
/// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36. /// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
@@ -353,7 +402,10 @@ impl<'t> Lexer<'t> {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
int = match c.to_digit(BASE).ok_or(c) { int = match c.to_digit(BASE).ok_or(c) {
Err('_') => int, Err('_') => int,
Ok(c) => int.wrapping_mul(BASE as _).wrapping_add(c as _), Ok(c) => int
.checked_mul(BASE as _)
.and_then(|int| int.checked_add(c as _))
.ok_or_else(|| self.error(IntegerOverflow))?,
_ => break, _ => break,
}; };
self.consume(); self.consume();
@@ -362,12 +414,13 @@ impl<'t> Lexer<'t> {
Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE))) Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
} }
/// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36 /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36.
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> { pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) { let digit = self.take().ok_or_else(|| self.error(UnexpectedEOF))?;
if let Some(digit) = digit.to_digit(BASE) {
Ok(digit) Ok(digit)
} else { } else {
Err(self.error("Invalid digit")) Err(self.error(InvalidDigitForBase(digit, BASE)))
} }
} }
} }

View File

@@ -9,7 +9,7 @@ use doughlang::{
Expr, Expr,
macro_matcher::{Match, Subst}, macro_matcher::{Match, Subst},
}, },
lexer::{LexError, Lexer}, lexer::{EOF, LexError, Lexer},
parser::{ParseError, Parser}, parser::{ParseError, Parser},
span::Span, span::Span,
token::{TKind, Token}, token::{TKind, Token},
@@ -73,7 +73,7 @@ fn lex() -> Result<(), Box<dyn Error>> {
} }
loop { loop {
match lexer.scan() { match lexer.scan() {
Err(LexError { res: "EOF", .. }) => { Err(LexError { res: EOF, .. }) => {
break Ok(Response::Accept); break Ok(Response::Accept);
} }
Err(e) => { Err(e) => {
@@ -97,7 +97,7 @@ fn exprs() -> Result<(), Box<dyn Error>> {
} }
for idx in 0.. { for idx in 0.. {
match parser.parse::<Anno<Expr>>(0) { match parser.parse::<Anno<Expr>>(0) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
return Ok(Response::Accept); return Ok(Response::Accept);
} }
Err(e) => { Err(e) => {
@@ -120,7 +120,7 @@ fn pats() -> Result<(), Box<dyn Error>> {
} }
loop { loop {
match parser.parse::<Pat>(PPrec::Min) { match parser.parse::<Pat>(PPrec::Min) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
break Ok(Response::Accept); break Ok(Response::Accept);
} }
Err(e) => { Err(e) => {
@@ -142,7 +142,7 @@ fn tys() -> Result<(), Box<dyn Error>> {
} }
loop { loop {
match parser.parse::<Ty>(()) { match parser.parse::<Ty>(()) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => { Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
break Ok(Response::Accept); break Ok(Response::Accept);
} }
Err(e) => { Err(e) => {
@@ -205,16 +205,25 @@ fn subst() -> Result<(), Box<dyn Error>> {
fn parse(document: &str) { fn parse(document: &str) {
let mut parser = Parser::new(Lexer::new(document)); let mut parser = Parser::new(Lexer::new(document));
let isatty = std::io::stdin().is_terminal();
for idx in 0.. { for idx in 0.. {
match parser.parse::<Expr>(0) { match parser.parse::<Expr>(0) {
Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break, Err(e @ ParseError::EOF(s)) if s.tail == document.len() as _ => {
Err(e) => { println!("\x1b[92m{e} (total {} bytes)\x1b[0m", document.len());
println!("\x1b[31m{e}\x1b[0m");
break; break;
} }
Ok(v) => { Err(e @ ParseError::EOF(_)) => {
println!("\x1b[93m{e} (total {} bytes)\x1b[0m", document.len());
break;
}
Err(e) => {
println!("\x1b[91m{e}\x1b[0m");
break;
}
Ok(v) if isatty => {
println!("\x1b[{}m{v}", (idx + 5) % 6 + 31); println!("\x1b[{}m{v}", (idx + 5) % 6 + 31);
} }
_ => {}
} }
} }
} }

View File

@@ -1,7 +1,7 @@
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes. //! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{ use crate::{
ast::*, ast::*,
lexer::{LexError, Lexer}, lexer::{LexError, LexFailure, Lexer},
span::Span, span::Span,
token::{Lexeme, TKind, Token}, token::{Lexeme, TKind, Token},
}; };
@@ -9,8 +9,12 @@ use std::{error::Error, fmt::Display, vec};
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError { pub enum ParseError {
/// Reached the expected end of input.
EOF(Span),
/// Unexpectedly reached end of input.
UnexpectedEOF(Span),
FromLexer(LexError), FromLexer(LexError),
Expected(TKind, Span), Expected(TKind, TKind, Span),
NotLiteral(TKind, Span), NotLiteral(TKind, Span),
NotPattern(TKind, Span), NotPattern(TKind, Span),
NotType(TKind, Span), NotType(TKind, Span),
@@ -19,12 +23,16 @@ pub enum ParseError {
NotPostfix(TKind, Span), NotPostfix(TKind, Span),
} }
pub use ParseError::EOF;
impl Error for ParseError {} impl Error for ParseError {}
impl Display for ParseError { impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
Self::FromLexer(e) => e.fmt(f), Self::FromLexer(e) => e.fmt(f),
Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."), Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."), Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."),
@@ -37,10 +45,36 @@ impl Display for ParseError {
pub type PResult<T> = Result<T, ParseError>; pub type PResult<T> = Result<T, ParseError>;
/// Conversions between the EOF-tolerant and EOF-intolerant flavors of
/// [PResult].
trait PResultExt<T> {
    fn no_eof(self) -> PResult<T>;
    fn allow_eof(self) -> PResult<Option<T>>;
}

impl<T> PResultExt<T> for PResult<T> {
    /// Reinterprets [ParseError::EOF] as [ParseError::UnexpectedEOF],
    /// leaving every other outcome untouched.
    fn no_eof(self) -> Self {
        self.map_err(|err| match err {
            ParseError::EOF(span) => ParseError::UnexpectedEOF(span),
            other => other,
        })
    }

    /// Flattens [ParseError::EOF] into a successful `None`; successes become
    /// `Some`, and every other error propagates unchanged.
    fn allow_eof(self) -> PResult<Option<T>> {
        match self {
            Err(ParseError::EOF(_)) => Ok(None),
            other => other.map(Some),
        }
    }
}

/// Opens a scope where [ParseError::EOF] is unexpected (See [PResultExt::no_eof])
fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
    f().no_eof()
}
#[derive(Debug)] #[derive(Debug)]
pub struct Parser<'t> { pub struct Parser<'t> {
pub lexer: Lexer<'t>, pub lexer: Lexer<'t>,
pub next_tok: Option<Token>, pub next_tok: Option<PResult<Token>>,
pub last_loc: Span, pub last_loc: Span,
pub elide_do: bool, pub elide_do: bool,
} }
@@ -72,46 +106,59 @@ impl<'t> Parser<'t> {
None => loop { None => loop {
match self.lexer.scan() { match self.lexer.scan() {
Ok(Token { kind: TKind::Comment, .. }) => {} Ok(Token { kind: TKind::Comment, .. }) => {}
Ok(tok) => break tok, Ok(tok) => break Ok(tok),
Err(e) => Err(ParseError::FromLexer(e))?, Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
Err(e) => break Err(ParseError::FromLexer(e)),
} }
}, },
}; };
self.last_loc = next_tok.span;
self.next_tok = Some(next_tok); self.next_tok = Some(next_tok);
Ok(self.next_tok.as_ref().expect("should have token"))
let next_tok = self.next_tok.as_ref().expect("should have Some lex result");
if let Ok(tok) = next_tok {
self.last_loc = tok.span;
}
next_tok.as_ref().map_err(|e| *e)
} }
/// Peeks the next token if it matches the `expected` [TKind] /// Peeks the next token if it matches the `expected` [TKind]
pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> { pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
self.peek().into_iter().find(|tok| tok.kind == expected) match self.peek() {
Ok(tok) if tok.kind == expected => Ok(Some(tok)),
Ok(_) => Ok(None),
Err(e) => Err(e),
}
} }
/// Consumes and returns the currently-peeked [Token]. /// Consumes and returns the currently-peeked [Token].
pub fn take(&mut self) -> Option<Token> { pub fn take(&mut self) -> PResult<Token> {
let tok = self.next_tok.take(); let tok = self
self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. })); .next_tok
.take()
.unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. }));
tok tok
} }
/// Consumes the currently-peeked [Token], returning its lexeme without cloning. /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
pub fn take_lexeme(&mut self) -> Option<Lexeme> { pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
self.take().map(|tok| tok.lexeme) self.take().map(|tok| tok.lexeme)
} }
#[allow(clippy::should_implement_trait)] #[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> PResult<Token> { pub fn next(&mut self) -> PResult<Token> {
self.peek()?; self.peek().no_eof()?;
Ok(self.take().expect("should have token here")) Ok(self.take().expect("should have token here"))
} }
/// Consumes and returns the next [Token] if it matches the `expected` [TKind] /// Consumes and returns the next [Token] if it matches the `expected` [TKind]
pub fn next_if(&mut self, expected: TKind) -> PResult<Token> { pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
let token = self.peek()?; match self.peek() {
if token.kind == expected { Ok(t) if t.kind == expected => self.take().map(Ok),
Ok(self.take().expect("should have token here")) Ok(t) => Ok(Err(t.kind)),
} else { Err(e) => Err(e),
Err(ParseError::Expected(expected, token.span))
} }
} }
@@ -127,13 +174,15 @@ impl<'t> Parser<'t> {
end: TKind, end: TKind,
) -> PResult<Vec<P>> { ) -> PResult<Vec<P>> {
// TODO: This loses lexer errors // TODO: This loses lexer errors
while self.peek_if(end).is_none() { while self.peek_if(end).no_eof()?.is_none() {
elems.push(self.parse(level.clone())?); elems.push(self.parse(level.clone()).no_eof()?);
if self.next_if(sep).is_err() { match self.peek_if(sep)? {
break; Some(_) => self.consume(),
None => break,
};
} }
} self.next_if(end)?
self.next_if(end)?; .map_err(|tk| ParseError::Expected(end, tk, self.span()))?;
Ok(elems) Ok(elems)
} }
@@ -148,33 +197,37 @@ impl<'t> Parser<'t> {
sep: TKind, sep: TKind,
) -> PResult<Vec<P>> { ) -> PResult<Vec<P>> {
loop { loop {
elems.push(self.parse(level.clone())?); let elem = self.parse(level.clone()).no_eof()?;
if self.next_if(sep).is_err() { elems.push(elem);
break Ok(elems); match self.peek_if(sep) {
} Ok(Some(_)) => self.consume(),
Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
Err(e) => Err(e)?,
};
} }
} }
/// Parses into an [`Option<P>`] if the next token is `next` /// Parses into an [`Option<P>`] if the next token is `next`
pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> { pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
Ok(match self.next_if(next) { Ok(match self.next_if(next)? {
Ok(_) => Some(self.parse(level)?), Ok(_) => Some(self.parse(level).no_eof()?),
Err(_) => None, Err(_) => None,
}) })
} }
/// Parses a P unless the next token is `end` /// Parses a P unless the next token is `end`
pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> { pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
let out = match self.peek_if(end) { let out = match self.peek_if(end)? {
None => Some(self.parse(level)?), None => Some(self.parse(level).no_eof()?),
Some(_) => None, Some(_) => None,
}; };
self.next_if(end)?; self.expect(end)?;
Ok(out) Ok(out)
} }
pub fn consume_if(&mut self, next: TKind) -> PResult<&mut Self> { pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
self.next_if(next)?; self.next_if(next)?
.map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
Ok(self) Ok(self)
} }
@@ -197,17 +250,12 @@ impl<'t> Parse<'t> for FqPath {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let mut parts = vec![]; let mut parts = vec![];
if p.next_if(TKind::ColonColon).is_ok() { if p.next_if(TKind::ColonColon)?.is_ok() {
parts.push("".into()); // the "root" parts.push("".into()); // the "root"
} }
loop { while let Ok(id) = p.next_if(TKind::Identifier)? {
parts.push( parts.push(id.lexeme.string().expect("Identifier should have String"));
p.next_if(TKind::Identifier)? if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
.lexeme
.string()
.expect("Identifier should have String"),
);
if p.next_if(TKind::ColonColon).is_err() {
break; break;
} }
} }
@@ -223,26 +271,19 @@ impl<'t> Parse<'t> for Literal {
Ok(match tok.kind { Ok(match tok.kind {
TKind::True => p.consume().then(Literal::Bool(true)), TKind::True => p.consume().then(Literal::Bool(true)),
TKind::False => p.consume().then(Literal::Bool(false)), TKind::False => p.consume().then(Literal::Bool(false)),
TKind::Character => Literal::Char( TKind::Character => Literal::Char({
p.take_lexeme() let Token { lexeme, .. } = p.take().expect("should have Token");
.expect("should have Token") lexeme.char().expect("char token should have char")
.char()
.expect("should have one char in char literal"),
),
TKind::Integer => {
let Token { lexeme, span, .. } = p.take().expect("should have Token");
let Lexeme::Integer(int, _) = lexeme else {
Err(ParseError::Expected(TKind::Integer, span))?
};
Literal::Int(int)
}
TKind::String => Literal::Str({
let Token { lexeme, span, .. } = p.take().expect("should have Token");
lexeme
.string()
.ok_or(ParseError::Expected(TKind::String, span))?
}), }),
_ => Err(ParseError::Expected(TKind::Integer, tok.span))?, TKind::Integer => Literal::Int({
let Token { lexeme, .. } = p.take().expect("should have Token");
lexeme.int().expect("integer token should have int")
}),
TKind::String => Literal::Str({
let Token { lexeme, .. } = p.take().expect("should have Token");
lexeme.string().expect("string token should have string")
}),
other => Err(ParseError::NotLiteral(other, tok.span))?,
}) })
} }
} }
@@ -288,6 +329,7 @@ fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
impl<'t> Parse<'t> for Pat { impl<'t> Parse<'t> for Pat {
type Prec = PPrec; type Prec = PPrec;
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
let tok = p.peek()?; let tok = p.peek()?;
@@ -315,12 +357,10 @@ impl<'t> Parse<'t> for Pat {
.opt(PPrec::Alt, TKind::RCurly)? .opt(PPrec::Alt, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))), .unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
), ),
Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => { Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")), 1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path), _ => Self::Path(path),
} },
}
Err(e) => Err(e)?, Err(e) => Err(e)?,
} }
} }
@@ -329,21 +369,21 @@ impl<'t> Parse<'t> for Pat {
TKind::DotDot => Pat::Op( TKind::DotDot => Pat::Op(
PatOp::Rest, PatOp::Rest,
// Identifier in Rest position always becomes binder // Identifier in Rest position always becomes binder
match p.consume().peek()?.kind { match p.consume().peek().allow_eof()?.map(Token::kind) {
TKind::Identifier => vec![Pat::Name( Some(TKind::Identifier) => vec![Pat::Name(
p.take_lexeme() p.take_lexeme()
.expect("should have lexeme") .expect("should have lexeme")
.string() .string()
.expect("should be string"), .expect("should be string"),
)], )],
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?], Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![], _ => vec![],
}, },
), ),
TKind::DotDotEq => Pat::Op( TKind::DotDotEq => Pat::Op(
PatOp::RangeIn, PatOp::RangeIn,
match p.consume().peek()?.kind { match p.consume().peek().allow_eof()?.map(Token::kind) {
TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?], Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![], _ => vec![],
}, },
), ),
@@ -360,14 +400,14 @@ impl<'t> Parse<'t> for Pat {
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?, _ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
}; };
while let Ok(tok) = p.peek() while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Some((op, prec)) = pat_from_infix(tok) && let Some((op, prec)) = pat_from_infix(tok)
&& level <= prec && level <= prec
{ {
let kind = tok.kind; let kind = tok.kind;
head = match op { head = match op {
PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?), PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?),
PatPs::Op(op @ PatOp::RangeEx) => Pat::Op( PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op(
op, op,
match p.consume().peek().map(|t| t.kind) { match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => { Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
@@ -379,7 +419,6 @@ impl<'t> Parse<'t> for Pat {
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?), PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
} }
} }
Ok(head) Ok(head)
} }
} }
@@ -389,10 +428,11 @@ impl<'t> Parse<'t> for Ty {
fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self> fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self>
where Self: Sized { where Self: Sized {
let tok = p.peek()?; let &Token { kind, span, .. } = p.peek()?;
let head = match tok.kind { // TODO: this is a kinda jank way of error reporting
TKind::Identifier => match tok.lexeme.str() { let head = match kind {
TKind::Identifier => match p.peek()?.lexeme.str() {
Some("_") => p.consume().then(Ty::Infer), Some("_") => p.consume().then(Ty::Infer),
_ => Ty::Named(p.parse(())?), _ => Ty::Named(p.parse(())?),
}, },
@@ -403,7 +443,7 @@ impl<'t> Parse<'t> for Ty {
match p.next()? { match p.next()? {
Token { kind: TKind::Semi, .. } => { Token { kind: TKind::Semi, .. } => {
let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?); let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?);
p.next_if(TKind::RBrack)?; p.expect(TKind::RBrack)?;
ty ty
} }
Token { kind: TKind::RBrack, .. } => Ty::Slice(ty), Token { kind: TKind::RBrack, .. } => Ty::Slice(ty),
@@ -411,30 +451,27 @@ impl<'t> Parse<'t> for Ty {
} }
} }
TKind::Fn => { TKind::Fn => {
p.consume().consume_if(TKind::LParen)?; p.consume();
match p.parse(())? {
let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?; Ty::Fn(args) => Ty::Fn(args),
tys.push(match p.next_if(TKind::Arrow) { other @ Ty::Tuple(_) => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
Ok(_) => p.parse(())?, other => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
_ => Ty::Tuple(vec![]), }
});
Ty::Fn(tys)
} }
TKind::LParen => { TKind::LParen => {
let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?; Ty::Tuple(p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?)
match p.next_if(TKind::Arrow) {
Ok(_) => {
tys.push(p.parse(())?);
Ty::Fn(tys)
} }
_ => Ty::Tuple(tys), _ => Err(ParseError::NotType(kind, span))?,
}
}
_ => Err(ParseError::NotType(tok.kind, tok.span))?,
}; };
Ok(match p.next_if(TKind::Arrow) { Ok(match p.next_if(TKind::Arrow).allow_eof()? {
Ok(_) => Ty::Fn(vec![head, p.parse(())?]), Some(Ok(_)) => Ty::Fn(vec![
match head {
args @ Ty::Tuple(_) => args,
arg => Ty::Tuple(vec![arg]),
},
p.parse(())?,
]),
_ => head, _ => head,
}) })
} }
@@ -483,15 +520,18 @@ pub enum Prec {
impl Prec { impl Prec {
pub const MIN: usize = Prec::Min.value(); pub const MIN: usize = Prec::Min.value();
pub const fn value(self) -> usize { pub const fn value(self) -> usize {
self as usize * 2 self as usize * 2
} }
pub const fn prev(self) -> usize { pub const fn prev(self) -> usize {
match self { match self {
Self::Assign => self.value() + 1, Self::Assign => self.value() + 1,
_ => self.value(), _ => self.value(),
} }
} }
pub const fn next(self) -> usize { pub const fn next(self) -> usize {
match self { match self {
Self::Assign => self.value(), Self::Assign => self.value(),
@@ -526,7 +566,7 @@ pub enum Ps {
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind { Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do), TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::Semi => (Ps::ExplicitDo, Prec::Do), TKind::Semi => (Ps::End, Prec::Body),
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max), TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max), TKind::Grave => (Ps::Mid, Prec::Max),
@@ -566,6 +606,7 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary), TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary), TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary), TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),
kind => Err(ParseError::NotPrefix(kind, token.span))?, kind => Err(ParseError::NotPrefix(kind, token.span))?,
}) })
@@ -574,18 +615,24 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind { Ok(match token.kind {
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
TKind::As => (Ps::Op(Op::As), Prec::Max), TKind::In => (Ps::Op(Op::Do), Prec::Do),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::LCurly => (Ps::Make, Prec::Make),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
TKind::LCurly => (Ps::Make, Prec::Make),
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare), TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare), TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare), TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
@@ -605,6 +652,13 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
TKind::Slash => (Ps::Op(Op::Div), Prec::Term), TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::As => (Ps::Op(Op::As), Prec::Max),
_ => (Ps::ImplicitDo, Prec::Do), _ => (Ps::ImplicitDo, Prec::Do),
}) })
} }
@@ -615,7 +669,7 @@ impl<'t> Parse<'t> for Const {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
Ok(Self( Ok(Self(
p.consume().parse(PPrec::Tuple)?, p.consume().parse(PPrec::Tuple)?,
p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?, p.expect(TKind::Eq)?.parse(Prec::Tuple.value())?,
)) ))
} }
} }
@@ -637,7 +691,7 @@ impl<'t> Parse<'t> for Fn {
type Prec = (); type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
match p.consume().next_if(TKind::Identifier) { match p.consume().next_if(TKind::Identifier)? {
Ok(Token { lexeme, .. }) => Ok(Self( Ok(Token { lexeme, .. }) => Ok(Self(
lexeme.string(), lexeme.string(),
p.parse(PPrec::Tuple)?, p.parse(PPrec::Tuple)?,
@@ -648,7 +702,7 @@ impl<'t> Parse<'t> for Fn {
None, None,
Pat::Op( Pat::Op(
PatOp::Tuple, PatOp::Tuple,
p.consume_if(TKind::LParen)?.list( p.expect(TKind::LParen)?.list(
vec![], vec![],
PPrec::Tuple, PPrec::Tuple,
TKind::Comma, TKind::Comma,
@@ -667,12 +721,15 @@ impl<'t> Parse<'t> for Let {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let pat = p.consume().parse(PPrec::Tuple)?; let pat = p.consume().parse(PPrec::Tuple)?;
if p.next_if(TKind::Eq).is_err() { if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) {
return Ok(Self(pat, vec![])); return Ok(Self(pat, vec![]));
} }
let body = p.parse(Prec::Tuple.value())?; let body = p.parse(Prec::Tuple.value())?;
if p.next_if(TKind::Else).is_err() { if p.next_if(TKind::Else)
.allow_eof()?
.is_none_or(|v| v.is_err())
{
return Ok(Self(pat, vec![body])); return Ok(Self(pat, vec![body]));
} }
@@ -685,42 +742,41 @@ impl<'t> Parse<'t> for Match {
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
Ok(Self(p.consume().parse(Prec::Logical.value())?, { Ok(Self(p.consume().parse(Prec::Logical.value())?, {
p.next_if(TKind::LCurly)?; p.expect(TKind::LCurly)?;
p.list(vec![], Prec::Body.next(), TKind::Comma, TKind::RCurly)? p.list(vec![], Prec::Body.next(), TKind::Comma, TKind::RCurly)?
})) }))
} }
} }
impl<'t> Parse<'t> for MatchArm { impl<'t> Parse<'t> for MatchArm {
type Prec = usize; type Prec = usize;
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> { fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
p.next_if(TKind::Bar).ok(); p.next_if(TKind::Bar)?.ok(); // and discard
Ok(MatchArm( Ok(MatchArm(
p.parse(PPrec::Min)?, p.parse(PPrec::Min)?,
p.consume_if(TKind::FatArrow)?.parse(level)?, p.expect(TKind::FatArrow)?.parse(level)?,
)) ))
} }
} }
impl<'t> Parse<'t> for MakeArm { impl<'t> Parse<'t> for MakeArm {
type Prec = (); type Prec = ();
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
let name = p
.next_if(TKind::Identifier)?
.map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?;
Ok(MakeArm( Ok(MakeArm(
p.next_if(TKind::Identifier)? name.lexeme.string().expect("Identifier should have String"),
.lexeme p.opt_if(Prec::Body.value(), TKind::Colon)?,
.string()
.expect("Identifier should have String"),
{
p.next_if(TKind::Colon)
.ok()
.map(|_| p.parse(Prec::Body.value()))
.transpose()?
},
)) ))
} }
} }
impl<'t> Parse<'t> for Mod { impl<'t> Parse<'t> for Mod {
type Prec = (); type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> { fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let ty = p.consume().parse(())?; let ty = p.consume().parse(())?;
let body = p.parse(Prec::Body.value())?; let body = p.parse(Prec::Body.value())?;
@@ -732,14 +788,14 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
// for Pat // for Pat
let pat = p.consume().parse(PPrec::Tuple)?; let pat = p.consume().parse(PPrec::Tuple)?;
// in Expr // in Expr
let iter: Anno<Expr> = p.consume_if(TKind::In)?.parse(Prec::Logical.next())?; let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
let cspan = iter.1; let cspan = iter.1;
// Expr // Expr
let pass: Anno<Expr> = p.parse(Prec::Body.next())?; let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
let pspan = pass.1; let pspan = pass.1;
// else Expr? // else Expr?
let fail = match p.next_if(TKind::Else) { let fail = match p.next_if(TKind::Else).allow_eof()? {
Ok(_) => p.parse(Prec::Body.next())?, Some(Ok(_)) => p.parse(Prec::Body.next())?,
_ => Expr::Op(Op::Tuple, vec![]).anno(pspan), _ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
}; };
let fspan = fail.1; let fspan = fail.1;
@@ -834,20 +890,19 @@ impl<'t> Parse<'t> for Expr {
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> { fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN; const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix // Prefix
let tok = p.peek()?; let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, tok.span); let ((op, prec), span) = (from_prefix(tok)?, span);
no_eof(move || {
let mut head = match op { let mut head = match op {
// Empty is returned when a block finisher is an expr prefix. // "End" is produced when an "empty" expression is syntactically required.
// It's the only expr that doesn't consume. // This happens when a semi or closing delimiter begins an expression.
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]), // The token which emitted "End" cannot be consumed, as it is expected elsewhere.
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?, Ps::End if level <= prec.next() => Expr::Op(Op::Tuple, vec![]),
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
Ps::ExplicitDo => {
p.consume();
Expr::Op(Op::Tuple, vec![])
}
Ps::Id => Expr::Id(p.parse(())?), Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()), Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
@@ -858,6 +913,16 @@ impl<'t> Parse<'t> for Expr {
Ps::Typedef => Expr::Struct(p.parse(())?), Ps::Typedef => Expr::Struct(p.parse(())?),
Ps::Match => Expr::Match(p.parse(())?), Ps::Match => Expr::Match(p.parse(())?),
Ps::Mod => Expr::Mod(p.parse(())?), Ps::Mod => Expr::Mod(p.parse(())?),
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
Ps::Op(Op::Block) => Expr::Op( Ps::Op(Op::Block) => Expr::Op(
Op::Block, Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
@@ -874,7 +939,9 @@ impl<'t> Parse<'t> for Expr {
p.parse(Prec::Logical.value())?, p.parse(Prec::Logical.value())?,
p.parse(prec.next())?, p.parse(prec.next())?,
match p.peek() { match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?, Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
}, },
]; ];
@@ -907,7 +974,7 @@ impl<'t> Parse<'t> for Expr {
}; };
// Infix and Postfix // Infix and Postfix
while let Ok(tok) = p.peek() while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok) && let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev() && level <= prec.prev()
&& op != Ps::End && op != Ps::End
@@ -935,8 +1002,12 @@ impl<'t> Parse<'t> for Expr {
), ),
Ps::Op(Op::Call) => Expr::Op( Ps::Op(Op::Call) => Expr::Op(
Op::Call, Op::Call,
vec![
head.anno(span),
p.consume() p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?, .opt(0, TKind::RParen)?
.unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
],
), ),
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op( Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op, op,
@@ -947,12 +1018,15 @@ impl<'t> Parse<'t> for Expr {
p.consume(); p.consume();
Expr::Op(op, vec![head.anno(span)]) Expr::Op(op, vec![head.anno(span)])
} }
Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]), Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?, _ => Err(ParseError::NotInfix(kind, span))?,
} }
} }
Ok(head) Ok(head)
})
} }
} }
@@ -966,7 +1040,7 @@ fn parse_array<'t>(p: &mut Parser<'t>) -> PResult<Expr> {
let prec = Prec::Tuple; let prec = Prec::Tuple;
let item = p.parse(prec.value())?; let item = p.parse(prec.value())?;
let repeat = p.opt_if(prec.next(), TKind::Semi)?; let repeat = p.opt_if(prec.next(), TKind::Semi)?;
p.next_if(TKind::RBrack)?; p.expect(TKind::RBrack)?;
Ok(match (repeat, item) { Ok(match (repeat, item) {
(Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]), (Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),

View File

@@ -9,6 +9,12 @@ pub struct Token {
pub span: Span, pub span: Span,
} }
impl Token {
pub fn kind(&self) -> TKind {
self.kind
}
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Lexeme { pub enum Lexeme {
String(String), String(String),
@@ -55,7 +61,9 @@ impl std::fmt::Display for Lexeme {
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TKind { pub enum TKind {
Comment, Comment, // Line or block comment
Doc, // Doc comment
And, And,
As, As,
Break, Break,
@@ -67,6 +75,7 @@ pub enum TKind {
Fn, Fn,
For, For,
If, If,
Impl,
In, In,
Let, Let,
Loop, Loop,
@@ -76,6 +85,7 @@ pub enum TKind {
Or, Or,
Public, Public,
Return, Return,
Static,
Struct, Struct,
True, True,
While, While,