doughlang: Preserve errors through entire pipeline

lexer:
- Un-stringify errors
- Reserve more words
- Doc the comments

parser:
- MASSIVE changes to peek, peek_if, next_if, consume_if=>expect.
- Keep track of when EOF is allowable
- TKind is stupidly cheap with >100 niches, so we can fit like 4 of them in a single ParseError lmao
- TODO: make sure EOF/UnexpectedEOF propagation is correct. It seems... Kinda Not correct.
- Add meta-expressions
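A quick aside on the ParseError-size remark above: the sketch below is illustrative only and is not part of this commit; the Span, TKind, and ParseError shapes are local stand-ins assumed from the diff, not the real doughlang definitions. It shows why carrying a couple of token kinds plus a span in an error enum stays cheap: a fieldless enum like TKind fits in one byte, and its unused values ("niches") give the compiler room to encode the outer enum's discriminant without an extra tag.

// Illustrative sketch only: local stand-ins for the shapes used in the diff.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Span(u32, u32);

#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TKind { Comment, Doc, Identifier, Integer, String, LParen, RParen }

#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ParseError {
    Eof(Span),
    UnexpectedEof(Span),
    // (expected, got): each TKind is one byte, and its spare values ("niches")
    // can hold ParseError's own discriminant, so the variant stays small.
    Expected(TKind, TKind, Span),
}

fn main() {
    // Prints the layout sizes on the current target; the exact numbers are not
    // guaranteed by the language, which is why this is only a sketch.
    println!("Span:       {} bytes", std::mem::size_of::<Span>());
    println!("TKind:      {} byte",  std::mem::size_of::<TKind>());
    println!("ParseError: {} bytes", std::mem::size_of::<ParseError>());
}
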
src/ast.rs | 138
@@ -23,7 +23,7 @@ impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation
pub struct FqPath {
    // TODO: Identifier interning
    pub parts: Vec<String>,
    // TODO:
    // TODO: generic parameters
}

impl From<&str> for FqPath {
@@ -251,6 +251,7 @@ pub enum Op {
    ArRep, // [ Expr ; Expr ]
    Group, // ( Expr ,?)
    Tuple, // Expr (, Expr)*
    Meta, // #[ Expr ]

    Try, // Expr '?'
    Index, // Expr [ Expr,* ]
@@ -299,6 +300,16 @@ pub enum Op {
    LogOr, // Expr || Expr

    Set, // Expr = Expr
    MulSet, // Expr *= Expr
    DivSet, // Expr /= Expr
    RemSet, // Expr %= Expr
    AddSet, // Expr += Expr
    SubSet, // Expr -= Expr
    ShlSet, // Expr <<= Expr
    ShrSet, // Expr >>= Expr
    AndSet, // Expr &= Expr
    XorSet, // Expr ^= Expr
    OrSet, // Expr |= Expr
}

use crate::{fmt::FmtAdapter, span::Span};
@@ -381,19 +392,19 @@ impl<A: Annotation> Display for Mod<A> {
impl Display for Typedef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(kind, pat) = self;
        let kind = match kind {
            TypedefKind::Alias => "type",
            TypedefKind::Struct => "struct",
            TypedefKind::Enum => "enum",
        };
        f.write_str(match kind {
            TypedefKind::Alias => "type ",
            TypedefKind::Struct => "struct ",
            TypedefKind::Enum => "enum ",
        })?;
        match pat {
            Pat::Struct(name, bind) => match bind.as_ref() {
                Pat::Op(PatOp::Tuple, parts) => f
                    .delimit_indented(fmt!("{kind} {name} {{"), "}")
                    .delimit_indented(fmt!("{name} {{"), "}")
                    .list_wrap("\n", parts, ",\n", ",\n"),
                other => write!(f, "{name} {{ {other} }}"),
            },
            _ => write!(f, "{kind} {pat}"),
            _ => pat.fmt(f),
        }
    }
}
@@ -426,6 +437,10 @@ impl<A: Annotation> Display for Expr<A> {
                .list_wrap("\n", exprs, "\n", "\n"),
            Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
            Self::Op(Op::Group, exprs) => f.list(exprs, ", "),
            Self::Op(Op::Meta, exprs) => match exprs.as_slice() {
                [meta, expr @ ..] => f.delimit(fmt!("#[{meta}]\n"), "").list(expr, ","),
                [] => write!(f, "#[]"),
            },

            Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
                [callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
@@ -436,7 +451,7 @@ impl<A: Annotation> Display for Expr<A> {
                [] => write!(f, "{op}"),
            },

            Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"),
            Self::Op(op @ Op::Do, exprs) => f.list(exprs, op),
            Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
            Self::Op(op @ Op::Try, exprs) => f.delimit("(", fmt!("){op}")).list(exprs, ", "),
            Self::Op(op, exprs) => match exprs.as_slice() {
@@ -449,53 +464,64 @@ impl<A: Annotation> Display for Expr<A> {

impl Display for Op {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Op::Do => "; ".fmt(f),
            Op::As => " as ".fmt(f),
            Op::Macro => "macro ".fmt(f),
            Op::Block => "{}".fmt(f),
            Op::Array => "[]".fmt(f),
            Op::ArRep => "[;]".fmt(f),
            Op::Group => "()".fmt(f),
            Op::Tuple => "()".fmt(f),
            Op::Try => "?".fmt(f),
            Op::Index => "".fmt(f),
            Op::Call => "".fmt(f),
            Op::Pub => "pub ".fmt(f),
            Op::Loop => "loop ".fmt(f),
            Op::If => "if ".fmt(f),
            Op::While => "while ".fmt(f),
            Op::Break => "break ".fmt(f),
            Op::Return => "return ".fmt(f),
            Op::Dot => ".".fmt(f),
            Op::RangeEx => "..".fmt(f),
            Op::RangeIn => "..=".fmt(f),
            Op::Neg => "-".fmt(f),
            Op::Not => "!".fmt(f),
            Op::Identity => "!!".fmt(f),
            Op::Refer => "&".fmt(f),
            Op::Deref => "*".fmt(f),
            Op::Mul => " * ".fmt(f),
            Op::Div => " / ".fmt(f),
            Op::Rem => " % ".fmt(f),
            Op::Add => " + ".fmt(f),
            Op::Sub => " - ".fmt(f),
            Op::Shl => " << ".fmt(f),
            Op::Shr => " >> ".fmt(f),
            Op::And => " & ".fmt(f),
            Op::Xor => " ^ ".fmt(f),
            Op::Or => " | ".fmt(f),
            Op::Lt => " < ".fmt(f),
            Op::Leq => " <= ".fmt(f),
            Op::Eq => " == ".fmt(f),
            Op::Neq => " != ".fmt(f),
            Op::Geq => " >= ".fmt(f),
            Op::Gt => " > ".fmt(f),
            Op::LogAnd => " && ".fmt(f),
            Op::LogXor => " ^^ ".fmt(f),
            Op::LogOr => " || ".fmt(f),
            Op::Set => " = ".fmt(f),
        }
        f.write_str(match self {
            Op::Do => "; ",
            Op::As => " as ",
            Op::Macro => "macro ",
            Op::Block => "{}",
            Op::Array => "[]",
            Op::ArRep => "[;]",
            Op::Group => "()",
            Op::Tuple => "()",
            Op::Meta => "#[]",
            Op::Try => "?",
            Op::Index => "",
            Op::Call => "",
            Op::Pub => "pub ",
            Op::Loop => "loop ",
            Op::If => "if ",
            Op::While => "while ",
            Op::Break => "break ",
            Op::Return => "return ",
            Op::Dot => ".",
            Op::RangeEx => "..",
            Op::RangeIn => "..=",
            Op::Neg => "-",
            Op::Not => "!",
            Op::Identity => "!!",
            Op::Refer => "&",
            Op::Deref => "*",
            Op::Mul => " * ",
            Op::Div => " / ",
            Op::Rem => " % ",
            Op::Add => " + ",
            Op::Sub => " - ",
            Op::Shl => " << ",
            Op::Shr => " >> ",
            Op::And => " & ",
            Op::Xor => " ^ ",
            Op::Or => " | ",
            Op::Lt => " < ",
            Op::Leq => " <= ",
            Op::Eq => " == ",
            Op::Neq => " != ",
            Op::Geq => " >= ",
            Op::Gt => " > ",
            Op::LogAnd => " && ",
            Op::LogXor => " ^^ ",
            Op::LogOr => " || ",
            Op::Set => " = ",
            Op::MulSet => " *= ",
            Op::DivSet => " /= ",
            Op::RemSet => " %= ",
            Op::AddSet => " += ",
            Op::SubSet => " -= ",
            Op::ShlSet => " <<= ",
            Op::ShrSet => " >>= ",
            Op::AndSet => " &= ",
            Op::XorSet => " ^= ",
            Op::OrSet => " |= ",
        })
    }
}

src/fmt.rs | 22
@@ -79,6 +79,11 @@ impl<'f, F: Write + ?Sized> Indent<'f, F> {
    pub fn new(f: &'f mut F, indent: &'static str) -> Self {
        Indent { f, needs_indent: false, indent }
    }

    /// Gets mutable access to the inner [Write]-adapter
    pub fn inner(&mut self) -> &mut F {
        self.f
    }
}

impl<F: Write + ?Sized> Write for Indent<'_, F> {
@@ -103,10 +108,18 @@ impl<F: Write + ?Sized> Write for Indent<'_, F> {

/// Prints delimiters around anything formatted with this. Implies [Indent]
pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> {
    f: &'f mut F,
    /// The formatter
    pub f: &'f mut F,
    close: E,
}

impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
    /// Gets mutable access to the inner [Write]-adapter
    pub fn inner(&mut self) -> &mut F {
        self.f
    }
}

impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
    pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
        let _ = write!(f, "{open}");
@@ -133,6 +146,13 @@ pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> {
    close: E,
}

impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
    /// Gets mutable access to the inner [Write]-adapter
    pub fn inner(&mut self) -> &mut F {
        self.f.inner()
    }
}

impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
    pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
        let mut f = f.indent();

src/lexer.rs | 95
@@ -8,9 +8,10 @@ use crate::{span::Span, token::*};

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LexError {
    pub pos: u32,
    pub res: &'static str,
    pub pos: Span,
    pub res: LexFailure,
}

impl std::error::Error for LexError {}
impl std::fmt::Display for LexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -19,6 +20,44 @@ impl std::fmt::Display for LexError {
    }
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LexFailure {
    /// Reached end of file
    EOF,
    UnexpectedEOF,
    Unexpected(char),
    UnterminatedBlockComment,
    UnterminatedCharacter,
    UnterminatedString,
    UnterminatedUnicodeEscape,
    InvalidUnicodeEscape(u32),
    InvalidDigitForBase(char, u32),
    IntegerOverflow,
}
use LexFailure::*;
pub use LexFailure::{EOF, UnexpectedEOF};

impl std::fmt::Display for LexFailure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF => "EOF".fmt(f),
            Self::UnexpectedEOF => "Unexpected EOF".fmt(f),
            Self::Unexpected(c) => write!(f, "Character '{c:?}'"),
            Self::UnterminatedBlockComment => "Unterminated Block Comment".fmt(f),
            Self::UnterminatedCharacter => "Unterminated Character".fmt(f),
            Self::UnterminatedString => "Unterminated String".fmt(f),
            Self::UnterminatedUnicodeEscape => "Unterminated Unicode Escape".fmt(f),
            Self::InvalidUnicodeEscape(hex) => {
                write!(f, "'\\u{{{hex:x}}}' is not a valid UTF-8 codepoint")
            }
            Self::InvalidDigitForBase(digit, base) => {
                write!(f, "Invalid digit {digit} for base {base}")
            }
            Self::IntegerOverflow => "Integer literal does not fit in 128 bits".fmt(f),
        }
    }
}

#[derive(Clone, Debug)]
pub struct Lexer<'t> {
    /// The source text
@@ -72,8 +111,8 @@ impl<'t> Lexer<'t> {
    }

    /// Produces a LexError at the start of the current token
    fn error(&self, res: &'static str) -> LexError {
        LexError { pos: self.head, res }
    fn error(&self, res: LexFailure) -> LexError {
        LexError { pos: Span(self.head, self.tail), res }
    }

    /// Gets the Lexer's current &[str] lexeme and [Span]
@@ -118,7 +157,7 @@ impl<'t> Lexer<'t> {
            .skip_whitespace()
            .start_token()
            .peek()
            .ok_or_else(|| self.error("EOF"))?
            .ok_or_else(|| self.error(EOF))?
        {
            '!' => Bang,
            '"' => return self.string(),
@@ -154,7 +193,7 @@ impl<'t> Lexer<'t> {
            '~' => Tilde,
            '_' => return self.identifier(),
            c if is_xid_start(c) => return self.identifier(),
            _ => Err(self.error("Invalid"))?,
            c => Err(self.error(Unexpected(c)))?,
        };

        // Handle digraphs
@@ -217,8 +256,12 @@ impl<'t> Lexer<'t> {

    /// Consumes characters until the lexer reaches a newline `'\n'`
    pub fn line_comment(&mut self) -> Result<Token, LexError> {
        let kind = match self.consume().peek() {
            Some('!' | '/') => TKind::Doc,
            _ => TKind::Comment,
        };
        while self.consume().peek().is_some_and(|c| c != '\n') {}
        Ok(self.produce(TKind::Comment))
        Ok(self.produce(kind))
    }

    /// Consumes characters until the lexer reaches the end of a *nested* block comment.
@@ -232,7 +275,7 @@ impl<'t> Lexer<'t> {
                _ => continue,
            };
        }
        Err(self.error("Unterminated block comment"))
        Err(self.error(UnterminatedBlockComment))
    }

    /// Consumes characters until it reaches a character not in [is_xid_continue].
@@ -257,6 +300,7 @@ impl<'t> Lexer<'t> {
            "fn" => TKind::Fn,
            "for" => TKind::For,
            "if" => TKind::If,
            "impl" => TKind::Impl,
            "in" => TKind::In,
            "let" => TKind::Let,
            "loop" => TKind::Loop,
@@ -266,6 +310,7 @@ impl<'t> Lexer<'t> {
            "or" => TKind::Or,
            "pub" => TKind::Public,
            "return" => TKind::Return,
            "static" => TKind::Const, // TODO: Static
            "struct" => TKind::Struct,
            "then" => TKind::Do,
            "true" => TKind::True,
@@ -286,7 +331,7 @@ impl<'t> Lexer<'t> {
        if self.take().is_some_and(|c| c == '\'') {
            Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c)))
        } else {
            Err(self.error("Unterminated character"))
            Err(self.error(UnterminatedCharacter))
        }
    }

@@ -296,7 +341,7 @@ impl<'t> Lexer<'t> {
        self.consume();
        loop {
            lexeme.push(match self.take() {
                None => Err(self.error("Unterminated string"))?,
                None => Err(self.error(UnterminatedString))?,
                Some('\\') => self.escape()?,
                Some('"') => break,
                Some(c) => c,
@@ -308,7 +353,8 @@ impl<'t> Lexer<'t> {

    /// Parses a single escape sequence into its resulting char value.
    pub fn escape(&mut self) -> Result<char, LexError> {
        Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
        Ok(
            match self.take().ok_or_else(|| self.error(UnexpectedEOF))? {
                ' ' => '\u{a0}', // Non-breaking space
                '0' => '\0', // C0 Null Character
                'a' => '\x07', // C0 Acknowledge
@@ -321,27 +367,30 @@ impl<'t> Lexer<'t> {
                'u' => self.unicode_escape()?,
                'x' => self.hex_escape()?,
                c => c,
        })
            },
        )
    }

    /// Parses two hex-digits and constructs a [char] out of them.
    pub fn hex_escape(&mut self) -> Result<char, LexError> {
        let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
        char::from_u32(out).ok_or(self.error("Invalid digit"))
        char::from_u32(out).ok_or(self.error(InvalidUnicodeEscape(out)))
    }

    /// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
    pub fn unicode_escape(&mut self) -> Result<char, LexError> {
        self.next_if('{')
            .ok_or_else(|| self.error("No unicode escape opener"))?;
            .ok_or_else(|| self.error(UnterminatedUnicodeEscape))?;
        let mut out = 0;
        while let Some(c) = self.take() {
            if c == '}' {
                return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value"));
                return char::from_u32(out).ok_or_else(|| self.error(InvalidUnicodeEscape(out)));
            }
            out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?;
            out = out * 16
                + c.to_digit(16)
                    .ok_or_else(|| self.error(InvalidDigitForBase(c, 16)))?;
        }
        Err(self.error("Unterminated unicode escape"))
        Err(self.error(UnterminatedUnicodeEscape))
    }

    /// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
@@ -353,7 +402,10 @@ impl<'t> Lexer<'t> {
        while let Some(c) = self.peek() {
            int = match c.to_digit(BASE).ok_or(c) {
                Err('_') => int,
                Ok(c) => int.wrapping_mul(BASE as _).wrapping_add(c as _),
                Ok(c) => int
                    .checked_mul(BASE as _)
                    .and_then(|int| int.checked_add(c as _))
                    .ok_or_else(|| self.error(IntegerOverflow))?,
                _ => break,
            };
            self.consume();
@@ -362,12 +414,13 @@ impl<'t> Lexer<'t> {
        Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
    }

    /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36
    /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36.
    pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
        if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
        let digit = self.take().ok_or_else(|| self.error(UnexpectedEOF))?;
        if let Some(digit) = digit.to_digit(BASE) {
            Ok(digit)
        } else {
            Err(self.error("Invalid digit"))
            Err(self.error(InvalidDigitForBase(digit, BASE)))
        }
    }
}

src/main.rs | 27
@@ -9,7 +9,7 @@ use doughlang::{
        Expr,
        macro_matcher::{Match, Subst},
    },
    lexer::{LexError, Lexer},
    lexer::{EOF, LexError, Lexer},
    parser::{ParseError, Parser},
    span::Span,
    token::{TKind, Token},
@@ -73,7 +73,7 @@ fn lex() -> Result<(), Box<dyn Error>> {
    }
    loop {
        match lexer.scan() {
            Err(LexError { res: "EOF", .. }) => {
            Err(LexError { res: EOF, .. }) => {
                break Ok(Response::Accept);
            }
            Err(e) => {
@@ -97,7 +97,7 @@ fn exprs() -> Result<(), Box<dyn Error>> {
    }
    for idx in 0.. {
        match parser.parse::<Anno<Expr>>(0) {
            Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
            Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
                return Ok(Response::Accept);
            }
            Err(e) => {
@@ -120,7 +120,7 @@ fn pats() -> Result<(), Box<dyn Error>> {
    }
    loop {
        match parser.parse::<Pat>(PPrec::Min) {
            Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
            Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
                break Ok(Response::Accept);
            }
            Err(e) => {
@@ -142,7 +142,7 @@ fn tys() -> Result<(), Box<dyn Error>> {
    }
    loop {
        match parser.parse::<Ty>(()) {
            Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => {
            Err(ParseError::FromLexer(LexError { res: EOF, .. })) => {
                break Ok(Response::Accept);
            }
            Err(e) => {
@@ -205,16 +205,25 @@ fn subst() -> Result<(), Box<dyn Error>> {

fn parse(document: &str) {
    let mut parser = Parser::new(Lexer::new(document));
    let isatty = std::io::stdin().is_terminal();
    for idx in 0.. {
        match parser.parse::<Expr>(0) {
            Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break,
            Err(e) => {
                println!("\x1b[31m{e}\x1b[0m");
            Err(e @ ParseError::EOF(s)) if s.tail == document.len() as _ => {
                println!("\x1b[92m{e} (total {} bytes)\x1b[0m", document.len());
                break;
            }
            Ok(v) => {
            Err(e @ ParseError::EOF(_)) => {
                println!("\x1b[93m{e} (total {} bytes)\x1b[0m", document.len());
                break;
            }
            Err(e) => {
                println!("\x1b[91m{e}\x1b[0m");
                break;
            }
            Ok(v) if isatty => {
                println!("\x1b[{}m{v}", (idx + 5) % 6 + 31);
            }
            _ => {}
        }
    }
}

src/parser.rs | 374
@@ -1,7 +1,7 @@
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{
    ast::*,
    lexer::{LexError, Lexer},
    lexer::{LexError, LexFailure, Lexer},
    span::Span,
    token::{Lexeme, TKind, Token},
};
@@ -9,8 +9,12 @@ use std::{error::Error, fmt::Display, vec};

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// Reached the expected end of input.
    EOF(Span),
    /// Unexpectedly reached end of input.
    UnexpectedEOF(Span),
    FromLexer(LexError),
    Expected(TKind, Span),
    Expected(TKind, TKind, Span),
    NotLiteral(TKind, Span),
    NotPattern(TKind, Span),
    NotType(TKind, Span),
@@ -19,12 +23,16 @@ pub enum ParseError {
    NotPostfix(TKind, Span),
}

pub use ParseError::EOF;

impl Error for ParseError {}
impl Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
            Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
            Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
            Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."),
@@ -37,10 +45,36 @@ impl Display for ParseError {

pub type PResult<T> = Result<T, ParseError>;

trait PResultExt<T> {
    fn no_eof(self) -> PResult<T>;
    fn allow_eof(self) -> PResult<Option<T>>;
}

impl<T> PResultExt<T> for PResult<T> {
    fn no_eof(self) -> Self {
        match self {
            Err(ParseError::EOF(span)) => Err(ParseError::UnexpectedEOF(span)),
            other => other,
        }
    }
    fn allow_eof(self) -> PResult<Option<T>> {
        match self {
            Ok(t) => Ok(Some(t)),
            Err(ParseError::EOF(_)) => Ok(None),
            Err(e) => Err(e),
        }
    }
}

/// Opens a scope where [ParseError::EOF] is unexpected (See [PResultExt::no_eof])
fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
    f().no_eof()
}

#[derive(Debug)]
pub struct Parser<'t> {
    pub lexer: Lexer<'t>,
    pub next_tok: Option<Token>,
    pub next_tok: Option<PResult<Token>>,
    pub last_loc: Span,
    pub elide_do: bool,
}
@@ -72,46 +106,59 @@ impl<'t> Parser<'t> {
            None => loop {
                match self.lexer.scan() {
                    Ok(Token { kind: TKind::Comment, .. }) => {}
                    Ok(tok) => break tok,
                    Err(e) => Err(ParseError::FromLexer(e))?,
                    Ok(tok) => break Ok(tok),
                    Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
                    Err(e) => break Err(ParseError::FromLexer(e)),
                }
            },
        };
        self.last_loc = next_tok.span;
        self.next_tok = Some(next_tok);
        Ok(self.next_tok.as_ref().expect("should have token"))

        let next_tok = self.next_tok.as_ref().expect("should have Some lex result");

        if let Ok(tok) = next_tok {
            self.last_loc = tok.span;
        }

        next_tok.as_ref().map_err(|e| *e)
    }

    /// Peeks the next token if it matches the `expected` [TKind]
    pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> {
        self.peek().into_iter().find(|tok| tok.kind == expected)
    pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
        match self.peek() {
            Ok(tok) if tok.kind == expected => Ok(Some(tok)),
            Ok(_) => Ok(None),
            Err(e) => Err(e),
        }
    }

    /// Consumes and returns the currently-peeked [Token].
    pub fn take(&mut self) -> Option<Token> {
        let tok = self.next_tok.take();
        self.elide_do = matches!(tok, Some(Token { kind: TKind::RCurly, .. }));
    pub fn take(&mut self) -> PResult<Token> {
        let tok = self
            .next_tok
            .take()
            .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
        self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. }));
        tok
    }

    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
    pub fn take_lexeme(&mut self) -> Option<Lexeme> {
    pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
        self.take().map(|tok| tok.lexeme)
    }

    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> PResult<Token> {
        self.peek()?;
        self.peek().no_eof()?;
        Ok(self.take().expect("should have token here"))
    }

    /// Consumes and returns the next [Token] if it matches the `expected` [TKind]
    pub fn next_if(&mut self, expected: TKind) -> PResult<Token> {
        let token = self.peek()?;
        if token.kind == expected {
            Ok(self.take().expect("should have token here"))
        } else {
            Err(ParseError::Expected(expected, token.span))
    pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
        match self.peek() {
            Ok(t) if t.kind == expected => self.take().map(Ok),
            Ok(t) => Ok(Err(t.kind)),
            Err(e) => Err(e),
        }
    }

@@ -127,13 +174,15 @@ impl<'t> Parser<'t> {
        end: TKind,
    ) -> PResult<Vec<P>> {
        // TODO: This loses lexer errors
        while self.peek_if(end).is_none() {
            elems.push(self.parse(level.clone())?);
            if self.next_if(sep).is_err() {
                break;
        while self.peek_if(end).no_eof()?.is_none() {
            elems.push(self.parse(level.clone()).no_eof()?);
            match self.peek_if(sep)? {
                Some(_) => self.consume(),
                None => break,
            };
            }
        }
        self.next_if(end)?;
        self.next_if(end)?
            .map_err(|tk| ParseError::Expected(end, tk, self.span()))?;
        Ok(elems)
    }

@@ -148,33 +197,37 @@ impl<'t> Parser<'t> {
        sep: TKind,
    ) -> PResult<Vec<P>> {
        loop {
            elems.push(self.parse(level.clone())?);
            if self.next_if(sep).is_err() {
                break Ok(elems);
            }
            let elem = self.parse(level.clone()).no_eof()?;
            elems.push(elem);
            match self.peek_if(sep) {
                Ok(Some(_)) => self.consume(),
                Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
                Err(e) => Err(e)?,
            };
        }
    }

    /// Parses into an [`Option<P>`] if the next token is `next`
    pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
        Ok(match self.next_if(next) {
            Ok(_) => Some(self.parse(level)?),
        Ok(match self.next_if(next)? {
            Ok(_) => Some(self.parse(level).no_eof()?),
            Err(_) => None,
        })
    }

    /// Parses a P unless the next token is `end`
    pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
        let out = match self.peek_if(end) {
            None => Some(self.parse(level)?),
        let out = match self.peek_if(end)? {
            None => Some(self.parse(level).no_eof()?),
            Some(_) => None,
        };
        self.next_if(end)?;
        self.expect(end)?;
        Ok(out)
    }

    pub fn consume_if(&mut self, next: TKind) -> PResult<&mut Self> {
        self.next_if(next)?;
    pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
        self.next_if(next)?
            .map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
        Ok(self)
    }

@@ -197,17 +250,12 @@ impl<'t> Parse<'t> for FqPath {

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let mut parts = vec![];
        if p.next_if(TKind::ColonColon).is_ok() {
        if p.next_if(TKind::ColonColon)?.is_ok() {
            parts.push("".into()); // the "root"
        }
        loop {
            parts.push(
                p.next_if(TKind::Identifier)?
                    .lexeme
                    .string()
                    .expect("Identifier should have String"),
            );
            if p.next_if(TKind::ColonColon).is_err() {
        while let Ok(id) = p.next_if(TKind::Identifier)? {
            parts.push(id.lexeme.string().expect("Identifier should have String"));
            if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
                break;
            }
        }
@@ -223,26 +271,19 @@ impl<'t> Parse<'t> for Literal {
        Ok(match tok.kind {
            TKind::True => p.consume().then(Literal::Bool(true)),
            TKind::False => p.consume().then(Literal::Bool(false)),
            TKind::Character => Literal::Char(
                p.take_lexeme()
                    .expect("should have Token")
                    .char()
                    .expect("should have one char in char literal"),
            ),
            TKind::Integer => {
                let Token { lexeme, span, .. } = p.take().expect("should have Token");
                let Lexeme::Integer(int, _) = lexeme else {
                    Err(ParseError::Expected(TKind::Integer, span))?
                };
                Literal::Int(int)
            }
            TKind::String => Literal::Str({
                let Token { lexeme, span, .. } = p.take().expect("should have Token");
                lexeme
                    .string()
                    .ok_or(ParseError::Expected(TKind::String, span))?
            TKind::Character => Literal::Char({
                let Token { lexeme, .. } = p.take().expect("should have Token");
                lexeme.char().expect("char token should have char")
            }),
            _ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
            TKind::Integer => Literal::Int({
                let Token { lexeme, .. } = p.take().expect("should have Token");
                lexeme.int().expect("integer token should have int")
            }),
            TKind::String => Literal::Str({
                let Token { lexeme, .. } = p.take().expect("should have Token");
                lexeme.string().expect("string token should have string")
            }),
            other => Err(ParseError::NotLiteral(other, tok.span))?,
        })
    }
}
@@ -288,6 +329,7 @@ fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {

impl<'t> Parse<'t> for Pat {
    type Prec = PPrec;

    fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
        let tok = p.peek()?;

@@ -315,12 +357,10 @@ impl<'t> Parse<'t> for Pat {
                        .opt(PPrec::Alt, TKind::RCurly)?
                        .unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
                ),
                Ok(_) | Err(ParseError::FromLexer(LexError { pos: _, res: "EOF" })) => {
                    match path.parts.len() {
                Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
                    1 => Self::Name(path.parts.pop().expect("name has 1 part")),
                    _ => Self::Path(path),
                }
                }
                },
                Err(e) => Err(e)?,
            }
        }
@@ -329,21 +369,21 @@ impl<'t> Parse<'t> for Pat {
            TKind::DotDot => Pat::Op(
                PatOp::Rest,
                // Identifier in Rest position always becomes binder
                match p.consume().peek()?.kind {
                    TKind::Identifier => vec![Pat::Name(
                match p.consume().peek().allow_eof()?.map(Token::kind) {
                    Some(TKind::Identifier) => vec![Pat::Name(
                        p.take_lexeme()
                            .expect("should have lexeme")
                            .string()
                            .expect("should be string"),
                    )],
                    TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
                    Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
                    _ => vec![],
                },
            ),
            TKind::DotDotEq => Pat::Op(
                PatOp::RangeIn,
                match p.consume().peek()?.kind {
                    TKind::Grave | TKind::Integer | TKind::Character => vec![p.parse(level)?],
                match p.consume().peek().allow_eof()?.map(Token::kind) {
                    Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
                    _ => vec![],
                },
            ),
@@ -360,14 +400,14 @@ impl<'t> Parse<'t> for Pat {
            _ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
        };

        while let Ok(tok) = p.peek()
        while let Ok(Some(tok)) = p.peek().allow_eof()
            && let Some((op, prec)) = pat_from_infix(tok)
            && level <= prec
        {
            let kind = tok.kind;
            head = match op {
                PatPs::Typed => Pat::Typed(head.into(), p.consume().parse(())?),
                PatPs::Op(op @ PatOp::RangeEx) => Pat::Op(
                PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op(
                    op,
                    match p.consume().peek().map(|t| t.kind) {
                        Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
@@ -379,7 +419,6 @@ impl<'t> Parse<'t> for Pat {
                PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
            }
        }

        Ok(head)
    }
}
@@ -389,10 +428,11 @@ impl<'t> Parse<'t> for Ty {

    fn parse(p: &mut Parser<'t>, level: Self::Prec) -> PResult<Self>
    where Self: Sized {
        let tok = p.peek()?;
        let &Token { kind, span, .. } = p.peek()?;

        let head = match tok.kind {
            TKind::Identifier => match tok.lexeme.str() {
        // TODO: this is a kinda jank way of error reporting
        let head = match kind {
            TKind::Identifier => match p.peek()?.lexeme.str() {
                Some("_") => p.consume().then(Ty::Infer),
                _ => Ty::Named(p.parse(())?),
            },
@@ -403,7 +443,7 @@ impl<'t> Parse<'t> for Ty {
                match p.next()? {
                    Token { kind: TKind::Semi, .. } => {
                        let ty = Ty::Array(ty, p.parse(Prec::Binary.next())?);
                        p.next_if(TKind::RBrack)?;
                        p.expect(TKind::RBrack)?;
                        ty
                    }
                    Token { kind: TKind::RBrack, .. } => Ty::Slice(ty),
@@ -411,30 +451,27 @@ impl<'t> Parse<'t> for Ty {
                }
            }
            TKind::Fn => {
                p.consume().consume_if(TKind::LParen)?;

                let mut tys = p.list(vec![], (), TKind::Comma, TKind::RParen)?;
                tys.push(match p.next_if(TKind::Arrow) {
                    Ok(_) => p.parse(())?,
                    _ => Ty::Tuple(vec![]),
                });
                Ty::Fn(tys)
                p.consume();
                match p.parse(())? {
                    Ty::Fn(args) => Ty::Fn(args),
                    other @ Ty::Tuple(_) => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
                    other => Ty::Fn(vec![other, Ty::Tuple(vec![])]),
                }
            }
            TKind::LParen => {
                let mut tys = p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?;
                match p.next_if(TKind::Arrow) {
                    Ok(_) => {
                        tys.push(p.parse(())?);
                        Ty::Fn(tys)
                Ty::Tuple(p.consume().list(vec![], (), TKind::Comma, TKind::RParen)?)
                    }
                    _ => Ty::Tuple(tys),
                }
            }
            _ => Err(ParseError::NotType(tok.kind, tok.span))?,
            _ => Err(ParseError::NotType(kind, span))?,
        };

        Ok(match p.next_if(TKind::Arrow) {
            Ok(_) => Ty::Fn(vec![head, p.parse(())?]),
        Ok(match p.next_if(TKind::Arrow).allow_eof()? {
            Some(Ok(_)) => Ty::Fn(vec![
                match head {
                    args @ Ty::Tuple(_) => args,
                    arg => Ty::Tuple(vec![arg]),
                },
                p.parse(())?,
            ]),
            _ => head,
        })
    }
@@ -483,15 +520,18 @@ pub enum Prec {

impl Prec {
    pub const MIN: usize = Prec::Min.value();

    pub const fn value(self) -> usize {
        self as usize * 2
    }

    pub const fn prev(self) -> usize {
        match self {
            Self::Assign => self.value() + 1,
            _ => self.value(),
        }
    }

    pub const fn next(self) -> usize {
        match self {
            Self::Assign => self.value(),
@@ -526,7 +566,7 @@ pub enum Ps {
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
    Ok(match token.kind {
        TKind::Do => (Ps::Op(Op::Do), Prec::Do),
        TKind::Semi => (Ps::ExplicitDo, Prec::Do),
        TKind::Semi => (Ps::End, Prec::Body),

        TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
        TKind::Grave => (Ps::Mid, Prec::Max),
@@ -566,6 +606,7 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
        TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
        TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
        TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
        TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),

        kind => Err(ParseError::NotPrefix(kind, token.span))?,
    })
@@ -574,18 +615,24 @@ fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
    Ok(match token.kind {
        TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
        TKind::As => (Ps::Op(Op::As), Prec::Max),
        TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
        TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
        TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
        TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
        TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
        TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
        TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
        TKind::LCurly => (Ps::Make, Prec::Make),
        TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
        TKind::In => (Ps::Op(Op::Do), Prec::Do),

        TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
        TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
        TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
        TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
        TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
        TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
        TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
        TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
        TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
        TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
        TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
        TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
        TKind::LCurly => (Ps::Make, Prec::Make),
        TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
        TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
        TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
        TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
        TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
        TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
@@ -605,6 +652,13 @@ fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
        TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
        TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),

        TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
        TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
        TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
        TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),

        TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
        TKind::As => (Ps::Op(Op::As), Prec::Max),
        _ => (Ps::ImplicitDo, Prec::Do),
    })
}
@@ -615,7 +669,7 @@ impl<'t> Parse<'t> for Const {
    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        Ok(Self(
            p.consume().parse(PPrec::Tuple)?,
            p.consume_if(TKind::Eq)?.parse(Prec::Tuple.value())?,
            p.expect(TKind::Eq)?.parse(Prec::Tuple.value())?,
        ))
    }
}
@@ -637,7 +691,7 @@ impl<'t> Parse<'t> for Fn {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        match p.consume().next_if(TKind::Identifier) {
        match p.consume().next_if(TKind::Identifier)? {
            Ok(Token { lexeme, .. }) => Ok(Self(
                lexeme.string(),
                p.parse(PPrec::Tuple)?,
@@ -648,7 +702,7 @@ impl<'t> Parse<'t> for Fn {
                None,
                Pat::Op(
                    PatOp::Tuple,
                    p.consume_if(TKind::LParen)?.list(
                    p.expect(TKind::LParen)?.list(
                        vec![],
                        PPrec::Tuple,
                        TKind::Comma,
@@ -667,12 +721,15 @@ impl<'t> Parse<'t> for Let {

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let pat = p.consume().parse(PPrec::Tuple)?;
        if p.next_if(TKind::Eq).is_err() {
        if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) {
            return Ok(Self(pat, vec![]));
        }

        let body = p.parse(Prec::Tuple.value())?;
        if p.next_if(TKind::Else).is_err() {
        if p.next_if(TKind::Else)
            .allow_eof()?
            .is_none_or(|v| v.is_err())
        {
            return Ok(Self(pat, vec![body]));
        }

@@ -685,42 +742,41 @@ impl<'t> Parse<'t> for Match {

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        Ok(Self(p.consume().parse(Prec::Logical.value())?, {
            p.next_if(TKind::LCurly)?;
            p.expect(TKind::LCurly)?;
            p.list(vec![], Prec::Body.next(), TKind::Comma, TKind::RCurly)?
        }))
    }
}

impl<'t> Parse<'t> for MatchArm {
    type Prec = usize;

    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        p.next_if(TKind::Bar).ok();
        p.next_if(TKind::Bar)?.ok(); // and discard
        Ok(MatchArm(
            p.parse(PPrec::Min)?,
            p.consume_if(TKind::FatArrow)?.parse(level)?,
            p.expect(TKind::FatArrow)?.parse(level)?,
        ))
    }
}

impl<'t> Parse<'t> for MakeArm {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
        let name = p
            .next_if(TKind::Identifier)?
            .map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?;
        Ok(MakeArm(
            p.next_if(TKind::Identifier)?
                .lexeme
                .string()
                .expect("Identifier should have String"),
            {
                p.next_if(TKind::Colon)
                    .ok()
                    .map(|_| p.parse(Prec::Body.value()))
                    .transpose()?
            },
            name.lexeme.string().expect("Identifier should have String"),
            p.opt_if(Prec::Body.value(), TKind::Colon)?,
        ))
    }
}

impl<'t> Parse<'t> for Mod {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let ty = p.consume().parse(())?;
        let body = p.parse(Prec::Body.value())?;
@@ -732,14 +788,14 @@ fn parse_for<'t>(p: &mut Parser<'t>, _level: ()) -> PResult<Expr> {
    // for Pat
    let pat = p.consume().parse(PPrec::Tuple)?;
    // in Expr
    let iter: Anno<Expr> = p.consume_if(TKind::In)?.parse(Prec::Logical.next())?;
    let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
    let cspan = iter.1;
    // Expr
    let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
    let pspan = pass.1;
    // else Expr?
    let fail = match p.next_if(TKind::Else) {
        Ok(_) => p.parse(Prec::Body.next())?,
    let fail = match p.next_if(TKind::Else).allow_eof()? {
        Some(Ok(_)) => p.parse(Prec::Body.next())?,
        _ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
    };
    let fspan = fail.1;
@@ -834,20 +890,19 @@ impl<'t> Parse<'t> for Expr {
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        const MIN: usize = Prec::MIN;

        // TODO: in-tree doc comments
        while p.next_if(TKind::Doc)?.is_ok() {}

        // Prefix
        let tok = p.peek()?;
        let ((op, prec), span) = (from_prefix(tok)?, tok.span);

        let tok @ &Token { kind, span, .. } = p.peek()?;
        let ((op, prec), span) = (from_prefix(tok)?, span);
        no_eof(move || {
        let mut head = match op {
            // Empty is returned when a block finisher is an expr prefix.
            // It's the only expr that doesn't consume.
            Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
            Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,

            Ps::ExplicitDo => {
                p.consume();
                Expr::Op(Op::Tuple, vec![])
            }
            // "End" is produced when an "empty" expression is syntactically required.
            // This happens when a semi or closing delimiter begins an expression.
            // The token which emitted "End" cannot be consumed, as it is expected elsewhere.
            Ps::End if level <= prec.next() => Expr::Op(Op::Tuple, vec![]),
            Ps::End => Err(ParseError::NotPrefix(kind, span))?,

            Ps::Id => Expr::Id(p.parse(())?),
            Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
@@ -858,6 +913,16 @@ impl<'t> Parse<'t> for Expr {
            Ps::Typedef => Expr::Struct(p.parse(())?),
            Ps::Match => Expr::Match(p.parse(())?),
            Ps::Mod => Expr::Mod(p.parse(())?),
            Ps::Op(Op::Meta) => Expr::Op(
                Op::Meta,
                vec![
                    p.consume()
                        .expect(TKind::LBrack)?
                        .opt(MIN, TKind::RBrack)?
                        .unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
                    p.parse(level)?,
                ],
            ),
            Ps::Op(Op::Block) => Expr::Op(
                Op::Block,
                p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
@@ -874,7 +939,9 @@ impl<'t> Parse<'t> for Expr {
                p.parse(Prec::Logical.value())?,
                p.parse(prec.next())?,
                match p.peek() {
                    Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
                    Ok(Token { kind: TKind::Else, .. }) => {
                        p.consume().parse(prec.next())?
                    }
                    _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
                },
            ];
@@ -907,7 +974,7 @@ impl<'t> Parse<'t> for Expr {
        };

        // Infix and Postfix
        while let Ok(tok) = p.peek()
        while let Ok(Some(tok)) = p.peek().allow_eof()
            && let Ok((op, prec)) = from_infix(tok)
            && level <= prec.prev()
            && op != Ps::End
@@ -935,8 +1002,12 @@ impl<'t> Parse<'t> for Expr {
                ),
                Ps::Op(Op::Call) => Expr::Op(
                    Op::Call,
                    vec![
                        head.anno(span),
                        p.consume()
                            .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RParen)?,
                            .opt(0, TKind::RParen)?
                            .unwrap_or(Expr::Op(Op::Tuple, vec![]).anno(span)),
                    ],
                ),
                Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
                    op,
@@ -947,12 +1018,15 @@ impl<'t> Parse<'t> for Expr {
                    p.consume();
                    Expr::Op(op, vec![head.anno(span)])
                }
                Ps::Op(op) => Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]),
                Ps::Op(op) => {
                    Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
                }
                _ => Err(ParseError::NotInfix(kind, span))?,
            }
        }

        Ok(head)
        })
    }
}

@@ -966,7 +1040,7 @@ fn parse_array<'t>(p: &mut Parser<'t>) -> PResult<Expr> {
    let prec = Prec::Tuple;
    let item = p.parse(prec.value())?;
    let repeat = p.opt_if(prec.next(), TKind::Semi)?;
    p.next_if(TKind::RBrack)?;
    p.expect(TKind::RBrack)?;

    Ok(match (repeat, item) {
        (Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),

src/token.rs | 12
@@ -9,6 +9,12 @@ pub struct Token {
    pub span: Span,
}

impl Token {
    pub fn kind(&self) -> TKind {
        self.kind
    }
}

#[derive(Clone, Debug)]
pub enum Lexeme {
    String(String),
@@ -55,7 +61,9 @@ impl std::fmt::Display for Lexeme {

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TKind {
    Comment,
    Comment, // Line or block comment
    Doc, // Doc comment

    And,
    As,
    Break,
@@ -67,6 +75,7 @@ pub enum TKind {
    Fn,
    For,
    If,
    Impl,
    In,
    Let,
    Loop,
@@ -76,6 +85,7 @@ pub enum TKind {
    Or,
    Public,
    Return,
    Static,
    Struct,
    True,
    While,