src: Split parser into modules, reorganize AST

This commit is contained in:
2025-10-28 22:15:06 -04:00
parent a3cab92b35
commit 8e2bc5ad85
8 changed files with 1146 additions and 1075 deletions

View File

@@ -4,184 +4,32 @@ pub mod macro_matcher;
pub mod visit;
/// A value with an annotation.
///
/// Field `0` is the annotated value and field `1` is the annotation itself.
/// The annotation type defaults to [Span] when it is not spelled out.
#[derive(Clone, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
/// Debug output is the annotation, then `": "`, then the annotated value.
impl<T: Annotation, A: Annotation> std::fmt::Debug for Anno<T, A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, note) = self;
        // The trait is named explicitly because every `Annotation` implements
        // both `Debug` and `Display`, which share the method name `fmt`.
        std::fmt::Debug::fmt(note, f)?;
        f.write_str(": ")?;
        std::fmt::Debug::fmt(value, f)
    }
}
/// An annotation: extra data added on to important AST nodes.
///
/// This is a marker trait: it declares no methods and only bundles the
/// supertrait bounds listed below.
pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {}
// Blanket impl: any type that satisfies the bounds is automatically an `Annotation`.
impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation for T {}
/// A qualified identifier
///
/// The path is stored as its individual segments, in source order.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FqPath {
// TODO: Identifier interning
/// The segments of the path, one `String` per identifier
pub parts: Vec<String>,
// TODO: generic parameters
}
/// A value with an annotation.
///
/// Field `0` is the annotated value and field `1` is the annotation itself.
/// The annotation type defaults to [Span] when it is not spelled out.
#[derive(Clone, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
/// Builds a single-segment path from a bare name.
impl From<&str> for FqPath {
    fn from(value: &str) -> Self {
        // The string is stored whole as one segment; it is not split on `::`.
        let parts = vec![String::from(value)];
        Self { parts }
    }
}
/// A literal value (boolean, character, integer, string)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
Bool(bool),
/// A character literal: 'a', '\u{1f988}'
Char(char),
/// An integer literal: 0, 123, 0x10
///
/// The first field is the value and the second is the base it was written in.
/// When displayed, bases 2, 8 and 16 get a `0b`/`0o`/`0x` prefix; any other
/// base is printed as plain decimal.
Int(u128, u32),
/// A string literal: "hello"
Str(String),
}
/// A compound import declaration
///
/// The variants nest, so one `Use` value describes a whole import tree.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Use {
/// A glob import: "*"
Glob,
/// A single imported name: Identifier
Name(String),
/// A path segment followed by a nested import: Identifier :: Use
Path(String, Box<Use>),
/// A braced list of nested imports: { Use, * }
Tree(Vec<Use>),
}
/// Binding patterns for each kind of matchable value.
/// Expressions: The beating heart of Dough.
///
/// This covers both patterns in Match expressions, and type annotations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// Matches anything without binding
Ignore,
/// Matches nothing, ever
Never,
/// Matches nothing; used for macro substitution
MetId(String),
/// Matches anything, and binds it to a name
Name(String),
/// Matches against a named const value
Path(FqPath),
/// Matches a Struct Expression `Ident { Pat }`
///
/// The boxed pattern holds the contents of the braces.
NamedStruct(FqPath, Box<Pat>),
/// Matches a Tuple Struct Expression `Ident ( Pat )`
///
/// The boxed pattern holds the contents of the parentheses.
NamedTuple(FqPath, Box<Pat>),
/// Matches a literal value by equality comparison
Lit(Literal),
/// Matches a compound pattern: an operator applied to a list of sub-patterns
Op(PatOp, Vec<Pat>),
}
/// Operators on lists of patterns
///
/// Used as the first field of [Pat::Op]; the operands are that variant's `Vec<Pat>`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PatOp {
/// Changes the visibility mode to "public"
Pub,
/// Changes the binding mode to "mutable"
Mut,
/// Matches the dereference of a pointer (`&pat`)
Ref,
/// Matches the dereference of a raw pointer (`*pat`)
Ptr,
/// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`)
Rest,
/// Matches an exclusive bounded range (`0..100`)
RangeEx,
/// Matches an inclusive bounded range (`0..=100`)
RangeIn,
/// Matches the elements of a tuple
Tuple,
/// Matches the elements of a slice or array
Slice,
/// Matches a constant-size slice with repeating elements
///
/// The operands are the repeated element pattern followed by the repeat count.
Arrep,
/// Matches a type annotation or struct member
Typed,
/// Matches a function signature
Fn,
/// Matches one of a list of alternatives
Alt,
}
/// A pattern binding
/// ```ignore
/// let Pat (= Expr (else Expr)?)?
/// const Pat (= Expr (else Expr)?)?
/// static Pat (= Expr (else Expr)?)?
/// type Pat (= Expr)?
/// struct Pat
/// enum Pat
/// fn Pat Expr
/// mod Pat Expr
/// impl Pat Expr
/// Pat => Expr // in match
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Bind<A: Annotation = Span>(
/// Which of the forms above this binding is
pub BindKind,
/// Generic parameters; displayed as `<A, B> ` after the keyword when non-empty
pub Vec<FqPath>,
/// The pattern being bound
pub Pat,
/// The expressions attached to the pattern (the `Expr`s in the grammar above)
pub Vec<Anno<Expr<A>, A>>,
);
/// The kind of a [Bind]: which keyword (if any) introduces the binding.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindKind {
/// A `let Pat (= Expr (else Expr)?)?` binding
Let,
/// A `const Pat = Expr` binding
Const,
/// A `static Pat = Expr` binding
Static,
/// A type-alias binding
Type,
/// A struct definition
Struct,
/// An enum definition
Enum,
/// A `fn Pat Expr` binding
Fn,
/// A `mod Pat Expr` binding
Mod,
/// An `impl Pat Expr` binding
Impl,
/// A `Pat => Expr` binding
///
/// This is the only kind with no keyword: it displays as the empty string.
Match,
}
/// A make (constructor) expression
/// ```ignore
/// Expr { (Ident (: Expr)?),* }
/// ```
///
/// Field `0` is the expression before the braces; field `1` holds the arms
/// listed inside the braces.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Make<A: Annotation = Span>(pub Anno<Expr<A>, A>, pub Vec<MakeArm<A>>);
/// A single "arm" of a make expression
/// ```text
/// Identifier (':' Expr)?
/// ```
///
/// Field `0` is the identifier; field `1` is the expression after the colon,
/// or `None` when the arm is just a bare identifier.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);
/// Expressions: The beating heart of Dough
/// A program in Doughlang is a single expression which, at compile time,
/// sets up the state in which a program will run. This expression binds types,
/// functions, and values to names which are exposed at runtime.
///
/// Whereas in the body of a function, `do` sequences are ordered, in the global
/// scope (or subsequent module scopes, which are children of the global module,)
/// `do` sequences are considered unordered, and subexpressions may be reordered
/// in whichever way the compiler sees fit. This is especially important when
/// performing import resolution, as imports typically depend on the order
/// in which names are bound.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr<A: Annotation = Span> {
/// Omitted by semicolon insertion-elision rules
Omitted,
/// An identifier
Id(FqPath),
Id(Path),
/// An escaped token for macro binding
MetId(String),
/// A literal bool, string, char, or int
@@ -197,6 +45,18 @@ pub enum Expr<A: Annotation = Span> {
Op(Op, Vec<Anno<Self, A>>),
}
/// Doughlang's AST is partitioned by data representation, so it
/// considers any expression which is composed solely of keywords,
/// symbols, and other expressions as operator expressions.
///
/// This includes:
/// - Do-sequence expressions: `Expr ; Expr `
/// - Type-cast expressions `Expr as Expr`
/// - Binding-modifier expressions: `pub Expr`, `#[Expr] Expr`
/// - Block and Group expressions: `{Expr?}`, `(Expr?)`
/// - Control flow: `if`, `while`, `loop`, `match`, `break`, `return`
/// - Function calls `Expr (Expr,*)`
/// - Traditional binary and unary operators (add, sub, neg, assign)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
// -- true operators
@@ -270,6 +130,170 @@ pub enum Op {
XorSet, // Expr ^= Expr
OrSet, // Expr |= Expr
}
/// A qualified identifier
///
/// The path is stored as its individual segments, in source order.
///
/// TODO: qualify identifier
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path {
// TODO: Identifier interning
/// The segments of the path, one `String` per identifier
pub parts: Vec<String>,
// TODO: generic parameters
}
/// A literal value (boolean, character, integer, string)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
Bool(bool),
/// A character literal: 'a', '\u{1f988}'
Char(char),
/// An integer literal: 0, 123, 0x10
///
/// The first field is the value and the second is the base it was written in.
/// When displayed, bases 2, 8 and 16 get a `0b`/`0o`/`0x` prefix; any other
/// base is printed as plain decimal.
Int(u128, u32),
/// A string literal: "hello"
Str(String),
}
/// A compound import declaration
///
/// The variants nest, so one `Use` value describes a whole import tree.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Use {
/// A glob import: "*"
Glob,
/// A single imported name: Identifier
Name(String),
/// A path segment followed by a nested import: Identifier :: Use
Path(String, Box<Use>),
/// A braced list of nested imports: { Use, * }
Tree(Vec<Use>),
}
/// A pattern binding
/// ```ignore
/// let Pat (= Expr (else Expr)?)?
/// const Pat (= Expr (else Expr)?)?
/// static Pat (= Expr (else Expr)?)?
/// type Pat (= Expr)?
/// struct Pat
/// enum Pat
/// fn Pat Expr
/// mod Pat Expr
/// impl Pat Expr
/// Pat => Expr // in match
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Bind<A: Annotation = Span>(
/// Which of the forms above this binding is
pub BindOp,
/// Generic parameters; displayed as `<A, B> ` after the keyword when non-empty
pub Vec<Path>,
/// The pattern being bound
pub Pat,
/// The expressions attached to the pattern (the `Expr`s in the grammar above)
pub Vec<Anno<Expr<A>, A>>,
);
/// The kind of a [Bind]: which keyword (if any) introduces the binding.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BindOp {
/// A `let Pat (= Expr (else Expr)?)?` binding
Let,
/// A `const Pat = Expr` binding
Const,
/// A `static Pat = Expr` binding
Static,
/// A type-alias binding
Type,
/// A `fn Pat Expr` binding
Fn,
/// A `mod Pat Expr` binding
Mod,
/// An `impl Pat Expr` binding
Impl,
/// A struct definition
Struct,
/// An enum definition
Enum,
/// A `Pat => Expr` binding
///
/// This is the only kind with no keyword: it displays as the empty string.
Match,
}
/// A make (constructor) expression
/// ```ignore
/// Expr { (Ident (: Expr)?),* }
/// ```
///
/// Field `0` is the expression before the braces; field `1` holds the arms
/// listed inside the braces.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Make<A: Annotation = Span>(pub Anno<Expr<A>, A>, pub Vec<MakeArm<A>>);
/// A single "arm" of a make expression
/// ```text
/// Identifier (':' Expr)?
/// ```
///
/// Field `0` is the identifier; field `1` is the expression after the colon,
/// or `None` when the arm is just a bare identifier.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);
/// Binding patterns for each kind of matchable value.
///
/// This covers both patterns in Match expressions, and type annotations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// Matches anything without binding
Ignore,
/// Matches nothing, ever
Never,
/// Matches nothing; used for macro substitution
MetId(String),
/// Matches anything, and binds it to a name
Name(String),
/// Matches against a named const value
Path(Path),
/// Matches a Struct Expression `Ident { Pat }`
///
/// The boxed pattern holds the contents of the braces.
NamedStruct(Path, Box<Pat>),
/// Matches a Tuple Struct Expression `Ident ( Pat )`
///
/// The boxed pattern holds the contents of the parentheses.
NamedTuple(Path, Box<Pat>),
/// Matches a literal value by equality comparison
Lit(Literal),
/// Matches a compound pattern: an operator applied to a list of sub-patterns
Op(PatOp, Vec<Pat>),
}
/// Operators on lists of patterns
///
/// Used as the first field of [Pat::Op]; the operands are that variant's `Vec<Pat>`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PatOp {
/// Changes the visibility mode to "public"
Pub,
/// Changes the binding mode to "mutable"
Mut,
/// Matches the dereference of a pointer (`&pat`)
Ref,
/// Matches the dereference of a raw pointer (`*pat`)
Ptr,
/// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`)
Rest,
/// Matches an exclusive bounded range (`0..100`)
RangeEx,
/// Matches an inclusive bounded range (`0..=100`)
RangeIn,
/// Matches the elements of a tuple
Tuple,
/// Matches the elements of a slice or array
Slice,
/// Matches a constant-size slice with repeating elements
///
/// The operands are the repeated element pattern followed by the repeat count.
Arrep,
/// Matches a type annotation or struct member
Typed,
/// Matches a function signature
Fn,
/// Matches one of a list of alternatives
Alt,
}
/// Debug output is the annotation, then `": "`, then the annotated value.
impl<T: Annotation, A: Annotation> std::fmt::Debug for Anno<T, A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, note) = self;
        // The trait is named explicitly because every `Annotation` implements
        // both `Debug` and `Display`, which share the method name `fmt`.
        std::fmt::Debug::fmt(note, f)?;
        f.write_str(": ")?;
        std::fmt::Debug::fmt(value, f)
    }
}
/// Builds a single-segment path from a bare name.
impl From<&str> for Path {
    fn from(value: &str) -> Self {
        // The string is stored whole as one segment; it is not split on `::`.
        let parts = vec![String::from(value)];
        Self { parts }
    }
}
impl<A: Annotation> Default for Expr<A> {
fn default() -> Self {
@@ -313,106 +337,12 @@ impl<A: Annotation> Expr<A> {
use crate::{fmt::FmtAdapter, span::Span};
use std::{fmt::Display, format_args as fmt};
/// Prints a literal in source-like syntax: quoted and escaped text,
/// and integers carrying the prefix of the base they were written in.
impl Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(flag) => Display::fmt(flag, f),
            Self::Char(ch) => write!(f, "'{}'", ch.escape_debug()),
            // The second field selects the radix prefix; unknown bases print as decimal.
            Self::Int(value, radix) => match *radix {
                2 => write!(f, "0b{value:b}"),
                8 => write!(f, "0o{value:o}"),
                16 => write!(f, "0x{value:x}"),
                _ => Display::fmt(value, f),
            },
            Self::Str(text) => write!(f, "\"{}\"", text.escape_debug()),
        }
    }
}
/// Prints the path's segments as a list separated by `::`.
impl Display for FqPath {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.list(&self.parts, "::")
    }
}
/// Displays only the annotated value; the annotation itself is not printed.
impl<T: Display + Annotation, A: Annotation> Display for Anno<T, A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, _annotation) = self;
        write!(f, "{value}")
    }
}
/// Prints an import tree; braced lists of more than three entries are
/// written through the indented, wrapped list helper instead of inline.
impl Display for Use {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Glob => Display::fmt("*", f),
            Self::Name(ident) => Display::fmt(ident, f),
            Self::Path(head, tail) => write!(f, "{head}::{tail}"),
            Self::Tree(items) if items.is_empty() => Display::fmt("{}", f),
            // One to three entries: inline form.
            Self::Tree(items) if items.len() <= 3 => f.delimit("{ ", " }").list(items, ", "),
            // Four or more entries: indented, wrapped form.
            Self::Tree(items) => f
                .delimit_indented("{", "}")
                .list_wrap("\n", items, ",\n", ",\n"),
        }
    }
}
/// Prints a binding: its keyword, optional generic parameters, pattern, and
/// expressions, in a layout chosen by the kind of binding.
impl<A: Annotation> Display for Bind<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self(op, gens, pat, exprs) = self;
// The kind prints its own keyword (and trailing space), or nothing for match arms.
op.fmt(f)?;
// Generic parameters are only printed when there are any.
if !gens.is_empty() {
f.delimit("<", "> ").list(gens, ", ")?;
}
match op {
// NOTE(review): the ",!? " separator looks like a deliberate marker for the
// unexpected case of more than one expression — confirm.
BindKind::Match => f.delimit(fmt!("{pat} => "), "").list(exprs, ",!? "),
BindKind::Fn | BindKind::Mod | BindKind::Impl => {
f.delimit(fmt!("{pat} "), "").list(exprs, ",!? ")
}
// Struct and enum definitions print only their pattern; `exprs` is not used.
BindKind::Struct | BindKind::Enum => match pat {
Pat::NamedStruct(name, bind) => match bind.as_ref() {
// A tuple of members goes through the indented, wrapped list helper.
Pat::Op(PatOp::Tuple, parts) => f
.delimit_indented(fmt!("{name} {{"), "}")
.list_wrap("\n", parts, ",\n", ",\n"),
other => write!(f, "{name} {{ {other} }}"),
},
_ => pat.fmt(f),
},
// Every other kind: `Pat`, `Pat = value`, or `Pat = value else fail`.
_ => match exprs.as_slice() {
[] => write!(f, "{pat}"),
[value] => write!(f, "{pat} = {value}"),
[value, fail] => write!(f, "{pat} = {value} else {fail}"),
// Three or more expressions fall back to a parenthesized list.
other => f.delimit(fmt!("{pat} ("), ")").list(other, ", "),
},
}
}
}
/// Prints the keyword that introduces the binding, including its trailing space.
impl Display for BindKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            // Match arms have no introducing keyword.
            Self::Match => "",
            Self::Let => "let ",
            Self::Const => "const ",
            Self::Static => "static ",
            Self::Type => "type ",
            Self::Struct => "struct ",
            Self::Enum => "enum ",
            Self::Fn => "fn ",
            Self::Mod => "mod ",
            Self::Impl => "impl ",
        };
        f.write_str(keyword)
    }
}
/// Prints a make expression: the target expression followed by its braced
/// arms, with the whole thing wrapped in parentheses.
impl<A: Annotation> Display for Make<A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(target, arms) = self;
        f.delimit(fmt!("({target} {{"), "})").list(arms, ", ")
    }
}
impl<A: Annotation> Display for Expr<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -534,6 +464,100 @@ impl Display for Op {
}
}
/// Prints the path's segments as a list separated by `::`.
impl Display for Path {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.list(&self.parts, "::")
    }
}
/// Prints a literal in source-like syntax: quoted and escaped text,
/// and integers carrying the prefix of the base they were written in.
impl Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(flag) => Display::fmt(flag, f),
            Self::Char(ch) => write!(f, "'{}'", ch.escape_debug()),
            // The second field selects the radix prefix; unknown bases print as decimal.
            Self::Int(value, radix) => match *radix {
                2 => write!(f, "0b{value:b}"),
                8 => write!(f, "0o{value:o}"),
                16 => write!(f, "0x{value:x}"),
                _ => Display::fmt(value, f),
            },
            Self::Str(text) => write!(f, "\"{}\"", text.escape_debug()),
        }
    }
}
/// Prints an import tree; braced lists of more than three entries are
/// written through the indented, wrapped list helper instead of inline.
impl Display for Use {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Glob => Display::fmt("*", f),
            Self::Name(ident) => Display::fmt(ident, f),
            Self::Path(head, tail) => write!(f, "{head}::{tail}"),
            Self::Tree(items) if items.is_empty() => Display::fmt("{}", f),
            // One to three entries: inline form.
            Self::Tree(items) if items.len() <= 3 => f.delimit("{ ", " }").list(items, ", "),
            // Four or more entries: indented, wrapped form.
            Self::Tree(items) => f
                .delimit_indented("{", "}")
                .list_wrap("\n", items, ",\n", ",\n"),
        }
    }
}
/// Prints a binding: its keyword, optional generic parameters, pattern, and
/// expressions, in a layout chosen by the kind of binding.
impl<A: Annotation> Display for Bind<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self(op, gens, pat, exprs) = self;
// The kind prints its own keyword (and trailing space), or nothing for match arms.
op.fmt(f)?;
// Generic parameters are only printed when there are any.
if !gens.is_empty() {
f.delimit("<", "> ").list(gens, ", ")?;
}
match op {
// NOTE(review): the ",!? " separator looks like a deliberate marker for the
// unexpected case of more than one expression — confirm.
BindOp::Match => f.delimit(fmt!("{pat} => "), "").list(exprs, ",!? "),
BindOp::Fn | BindOp::Mod | BindOp::Impl => {
f.delimit(fmt!("{pat} "), "").list(exprs, ",!? ")
}
// Struct and enum definitions print only their pattern; `exprs` is not used.
BindOp::Struct | BindOp::Enum => match pat {
Pat::NamedStruct(name, bind) => match bind.as_ref() {
// A tuple of members goes through the indented, wrapped list helper.
Pat::Op(PatOp::Tuple, parts) => f
.delimit_indented(fmt!("{name} {{"), "}")
.list_wrap("\n", parts, ",\n", ",\n"),
other => write!(f, "{name} {{ {other} }}"),
},
_ => pat.fmt(f),
},
// Every other kind: `Pat`, `Pat = value`, or `Pat = value else fail`.
_ => match exprs.as_slice() {
[] => write!(f, "{pat}"),
[value] => write!(f, "{pat} = {value}"),
[value, fail] => write!(f, "{pat} = {value} else {fail}"),
// Three or more expressions fall back to a parenthesized list.
other => f.delimit(fmt!("{pat} ("), ")").list(other, ", "),
},
}
}
}
/// Prints the keyword that introduces the binding, including its trailing space.
impl Display for BindOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            // Match arms have no introducing keyword.
            Self::Match => "",
            Self::Let => "let ",
            Self::Const => "const ",
            Self::Static => "static ",
            Self::Type => "type ",
            Self::Struct => "struct ",
            Self::Enum => "enum ",
            Self::Fn => "fn ",
            Self::Mod => "mod ",
            Self::Impl => "impl ",
        };
        f.write_str(keyword)
    }
}
/// Prints a make expression: the target expression followed by its braced
/// arms, with the whole thing wrapped in parentheses.
impl<A: Annotation> Display for Make<A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(target, arms) = self;
        f.delimit(fmt!("({target} {{"), "})").list(arms, ", ")
    }
}
impl<A: Annotation> Display for MakeArm<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -598,7 +622,7 @@ impl<A: Annotation> TryFrom<Expr<A>> for Pat {
fn try_from(value: Expr<A>) -> Result<Self, Self::Error> {
Ok(match value {
Expr::Id(FqPath { mut parts }) if parts.len() == 1 => {
Expr::Id(Path { mut parts }) if parts.len() == 1 => {
match parts.pop().expect("parts should have len 1") {
ig if ig == "_" => Self::Ignore,
name => Self::Name(name),

View File

@@ -12,7 +12,7 @@ pub trait Visit<'a> {
/// Visits an identifier.
///
/// The default implementation forwards to `name.children(self)`.
fn visit_ident(&mut self, name: &'a str) -> Result<(), Self::Error> {
name.children(self)
}
fn visit_path(&mut self, path: &'a FqPath) -> Result<(), Self::Error> {
fn visit_path(&mut self, path: &'a Path) -> Result<(), Self::Error> {
path.children(self)
}
fn visit_literal(&mut self, lit: &'a Literal) -> Result<(), Self::Error> {
@@ -52,7 +52,7 @@ impl<'a> Walk<'a> for str {
}
}
impl<'a> Walk<'a> for FqPath {
impl<'a> Walk<'a> for Path {
fn visit_in<V: Visit<'a> + ?Sized>(&'a self, v: &mut V) -> Result<(), V::Error> {
v.visit_path(self)
}

View File

@@ -1,7 +1,7 @@
//! Tests the lexer
use doughlang::{
ast::{Anno, Pat},
parser::PPrec,
parser::pat::Prec as PPrec,
};
#[allow(unused_imports)]
use doughlang::{

View File

@@ -6,74 +6,21 @@ use crate::{
span::Span,
token::{Lexeme, TKind, Token},
};
use std::{error::Error, fmt::Display, iter, vec};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
/// Reached the expected end of input.
EOF(Span),
/// Unexpectedly reached end of input.
UnexpectedEOF(Span),
FromLexer(LexError),
Expected(TKind, TKind, Span),
NotLiteral(TKind, Span),
NotUse(TKind, Span),
NotPattern(TKind, Span),
NotType(TKind, Span),
NotPrefix(TKind, Span),
NotInfix(TKind, Span),
NotPostfix(TKind, Span),
/// A syntactic element that can be parsed out of a [Parser].
pub trait Parse<'t> {
/// The precedence (binding level) argument accepted by [Parse::parse].
type Prec: Copy;
/// Parses a `Self` from `p` at the given precedence level.
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
where Self: Sized;
}
pub use ParseError::EOF;
pub mod expr;
pub mod pat;
impl Error for ParseError {}
/// Renders a parse error as a one-line diagnostic.
///
/// Every message starts with the location it refers to, except lexer errors,
/// which are printed by the wrapped error's own `Display`.
impl Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
            Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
            Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
            Self::NotUse(tk, loc) => write!(f, "{loc}: {tk:?} is no use!"),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."),
            Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
            // Grammar fix: "an infix", not "a infix".
            Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not an infix operator."),
            Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
        }
    }
}
/// The result of a parsing operation: a value, or a [ParseError].
pub type PResult<T> = Result<T, ParseError>;
/// Extension methods for choosing how a [`ParseError::EOF`] is treated.
trait PResultExt<T> {
/// Turns [`ParseError::EOF`] into [`ParseError::UnexpectedEOF`]; all else is unchanged.
fn no_eof(self) -> PResult<T>;
/// Turns [`ParseError::EOF`] into `Ok(None)`, and a success into `Ok(Some(_))`.
fn allow_eof(self) -> PResult<Option<T>>;
}
impl<T> PResultExt<T> for PResult<T> {
    /// Upgrades a clean end-of-input into an unexpected one; any other
    /// outcome (success or a different error) passes through untouched.
    fn no_eof(self) -> Self {
        self.map_err(|err| match err {
            ParseError::EOF(span) => ParseError::UnexpectedEOF(span),
            other => other,
        })
    }

    /// Converts a clean end-of-input into `Ok(None)` and wraps successes in
    /// `Some`; every other error is returned as-is.
    fn allow_eof(self) -> PResult<Option<T>> {
        match self.map(Some) {
            Err(ParseError::EOF(_)) => Ok(None),
            other => other,
        }
    }
}
/// Opens a scope where [`ParseError::EOF`] is unexpected (See [`PResultExt::no_eof`])
fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
f().no_eof()
}
pub mod error;
pub use error::{EOF, PResult, PResultExt, ParseError, no_eof};
/// Handles stateful extraction from a [Lexer], with single-[Token] lookahead.
#[derive(Debug)]
pub struct Parser<'t> {
pub lexer: Lexer<'t>,
@@ -93,6 +40,7 @@ impl<'t> Parser<'t> {
t
}
/// Gets the [struct@Span] of the last-consumed [Token]
pub const fn span(&self) -> Span {
self.last_loc
}
@@ -116,11 +64,6 @@ impl<'t> Parser<'t> {
},
};
let next_tok = self.next_tok.insert(next_tok);
if let Ok(tok) = next_tok {
self.last_loc = tok.span;
}
next_tok.as_ref().map_err(|e| *e)
}
@@ -139,7 +82,12 @@ impl<'t> Parser<'t> {
.next_tok
.take()
.unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. }));
if let Ok(tok) = &tok {
self.last_loc = tok.span;
self.elide_do = matches!(tok.kind, TKind::RCurly | TKind::Semi)
}
tok
}
@@ -216,7 +164,7 @@ impl<'t> Parser<'t> {
})
}
/// Parses a P unless the next token is `end`
/// Parses a P unless the next [Token]'s [TKind] is `end`
pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
let out = match self.peek_if(end)? {
None => Some(self.parse(level).no_eof()?),
@@ -226,6 +174,7 @@ impl<'t> Parser<'t> {
Ok(out)
}
/// Ensures the next [Token]'s [TKind] is `next`
pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
self.next_if(next)?
.map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
@@ -239,14 +188,7 @@ impl<'t> Parser<'t> {
}
}
/// A syntactic element that can be parsed out of a [Parser].
pub trait Parse<'t> {
/// The precedence (binding level) argument accepted by [Parse::parse].
type Prec: Copy;
/// Parses a `Self` from `p` at the given precedence level.
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
where Self: Sized;
}
impl<'t> Parse<'t> for FqPath {
impl<'t> Parse<'t> for Path {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
@@ -261,7 +203,7 @@ impl<'t> Parse<'t> for FqPath {
}
}
Ok(FqPath { parts })
Ok(Path { parts })
}
}
@@ -273,7 +215,7 @@ impl<'t> Parse<'t> for Literal {
TKind::True => p.consume().then(Literal::Bool(true)),
TKind::False => p.consume().then(Literal::Bool(false)),
TKind::Character | TKind::Integer | TKind::String => {
match p.take().expect("should have Token").lexeme {
match p.take().expect("should have Token after peek").lexeme {
Lexeme::String(str) => Literal::Str(str),
Lexeme::Integer(int, base) => Literal::Int(int, base),
Lexeme::Char(chr) => Literal::Char(chr),
@@ -304,734 +246,6 @@ impl<'t> Parse<'t> for Use {
}
}
/// Binding levels for pattern parsing, ordered from loosest (`Min`) to
/// tightest (`Max`); the derived `Ord` follows declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PPrec {
    /// Loosest level: accepts every pattern operator
    Min,
    /// Alternatives: `Pat | Pat`
    Alt,
    /// Tuples: `Pat, Pat`
    Tuple,
    /// Type annotations: `Pat : Pat`
    Typed,
    /// Ranges: `Pat .. Pat`, `Pat ..= Pat`
    Range,
    /// Function signatures: `Pat -> Pat`
    Fn,
    /// Tightest level: accepts no infix operators
    Max,
}

impl PPrec {
    /// Returns the next-tighter binding level, saturating at [PPrec::Max].
    const fn next(self) -> Self {
        match self {
            // The top two levels both step to the ceiling.
            Self::Fn | Self::Max => Self::Max,
            Self::Range => Self::Fn,
            Self::Typed => Self::Range,
            Self::Tuple => Self::Typed,
            Self::Alt => Self::Tuple,
            Self::Min => Self::Alt,
        }
    }
}
/// Pseudo-operators recognized in infix position while parsing patterns.
///
/// Currently the only variant wraps a plain [PatOp].
enum PatPs {
/// A normal pattern operator
Op(PatOp),
}
/// Looks up the pattern operator and binding level for a token in infix
/// position, or `None` when the token is not a pattern infix operator.
fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> {
    let (op, prec) = match token.kind {
        TKind::DotDot => (PatOp::RangeEx, PPrec::Range),
        TKind::DotDotEq => (PatOp::RangeIn, PPrec::Range),
        TKind::Colon => (PatOp::Typed, PPrec::Typed),
        TKind::Comma => (PatOp::Tuple, PPrec::Tuple),
        TKind::Arrow => (PatOp::Fn, PPrec::Fn),
        TKind::Bar => (PatOp::Alt, PPrec::Alt),
        _ => return None,
    };
    Some((PatPs::Op(op), prec))
}
/// Parses a pattern in two phases: a prefix step that produces the initial
/// pattern, then a loop that folds in infix operators.
///
/// `level` is the loosest [PPrec] this call may consume: the infix loop stops
/// at the first operator whose precedence is below `level`.
impl<'t> Parse<'t> for Pat {
type Prec = PPrec;
fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult<Self> {
let tok = p.peek()?;
// Prefix
let mut head = match tok.kind {
// A leading `fn` keyword is dropped, and the rest is parsed at function-signature level
TKind::Fn => return p.consume().parse(PPrec::Fn),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
Pat::Lit(p.parse(())?)
}
// A leading `|` is skipped, and parsing continues at the same level
TKind::Bar => p.consume().parse(level)?,
TKind::Bang => p.consume().then(Pat::Never),
// Unary prefix operators take their operand at the tightest level
TKind::Amp => Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?]),
TKind::Star => Pat::Op(PatOp::Ptr, vec![p.consume().parse(PPrec::Max)?]),
TKind::Mut => Pat::Op(PatOp::Mut, vec![p.consume().parse(PPrec::Max)?]),
TKind::Pub => Pat::Op(PatOp::Pub, vec![p.consume().parse(PPrec::Max)?]),
// `&&` is a single token, but means two nested references
TKind::AmpAmp => Pat::Op(
PatOp::Ref,
vec![Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?])],
),
TKind::Identifier => match tok.lexeme.str() {
Some("_") => p.consume().then(Pat::Ignore),
_ => {
let mut path: FqPath = p.parse(())?;
// TODO: make these postfix.
match p.peek().map(|t| t.kind) {
Ok(TKind::LParen) => Pat::NamedTuple(path, p.parse(PPrec::Typed)?),
// Struct patterns are only recognized when `level` is at or below `Typed`
Ok(TKind::LCurly) if level <= PPrec::Tuple.next() => Pat::NamedStruct(
path,
p.consume()
.opt(PPrec::Tuple, TKind::RCurly)?
// Empty braces become an empty tuple of members
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
),
// A one-segment path is a binder; longer paths name a value
Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
},
Err(e) => Err(e)?,
}
}
},
// A grave introduces a meta-identifier: the following token's lexeme is its name
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
TKind::DotDot => Pat::Op(
PatOp::Rest,
// Identifier in Rest position always becomes binder
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Identifier) => vec![Pat::Name(
p.take_lexeme()
.expect("should have lexeme")
.string()
.expect("should be string"),
)],
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
// Prefix `..=`: an inclusive range with no lower bound
TKind::DotDotEq => Pat::Op(
PatOp::RangeIn,
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
TKind::LParen => Pat::Op(
PatOp::Tuple,
p.consume()
.list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?,
),
TKind::LBrack => parse_array_pat(p)?,
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};

// Infix: keep folding operators into `head` while they bind at least as
// tightly as `level`. A peek error ends the loop instead of propagating.
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Some((op, prec)) = pat_from_infix(tok)
&& level <= prec
{
let kind = tok.kind;
head = match op {
PatPs::Op(PatOp::Typed) => {
Pat::Op(PatOp::Typed, vec![head, p.consume().parse(PPrec::Max)?])
}
PatPs::Op(PatOp::Fn) => {
Pat::Op(PatOp::Fn, vec![head, p.consume().parse(PPrec::Fn.next())?])
}
// Ranges take an upper bound only when a bound-like token follows
PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op(
op,
match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
vec![head, p.parse(prec.next())?]
}
_ => vec![head],
},
),
// List-like operators gather further operands separated by the same token
PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
}
}
Ok(head)
}
}
/// Parses the remainder of a bracketed pattern, starting at the opening `[`.
///
/// Produces a [PatOp::Slice] for `[]` and `[a, b, ...]`, or a [PatOp::Arrep]
/// for the repeat form `[item; count]`.
fn parse_array_pat(p: &mut Parser<'_>) -> PResult<Pat> {
    // Consume the `[`; an immediate `]` is the empty slice pattern.
    if p.consume().peek()?.kind == TKind::RBrack {
        p.consume();
        return Ok(Pat::Op(PatOp::Slice, vec![]));
    }

    let item: Pat = p.parse(PPrec::Tuple)?;
    let repeat: Option<Pat> = p.opt_if(PPrec::Tuple, TKind::Semi)?;
    p.expect(TKind::RBrack)?;

    let elements = match repeat {
        Some(count) => return Ok(Pat::Op(PatOp::Arrep, vec![item, count])),
        None => match item {
            // A tuple of items is flattened into the slice's elements.
            Pat::Op(PatOp::Tuple, items) => items,
            single => vec![single],
        },
    };
    Ok(Pat::Op(PatOp::Slice, elements))
}
/// The precedence ladder for expression syntax, from loosest-binding (`Min`)
/// to tightest-binding (`Max`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Prec {
    Min,
    /// The Semicolon Operator gets its own precedence level
    Do,
    /// An assignment
    Assign,
    /// Constructor for a tuple
    Tuple,
    /// The body of a function, conditional, etc.
    Body,
    /// Constructor for a struct
    Make,
    /// The conditional of an `if` or `while` (which is really an `if`)
    Logical,
    /// The short-circuiting "boolean or" operator
    LogOr,
    /// The short-circuiting "boolean and" operator
    LogAnd,
    /// Value comparison operators
    Compare,
    /// Constructor for a Range
    Range,
    /// Binary/bitwise operators
    Binary,
    /// Bit-shifting operators
    Shift,
    /// Addition and Subtraction operators
    Factor,
    /// Multiplication, Division, and Remainder operators
    Term,
    /// Negation, (De)reference, Try
    Unary,
    /// Place-projection operators
    Project,
    /// Array/Call subscripting and reference
    Extend,
    Max,
}

impl Prec {
    /// The numeric level of [Prec::Min]
    pub const MIN: usize = Prec::Min.value();

    /// The base numeric level: twice the variant's position, so every level
    /// owns an even number and the odd number directly above it.
    pub const fn value(self) -> usize {
        self as usize * 2
    }

    /// The base level, raised by one for `Assign` only.
    // NOTE(review): together with `next`, this presumably makes assignment
    // right-associative and every other level left-associative — confirm
    // against the expression parser.
    pub const fn prev(self) -> usize {
        self.value() + matches!(self, Self::Assign) as usize
    }

    /// The base level, raised by one for every level except `Assign`.
    pub const fn next(self) -> usize {
        self.value() + !matches!(self, Self::Assign) as usize
    }
}
/// `PseudoOperator`: fake operators used to give certain tokens special behavior.
///
/// Tokens are mapped to a `Ps` together with a [Prec]; the final variant wraps
/// a real [Op] for tokens that need no special handling.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ps {
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Use, // use Use
Def, // any definition (let, const, static, struct, enum, fn, ...)
For, // for Pat in Expr Expr else Expr
Lambda0, // || Expr
Lambda, // | Pat,* | Expr
DoubleRef, // && Expr
Make, // Expr{ Expr,* }
ImplicitDo, // An implicit semicolon
End, // Produces an empty value.
Op(Op), // A normal [ast::Op]
}
/// Maps a token in prefix position (the start of an expression) to its
/// pseudo-operator and precedence.
///
/// # Errors
/// Returns [`ParseError::NotPrefix`], carrying the token's kind and span, for
/// any token kind not listed here.
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Do => (Ps::Op(Op::Do), Prec::Do),
TKind::Semi => (Ps::End, Prec::Body),

// Atoms: identifiers, paths with a leading `::`, meta-identifiers and literals
TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
TKind::Grave => (Ps::Mid, Prec::Max),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
(Ps::Lit, Prec::Max)
}
TKind::Use => (Ps::Use, Prec::Max),

// Keyword-introduced forms
TKind::Pub => (Ps::Op(Op::Pub), Prec::Body),
TKind::For => (Ps::For, Prec::Body),
TKind::Match => (Ps::Op(Op::Match), Prec::Body),
TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign),

// Every definition keyword shares one pseudo-operator
TKind::Fn
| TKind::Mod
| TKind::Impl
| TKind::Let
| TKind::Const
| TKind::Static
| TKind::Type
| TKind::Struct
| TKind::Enum => (Ps::Def, Prec::Body),

TKind::Loop => (Ps::Op(Op::Loop), Prec::Body),
TKind::If => (Ps::Op(Op::If), Prec::Body),
TKind::While => (Ps::Op(Op::While), Prec::Body),
TKind::Break => (Ps::Op(Op::Break), Prec::Body),
TKind::Return => (Ps::Op(Op::Return), Prec::Body),

// Delimiters: openers start a block/array/group, closers map to `Ps::End`
TKind::LCurly => (Ps::Op(Op::Block), Prec::Min),
TKind::RCurly => (Ps::End, Prec::Do),
TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple),
TKind::RBrack => (Ps::End, Prec::Tuple),
TKind::LParen => (Ps::Op(Op::Group), Prec::Min),
TKind::RParen => (Ps::End, Prec::Tuple),
// Symbolic prefix operators
TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend),
TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend),
TKind::Bang => (Ps::Op(Op::Not), Prec::Unary),
TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Bar => (Ps::Lambda, Prec::Body),
TKind::BarBar => (Ps::Lambda0, Prec::Body),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
// Unary plus maps to the same operator as `!!`
TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),

kind => Err(ParseError::NotPrefix(kind, token.span))?,
})
}
/// Maps a token in infix or postfix position (after an expression) to its
/// pseudo-operator and precedence.
///
/// This function never returns `Err`: any token kind not listed here is
/// treated as an implicit semicolon ([`Ps::ImplicitDo`]) at [`Prec::Do`].
const fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
Ok(match token.kind {
TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration
// `in` maps to the same operator and precedence as `;`
TKind::In => (Ps::Op(Op::Do), Prec::Do),

// Assignment and compound assignment
TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),
TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
// An opening brace after an expression begins a make (constructor) expression
TKind::LCurly => (Ps::Make, Prec::Make),
// Short-circuiting logic
TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),
// Comparison
TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare),
TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare),
TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare),
TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),
// Bitwise, shift and arithmetic operators
TKind::Amp => (Ps::Op(Op::And), Prec::Binary),
TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary),
TKind::Bar => (Ps::Op(Op::Or), Prec::Binary),
TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift),
TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift),
TKind::Plus => (Ps::Op(Op::Add), Prec::Factor),
TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor),
TKind::Star => (Ps::Op(Op::Mul), Prec::Term),
TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),

// Postfix-style operators: try, member access, call and index
TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),

// Closing delimiters map to `Ps::End`
TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),
TKind::As => (Ps::Op(Op::As), Prec::Max),
_ => (Ps::ImplicitDo, Prec::Do),
})
}
/// Parses the keyword that introduces a binding.
///
/// A recognized keyword (or a leading `|`, which introduces a match arm) is
/// consumed. Any other token is left in place and the binding is treated as
/// a match arm.
impl<'t> Parse<'t> for BindKind {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
        let introducer = match p.peek()?.kind {
            TKind::Let => Some(BindKind::Let),
            TKind::Const => Some(BindKind::Const),
            TKind::Static => Some(BindKind::Static),
            TKind::Type => Some(BindKind::Type),
            TKind::Struct => Some(BindKind::Struct),
            TKind::Enum => Some(BindKind::Enum),
            TKind::Fn => Some(BindKind::Fn),
            TKind::Mod => Some(BindKind::Mod),
            TKind::Impl => Some(BindKind::Impl),
            TKind::Bar => Some(BindKind::Match),
            _ => None,
        };
        match introducer {
            Some(kind) => {
                p.consume();
                Ok(kind)
            }
            // no consume!
            None => Ok(BindKind::Match),
        }
    }
}
impl<'t> Parse<'t> for Bind {
type Prec = ();
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
let level = p.parse(())?;
let generics = match p.next_if(TKind::Lt)? {
Ok(_) => p.list(vec![], (), TKind::Comma, TKind::Gt)?,
Err(_) => vec![],
};
match level {
BindKind::Match => {
// |? Pat => Expr
Ok(Self(
level,
generics,
p.parse(PPrec::Alt)?,
vec![p.expect(TKind::FatArrow)?.parse(Prec::Body.next())?],
))
}
BindKind::Mod | BindKind::Impl => Ok(Self(
level,
generics,
p.parse(PPrec::Max)?,
vec![p.parse(Prec::Body.next())?],
)),
BindKind::Fn => Ok(Self(
level,
generics,
p.parse(PPrec::Fn)?,
vec![p.parse(Prec::Body.next())?],
)),
_ => {
// let Pat
let pat = p.parse(PPrec::Tuple)?;
if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) {
return Ok(Self(level, generics, pat, vec![]));
}
// = Expr
let body = p.parse(Prec::Tuple.value())?;
if p.next_if(TKind::Else)
.allow_eof()?
.is_none_or(|v| v.is_err())
{
return Ok(Self(level, generics, pat, vec![body]));
}
// else Expr
Ok(Self(
level,
generics,
pat,
vec![body, p.parse(Prec::Body.next())?],
))
}
}
}
}
/// Parses one arm of a constructor expression: an identifier,
/// followed by whatever `opt_if` yields for the `:` token (presumably an optional `: Expr`).
impl<'t> Parse<'t> for MakeArm {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
        let name = match p.next_if(TKind::Identifier)? {
            Ok(token) => token,
            Err(found) => {
                return Err(ParseError::Expected(TKind::Identifier, found, p.span()));
            }
        };
        let field = name.lexeme.string().expect("Identifier should have String");
        let value = p.opt_if(Prec::Body.value(), TKind::Colon)?;
        Ok(MakeArm(field, value))
    }
}
impl<'t> Parse<'t> for Expr {
type Prec = usize;
/// Parses an [Expr]ession.
///
/// The `level` parameter indicates the operator binding level of the expression.
///
/// Parsing happens in two phases: a prefix phase, which produces the initial `head`
/// from the first token, and an infix/postfix loop, which keeps folding operators
/// into `head` for as long as `level <= prec.prev()` holds for the next operator.
/// Operands are parsed recursively at `prec.next()`.
///
/// # Errors
/// Fails with the error from [from_prefix] if the first token cannot begin an expression.
/// Once the prefix operator is known, running out of input is reported through
/// [no_eof] rather than as a plain end-of-input.
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
// For now, any leading Doc tokens are skipped and their contents discarded.
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix
let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, span);
no_eof(move || {
let mut head = match op {
// "End" is produced when an "empty" expression is syntactically required.
// This happens when a semi or closing delimiter begins an expression.
// The token which emitted "End" cannot be consumed, as it is expected elsewhere.
Ps::End if level <= prec.next() => Expr::Omitted,
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Use => Expr::Use(p.consume().parse(())?),
Ps::Def => Expr::Bind(p.parse(())?),
// Lambdas become anonymous `fn` bindings. `||` (Lambda0) has no parameter list;
// both forms fall back to an empty tuple pattern when no parameters are given.
Ps::Lambda | Ps::Lambda0 => {
p.consume();
let args = if op == Ps::Lambda {
p.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![]))
} else {
Pat::Op(PatOp::Tuple, vec![])
};
// A missing `-> Pat` return annotation is recorded as `Pat::Ignore`.
let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore);
Expr::Bind(Box::new(Bind(
BindKind::Fn,
vec![],
Pat::Op(PatOp::Fn, vec![args, rety]),
vec![p.parse(Prec::Body.next())?],
)))
}
Ps::For => parse_for(p, ())?,
Ps::Op(Op::Match) => parse_match(p)?,
// `#[ meta ] expr`: the bracketed part defaults to an empty tuple, and the
// annotated expression is parsed at the caller's own `level`.
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
// A block holds zero or one expression.
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
// `()` is the empty tuple; `( expr )` is a group.
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
// `if` and `while` always carry three operands: condition, body, and else-branch.
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
// No `else` (or nothing could be peeked): substitute an empty tuple.
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
// `&&expr` is a single token, expanded here into two nested `Refer` operations
// which share the operand's span.
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
// Every remaining prefix operator takes exactly one operand.
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
// The loop ends at end of input, when the next operator binds more loosely than
// `level`, or when the next token maps to `Ps::End` (a closing delimiter).
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
// Only an identifier or meta-identifier may be followed by a `{ ... }` body;
// after anything else the `{` is left for the caller.
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// As is ImplicitDo (semicolon elision)
// With `elide_do` set, a token that is not an infix operator acts as if a `;`
// preceded it. The token itself is not consumed here.
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
// Allow `;` at end of file
Ps::Op(Op::Do) => head.and_do(
span,
match p.consume().peek().allow_eof()? {
Some(_) => p.parse(prec.next())?,
None => Anno(Default::default(), span),
},
),
// `head[a, b, ...]`: head becomes the first element of the operand list.
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
// `head(args)`: the arguments are a single expression, defaulting to an empty tuple.
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
vec![
head.anno(span),
p.consume()
.opt(0, TKind::RParen)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
],
),
// These operators gather all of their operands into one flat list, using the
// operator token itself as the separator passed to `list_bare`.
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
// `?` is postfix: it has no right-hand operand.
Ps::Op(op @ Op::Try) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
}
// Ordinary binary operator.
Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ok(head)
})
}
}
/// Parses an array with 0 or more elements, or an array-repetition
fn parse_array(p: &mut Parser<'_>) -> PResult<Expr> {
if p.consume().peek()?.kind == TKind::RBrack {
p.consume();
return Ok(Expr::Op(Op::Array, vec![]));
}
let prec = Prec::Tuple;
let item = p.parse(prec.value())?;
let repeat = p.opt_if(prec.next(), TKind::Semi)?;
p.expect(TKind::RBrack)?;
Ok(match (repeat, item) {
(Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),
(None, Anno(Expr::Op(Op::Tuple, items), _)) => Expr::Op(Op::Array, items),
(None, item) => Expr::Op(Op::Array, vec![item]),
})
}
fn parse_match(p: &mut Parser<'_>) -> PResult<Expr> {
let scrutinee = p.consume().parse(Prec::Logical.value())?;
let arms = p
.expect(TKind::LCurly)?
.list(vec![], (), TKind::Comma, TKind::RCurly)?
.into_iter()
.map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span));
let expr = Expr::Op(Op::Match, iter::once(scrutinee).chain(arms).collect());
Ok(expr)
}
/// Parses a `for` loop and rewrites it in terms of `match` and `loop`.
///
/// ```ignore
/// for `pat in `iter `pass else `fail
/// ==>
/// match (`iter).into_iter() {
///     #iter => loop match #iter.next() {
///         None => break `fail,
///         Some(`pat) => `pass,
///     },
/// }
/// ```
///
/// When no `else` branch is written, `fail` is an empty tuple carrying the body's span.
fn parse_for(p: &mut Parser<'_>, _level: ()) -> PResult<Expr> {
    // for Pat
    let pat = p.consume().parse(PPrec::Tuple)?;
    // in Expr
    let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
    let cspan = iter.1;
    // Expr
    let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
    let pspan = pass.1;
    // else Expr?
    let fail = match p.next_if(TKind::Else).allow_eof()? {
        Some(Ok(_)) => p.parse(Prec::Body.next())?,
        _ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
    };
    let fspan = fail.1;

    // TODO: A better way to do this kind of substitution desugaring
    // without losing span information!

    // (`iter).into_iter()
    let into_iter_call =
        Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan);
    let iterable = Expr::Op(Op::Dot, vec![iter, into_iter_call]).anno(cspan);

    // #iter.next()
    let next_call = Expr::Op(Op::Call, vec![Expr::Id("next".into()).anno(cspan)]).anno(cspan);
    let advance = Expr::Op(
        Op::Dot,
        vec![Expr::Id("#iter".into()).anno(cspan), next_call],
    )
    .anno(cspan);

    // None => break `fail
    let on_none = Expr::Bind(Box::new(Bind(
        BindKind::Match,
        vec![],
        Pat::Name("None".into()),
        vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)],
    )))
    .anno(fspan);

    // Some(`pat) => `pass
    let on_some = Expr::Bind(Box::new(Bind(
        BindKind::Match,
        vec![],
        Pat::NamedTuple(
            "Some".into(),
            Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
        ),
        vec![pass],
    )))
    .anno(pspan);

    // loop match #iter.next() { ... }
    let step = Expr::Op(Op::Match, vec![advance, on_none, on_some]).anno(pspan);
    let looped = Expr::Op(Op::Loop, vec![step]).anno(pspan);

    // #iter => loop ...
    let bind_iter = Expr::Bind(Box::new(Bind(
        BindKind::Match,
        vec![],
        Pat::Name("#iter".into()),
        vec![looped],
    )))
    .anno(pspan);

    Ok(Expr::Op(Op::Match, vec![iterable, bind_iter]))
}
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
type Prec = P::Prec;
fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>

70
src/parser/error.rs Normal file
View File

@@ -0,0 +1,70 @@
use crate::{ast::BindOp, lexer::LexError, span::Span, token::TKind};
use std::{error::Error, fmt::Display};
/// Everything that can go wrong while parsing.
///
/// Every variant except [`FromLexer`](ParseError::FromLexer) carries the [Span]
/// that is printed as the location prefix of its message.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
/// Reached the expected end of input.
EOF(Span),
/// Unexpectedly reached end of input.
UnexpectedEOF(Span),
/// An error reported by the lexer, passed through unchanged.
FromLexer(LexError),
/// A specific token kind was required (first field), but another was found (second field).
Expected(TKind, TKind, Span),
/// The token is not valid in a literal.
NotLiteral(TKind, Span),
/// The token is not valid in a `use` declaration.
NotUse(TKind, Span),
/// The token is not valid in a pattern.
NotPattern(TKind, Span),
/// A binding of one kind (first field) appeared where another kind (second field) was required.
NotMatch(BindOp, BindOp, Span),
/// The token cannot begin an expression.
NotPrefix(TKind, Span),
/// The token cannot be used as an infix operator.
NotInfix(TKind, Span),
/// The token cannot be used as a postfix operator.
NotPostfix(TKind, Span),
}
// Re-exported so the end-of-input variant can be named as plain `EOF`.
pub use ParseError::EOF;
impl Error for ParseError {}
impl Display for ParseError {
    /// Writes a one-line, human-readable description of the error.
    ///
    /// Every message except a lexer error is prefixed with the location it refers to.
    /// Token and binding kinds are printed with their `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EOF(loc) => write!(f, "{loc}: Reached end of input."),
            Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."),
            // Lexer errors format themselves.
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."),
            Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."),
            Self::NotUse(tk, loc) => write!(f, "{loc}: {tk:?} is no use!"),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotMatch(bk, ex, loc) => {
                write!(f, "{loc}: {bk:?} is not valid in a {ex:?} expression.")
            }
            Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
            // Grammar fix: "an infix", not "a infix".
            Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not an infix operator."),
            Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
        }
    }
}
/// The result of a parsing operation: a `T` on success, a [ParseError] on failure.
pub type PResult<T> = Result<T, ParseError>;
/// Extension methods that decide how an end-of-input error is treated.
///
/// The descriptions below follow the implementation for [PResult].
pub trait PResultExt<T> {
/// Turns [`ParseError::EOF`] into [`ParseError::UnexpectedEOF`], keeping its span.
/// Every other value is returned unchanged.
fn no_eof(self) -> PResult<T>;
/// Turns [`ParseError::EOF`] into `Ok(None)` and a success into `Ok(Some(_))`.
/// Every other error is returned unchanged.
fn allow_eof(self) -> PResult<Option<T>>;
}
impl<T> PResultExt<T> for PResult<T> {
    /// Reclassifies an end-of-input error as unexpected, keeping its span.
    /// Successes and all other errors pass through unchanged.
    fn no_eof(self) -> Self {
        self.map_err(|err| match err {
            ParseError::EOF(span) => ParseError::UnexpectedEOF(span),
            other => other,
        })
    }

    /// Treats an end-of-input error as the absence of a value.
    /// Successes are wrapped in `Some`; all other errors pass through unchanged.
    fn allow_eof(self) -> PResult<Option<T>> {
        match self.map(Some) {
            Err(ParseError::EOF(_)) => Ok(None),
            outcome => outcome,
        }
    }
}
/// Opens a scope where [`ParseError::EOF`] is unexpected (See [`PResultExt::no_eof`])
pub fn no_eof<T>(f: impl FnOnce() -> PResult<T>) -> PResult<T> {
f().no_eof()
}

592
src/parser/expr.rs Normal file
View File

@@ -0,0 +1,592 @@
use super::{PResult, PResultExt, Parse, ParseError, Parser, no_eof, pat::Prec as PPrec};
use crate::{
ast::*,
token::{TKind, Token},
};
use std::iter;
/// The precedence hierarchy for syntactic elements of an expression.
///
/// Variants are declared from loosest to tightest binding, and their declaration
/// order is what [`Prec::value`] and the derived ordering are based on.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Prec {
    /// The loosest level
    Min,
    /// A dedicated level for the semicolon operator
    Do,
    /// Assignment and compound assignment
    Assign,
    /// Tuple construction
    Tuple,
    /// Bodies of functions, conditionals, and the like
    Body,
    /// Struct construction
    Make,
    /// Conditions of `if` and `while` (which is really an `if`)
    Logical,
    /// Short-circuiting "boolean or"
    LogOr,
    /// Short-circuiting "boolean and"
    LogAnd,
    /// Value comparison
    Compare,
    /// Range construction
    Range,
    /// Binary/bitwise operators
    Binary,
    /// Bit shifts
    Shift,
    /// Addition and subtraction
    Factor,
    /// Multiplication, division, and remainder
    Term,
    /// Negation, (de)reference, try
    Unary,
    /// Place projection
    Project,
    /// Call and index subscripting, and reference
    Extend,
    /// The tightest level
    Max,
}

impl Prec {
    /// The numeric level of [`Prec::Min`].
    pub const MIN: usize = Prec::Min.value();

    /// The numeric level of this precedence: twice its position in the enum,
    /// which leaves an odd level free between each pair of neighbors.
    pub const fn value(self) -> usize {
        2 * (self as usize)
    }

    /// The level an enclosing parse may be at, at most, for an operator of this
    /// precedence to still be accepted (the expression parser checks `level <= prec.prev()`).
    ///
    /// [`Prec::Assign`] is one higher than its [`value`](Prec::value); every other
    /// precedence is exactly its value.
    pub const fn prev(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) {
            base + 1
        } else {
            base
        }
    }

    /// The level at which the operand following an operator of this precedence is parsed.
    ///
    /// [`Prec::Assign`] is exactly its [`value`](Prec::value); every other precedence
    /// is one higher. Together with [`prev`](Prec::prev) this lets an assignment accept
    /// another assignment as its right operand, while other operators do not accept
    /// an operator of their own precedence there.
    pub const fn next(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) {
            base
        } else {
            base + 1
        }
    }
}
/// `PseudoOperator`: fake operators used to give certain tokens special behavior.
///
/// The token-to-operator tables produce these, and the expression parser dispatches
/// on them. Tokens that need no special handling are wrapped as [`Ps::Op`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ps {
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Use, // use Use
Def, // any definition (let, const, static, struct, enum, fn, ...)
For, // for Pat in Expr Expr else Expr
Lambda0, // || Expr
Lambda, // | Pat,* | Expr
DoubleRef, // && Expr
Make, // Expr{ Expr,* }
ImplicitDo, // An implicit semicolon
// In prefix position, End yields an omitted expression without consuming its token;
// in infix position, it stops the operator loop.
End, // Produces an empty value.
Op(Op), // A normal [ast::Op]
}
/// Looks up the prefix [expression operator](Op) (or pseudo-operator) that the
/// incoming [Token] stands for, together with its [precedence level](Prec).
///
/// # Errors
/// Returns [`ParseError::NotPrefix`] for a token that cannot begin an expression.
fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> {
    let entry = match token.kind {
        // Atoms
        TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max),
        TKind::Grave => (Ps::Mid, Prec::Max),
        TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
            (Ps::Lit, Prec::Max)
        }

        // Items and definitions
        TKind::Use => (Ps::Use, Prec::Max),
        TKind::Pub => (Ps::Op(Op::Pub), Prec::Body),
        TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign),
        TKind::Fn
        | TKind::Mod
        | TKind::Impl
        | TKind::Let
        | TKind::Const
        | TKind::Static
        | TKind::Type
        | TKind::Struct
        | TKind::Enum => (Ps::Def, Prec::Body),

        // Control flow
        TKind::Do => (Ps::Op(Op::Do), Prec::Do),
        TKind::For => (Ps::For, Prec::Body),
        TKind::Match => (Ps::Op(Op::Match), Prec::Body),
        TKind::Loop => (Ps::Op(Op::Loop), Prec::Body),
        TKind::If => (Ps::Op(Op::If), Prec::Body),
        TKind::While => (Ps::Op(Op::While), Prec::Body),
        TKind::Break => (Ps::Op(Op::Break), Prec::Body),
        TKind::Return => (Ps::Op(Op::Return), Prec::Body),

        // Opening delimiters
        TKind::LCurly => (Ps::Op(Op::Block), Prec::Min),
        TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple),
        TKind::LParen => (Ps::Op(Op::Group), Prec::Min),

        // Tokens which end an (empty) expression
        TKind::Semi => (Ps::End, Prec::Body),
        TKind::RCurly => (Ps::End, Prec::Do),
        TKind::RBrack | TKind::RParen => (Ps::End, Prec::Tuple),

        // Lambdas
        TKind::Bar => (Ps::Lambda, Prec::Body),
        TKind::BarBar => (Ps::Lambda0, Prec::Body),

        // Prefix operators
        TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend),
        TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend),
        TKind::Bang => (Ps::Op(Op::Not), Prec::Unary),
        TKind::BangBang | TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary),
        TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary),
        TKind::Star => (Ps::Op(Op::Deref), Prec::Unary),
        TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary),
        TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
        TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),

        other => return Err(ParseError::NotPrefix(other, token.span)),
    };
    Ok(entry)
}
/// Looks up the infix or postfix [expression operator](Op) (or pseudo-operator) that
/// the incoming [Token] stands for, together with its [precedence level](Prec).
///
/// This lookup never fails: a closing delimiter maps to [`Ps::End`], and any token
/// without an entry maps to [`Ps::ImplicitDo`].
const fn from_infix(token: &Token) -> PResult<(Ps, Prec)> {
    let entry = match token.kind {
        // Sequencing
        TKind::Semi | TKind::In => (Ps::Op(Op::Do), Prec::Do),

        // Assignment
        TKind::Eq => (Ps::Op(Op::Set), Prec::Assign),
        TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign),
        TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign),
        TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign),
        TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign),
        TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign),
        TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign),
        TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign),
        TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign),
        TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign),
        TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign),

        // Constructors
        TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple),
        TKind::LCurly => (Ps::Make, Prec::Make),

        // Logical
        TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical),
        TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr),
        TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd),

        // Comparison
        TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare),
        TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare),
        TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare),
        TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare),
        TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare),
        TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare),

        // Ranges
        TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range),
        TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range),

        // Bitwise and shifts
        TKind::Amp => (Ps::Op(Op::And), Prec::Binary),
        TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary),
        TKind::Bar => (Ps::Op(Op::Or), Prec::Binary),
        TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift),
        TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift),

        // Arithmetic
        TKind::Plus => (Ps::Op(Op::Add), Prec::Factor),
        TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor),
        TKind::Star => (Ps::Op(Op::Mul), Prec::Term),
        TKind::Slash => (Ps::Op(Op::Div), Prec::Term),
        TKind::Rem => (Ps::Op(Op::Rem), Prec::Term),

        // Postfix, projection, and casts
        TKind::Question => (Ps::Op(Op::Try), Prec::Unary),
        TKind::Dot => (Ps::Op(Op::Dot), Prec::Project),
        TKind::LParen => (Ps::Op(Op::Call), Prec::Extend),
        TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend),
        TKind::As => (Ps::Op(Op::As), Prec::Max),

        // Closing delimiters end the expression
        TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max),

        // Anything else may be the start of a new expression after an elided `;`
        _ => (Ps::ImplicitDo, Prec::Do),
    };
    Ok(entry)
}
impl<'t> Parse<'t> for Expr {
type Prec = usize;
/// Parses an [Expr]ession.
///
/// The `level` parameter indicates the operator binding level of the expression.
///
/// Parsing happens in two phases: a prefix phase, which produces the initial `head`
/// from the first token, and an infix/postfix loop, which keeps folding operators
/// into `head` for as long as `level <= prec.prev()` holds for the next operator.
/// Operands are parsed recursively at `prec.next()`.
///
/// # Errors
/// Fails with the error from [from_prefix] if the first token cannot begin an expression.
/// Once the prefix operator is known, running out of input is reported through
/// [no_eof] rather than as a plain end-of-input.
fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
const MIN: usize = Prec::MIN;
// TODO: in-tree doc comments
// For now, any leading Doc tokens are skipped and their contents discarded.
while p.next_if(TKind::Doc)?.is_ok() {}
// Prefix
let tok @ &Token { kind, span, .. } = p.peek()?;
let ((op, prec), span) = (from_prefix(tok)?, span);
no_eof(move || {
let mut head = match op {
// "End" is produced when an "empty" expression is syntactically required.
// This happens when a semi or closing delimiter begins an expression.
// The token which emitted "End" cannot be consumed, as it is expected
// elsewhere.
Ps::End if level <= prec.next() => Expr::Omitted,
Ps::End => Err(ParseError::NotPrefix(kind, span))?,
Ps::Id => Expr::Id(p.parse(())?),
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
Ps::Lit => Expr::Lit(p.parse(())?),
Ps::Use => Expr::Use(p.consume().parse(())?),
// `None`: any kind of binding is acceptable in expression position.
Ps::Def => Expr::Bind(p.parse(None)?),
// Lambdas become anonymous `fn` bindings. `||` (Lambda0) has no parameter list;
// both forms fall back to an empty tuple pattern when no parameters are given.
Ps::Lambda | Ps::Lambda0 => {
p.consume();
let args = if op == Ps::Lambda {
p.opt(PPrec::Tuple, TKind::Bar)?
.unwrap_or(Pat::Op(PatOp::Tuple, vec![]))
} else {
Pat::Op(PatOp::Tuple, vec![])
};
// A missing `-> Pat` return annotation is recorded as `Pat::Ignore`.
let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore);
Expr::Bind(Box::new(Bind(
BindOp::Fn,
vec![],
Pat::Op(PatOp::Fn, vec![args, rety]),
vec![p.parse(Prec::Body.next())?],
)))
}
Ps::For => parse_for(p, ())?,
Ps::Op(Op::Match) => parse_match(p)?,
// `#[ meta ] expr`: the bracketed part defaults to an empty tuple, and the
// annotated expression is parsed at the caller's own `level`.
Ps::Op(Op::Meta) => Expr::Op(
Op::Meta,
vec![
p.consume()
.expect(TKind::LBrack)?
.opt(MIN, TKind::RBrack)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
p.parse(level)?,
],
),
// A block holds zero or one expression.
Ps::Op(Op::Block) => Expr::Op(
Op::Block,
p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
),
Ps::Op(Op::Array) => parse_array(p)?,
// `()` is the empty tuple; `( expr )` is a group.
Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? {
Some(value) => Expr::Op(Op::Group, vec![value]),
None => Expr::Op(Op::Tuple, vec![]),
},
// `if` and `while` always carry three operands: condition, body, and else-branch.
Ps::Op(op @ (Op::If | Op::While)) => {
p.consume();
let exprs = vec![
// conditional restricted to Logical operators or above
p.parse(Prec::Logical.value())?,
p.parse(prec.next())?,
match p.peek() {
Ok(Token { kind: TKind::Else, .. }) => {
p.consume().parse(prec.next())?
}
// No `else` (or nothing could be peeked): substitute an empty tuple.
_ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())),
},
];
Expr::Op(op, exprs)
}
// `&&expr` is a single token, expanded here into two nested `Refer` operations
// which share the operand's span.
Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| {
Expr::Op(
Op::Refer,
vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)],
)
})?,
// Every remaining prefix operator takes exactly one operand.
Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
_ => unimplemented!("prefix {op:?}"),
};
// Infix and Postfix
// The loop ends at end of input, when the next operator binds more loosely than
// `level`, or when the next token maps to `Ps::End` (a closing delimiter).
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Ok((op, prec)) = from_infix(tok)
&& level <= prec.prev()
&& op != Ps::End
{
let kind = tok.kind;
let span = span.merge(p.span());
head = match op {
// Make (structor expressions) are context-sensitive
// Only an identifier or meta-identifier may be followed by a `{ ... }` body;
// after anything else the `{` is left for the caller.
Ps::Make => match &head {
Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make(
head.anno(span),
p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?,
))),
_ => break,
},
// As is ImplicitDo (semicolon elision)
// With `elide_do` set, a token that is not an infix operator acts as if a `;`
// preceded it. The token itself is not consumed here.
Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?),
Ps::ImplicitDo => break,
// Allow `;` at end of file
Ps::Op(Op::Do) => head.and_do(
span,
match p.consume().peek().allow_eof()? {
Some(_) => p.parse(prec.next())?,
None => Anno(Default::default(), span),
},
),
// `head[a, b, ...]`: head becomes the first element of the operand list.
Ps::Op(Op::Index) => Expr::Op(
Op::Index,
p.consume()
.list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?,
),
// `head(args)`: the arguments are a single expression, defaulting to an empty tuple.
Ps::Op(Op::Call) => Expr::Op(
Op::Call,
vec![
head.anno(span),
p.consume()
.opt(0, TKind::RParen)?
.unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)),
],
),
// These operators gather all of their operands into one flat list, using the
// operator token itself as the separator passed to `list_bare`.
Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op(
op,
p.consume()
.list_bare(vec![head.anno(span)], prec.next(), kind)?,
),
// `?` is postfix: it has no right-hand operand.
Ps::Op(op @ Op::Try) => {
p.consume();
Expr::Op(op, vec![head.anno(span)])
}
// Ordinary binary operator.
Ps::Op(op) => {
Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?])
}
_ => Err(ParseError::NotInfix(kind, span))?,
}
}
Ok(head)
})
}
}
/// Parses an array with 0 or more elements, or an array-repetition
fn parse_array(p: &mut Parser<'_>) -> PResult<Expr> {
if p.consume().peek()?.kind == TKind::RBrack {
p.consume();
return Ok(Expr::Op(Op::Array, vec![]));
}
let prec = Prec::Tuple;
let item = p.parse(prec.value())?;
let repeat = p.opt_if(prec.next(), TKind::Semi)?;
p.expect(TKind::RBrack)?;
Ok(match (repeat, item) {
(Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]),
(None, Anno(Expr::Op(Op::Tuple, items), _)) => Expr::Op(Op::Array, items),
(None, item) => Expr::Op(Op::Array, vec![item]),
})
}
/// Parses a `match` expression
///
/// ```ignore
/// match scrutinee {
/// (Pat => Expr),*
/// }
/// ```
fn parse_match(p: &mut Parser<'_>) -> PResult<Expr> {
let scrutinee = p.consume().parse(Prec::Logical.value())?;
let arms = p
.expect(TKind::LCurly)?
.list(vec![], Some(BindOp::Match), TKind::Comma, TKind::RCurly)?
.into_iter()
.map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span));
let expr = Expr::Op(Op::Match, iter::once(scrutinee).chain(arms).collect());
Ok(expr)
}
/// Parses a `for` loop expression and desugars it into `match` and `loop`.
///
/// ```ignore
/// for `pat in `iter `pass else `fail
/// ==>
/// match (`iter).into_iter() {
///     #iter => loop match #iter.next() {
///         None => break `fail,
///         Some(`pat) => `pass,
///     },
/// }
/// ```
///
/// When no `else` branch is written, `fail` is an empty tuple carrying the body's span.
///
/// Assumes the existence of the following items:
///
/// 1. `enum<T> Option { None, Some(T) }`
/// 2. `fn T::into_iter(&mut self) -> U`
/// 3. `U::next() -> Option<V>`
fn parse_for(p: &mut Parser<'_>, _level: ()) -> PResult<Expr> {
    // for Pat
    let pat = p.consume().parse(PPrec::Tuple)?;
    // in Expr
    let iter: Anno<Expr> = p.expect(TKind::In)?.parse(Prec::Logical.next())?;
    let cspan = iter.1;
    // Expr
    let pass: Anno<Expr> = p.parse(Prec::Body.next())?;
    let pspan = pass.1;
    // else Expr?
    let fail = match p.next_if(TKind::Else).allow_eof()? {
        Some(Ok(_)) => p.parse(Prec::Body.next())?,
        _ => Expr::Op(Op::Tuple, vec![]).anno(pspan),
    };
    let fspan = fail.1;

    // TODO: A better way to do this kind of substitution desugaring
    // without losing span information!

    // (`iter).into_iter()
    let into_iter_call =
        Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan);
    let iterable = Expr::Op(Op::Dot, vec![iter, into_iter_call]).anno(cspan);

    // #iter.next()
    let next_call = Expr::Op(Op::Call, vec![Expr::Id("next".into()).anno(cspan)]).anno(cspan);
    let advance = Expr::Op(
        Op::Dot,
        vec![Expr::Id("#iter".into()).anno(cspan), next_call],
    )
    .anno(cspan);

    // None => break `fail
    let on_none = Expr::Bind(Box::new(Bind(
        BindOp::Match,
        vec![],
        Pat::Name("None".into()),
        vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)],
    )))
    .anno(fspan);

    // Some(`pat) => `pass
    let on_some = Expr::Bind(Box::new(Bind(
        BindOp::Match,
        vec![],
        Pat::NamedTuple(
            "Some".into(),
            Box::new(Pat::Op(PatOp::Tuple, vec![pat])),
        ),
        vec![pass],
    )))
    .anno(pspan);

    // loop match #iter.next() { ... }
    let step = Expr::Op(Op::Match, vec![advance, on_none, on_some]).anno(pspan);
    let looped = Expr::Op(Op::Loop, vec![step]).anno(pspan);

    // #iter => loop ...
    let bind_iter = Expr::Bind(Box::new(Bind(
        BindOp::Match,
        vec![],
        Pat::Name("#iter".into()),
        vec![looped],
    )))
    .anno(pspan);

    Ok(Expr::Op(Op::Match, vec![iterable, bind_iter]))
}
/// Looks up how a Bind expression is to be parsed, based on the token that introduces it.
///
/// The returned tuple holds, in order: the [BindOp], the [pattern precedence](PPrec),
/// the [token](TKind) which introduces the body, the [body precedence](Prec), and the
/// [else precedence](Prec). The last three are `None` where they do not apply.
///
/// A recognized introducing token is consumed. Any other token is left in place,
/// and the binding is treated as a match arm.
///
/// The returned expression [Prec]edences are expected to be [`Prec::next`]ed, so they may
/// be one level of precedence lower than would be intuitive (i.e. [Prec::Assign] instead of [Prec::Tuple])
#[rustfmt::skip]
#[allow(clippy::type_complexity)]
fn from_bind(p: &mut Parser<'_>) -> PResult<(BindOp, PPrec, Option<TKind>, Option<Prec>, Option<Prec>)> {
    // Shared by the explicit (`|`) and implicit forms of a match arm.
    let match_arm = (BindOp::Match, PPrec::Alt, Some(TKind::FatArrow), Some(Prec::Body), None);
    let eq = Some(TKind::Eq);

    let spec = match p.peek()?.kind {
        // Token           Operator        Pat prec      Body token  Body prec            Else prec
        TKind::Let    => (BindOp::Let,    PPrec::Tuple, eq,         Some(Prec::Compare), Some(Prec::Body)),
        TKind::Const  => (BindOp::Const,  PPrec::Tuple, eq,         Some(Prec::Assign),  None),
        TKind::Static => (BindOp::Static, PPrec::Tuple, eq,         Some(Prec::Assign),  None),
        TKind::Type   => (BindOp::Type,   PPrec::Tuple, eq,         Some(Prec::Project), None),
        TKind::Struct => (BindOp::Struct, PPrec::Tuple, None,       None,                None),
        TKind::Enum   => (BindOp::Enum,   PPrec::Tuple, None,       None,                None),
        TKind::Fn     => (BindOp::Fn,     PPrec::Fn,    None,       Some(Prec::Body),    None),
        TKind::Mod    => (BindOp::Mod,    PPrec::Max,   None,       Some(Prec::Body),    None),
        TKind::Impl   => (BindOp::Impl,   PPrec::Max,   None,       Some(Prec::Body),    None),
        TKind::Bar    => match_arm,
        // no consume!
        _ => return Ok(match_arm),
    };
    p.consume();
    Ok(spec)
}
impl<'t> Parse<'t> for Bind {
type Prec = Option<BindOp>;
fn parse(p: &mut Parser<'t>, expected_level: Self::Prec) -> PResult<Self> {
// let
let (level, patp, arrow, bodyp, failp) = from_bind(p)?;
if let Some(expected) = expected_level
&& level != expected
{
Err(ParseError::NotMatch(level, expected, p.span()))?
}
// <T,*>
let generics = match p.next_if(TKind::Lt)? {
Ok(_) => p.list(vec![], (), TKind::Comma, TKind::Gt)?,
Err(_) => vec![],
};
// Pat
let pat = p.parse(patp)?;
let Some(bodyp) = bodyp else {
return Ok(Self(level, generics, pat, vec![]));
};
// `=>` for match, `=`? for everything else
if let Some(arrow) = arrow
&& p.next_if(arrow).allow_eof()?.is_none_or(|v| v.is_err())
{
return Ok(Self(level, generics, pat, vec![]));
}
// `=` Expr
let body = p.parse(bodyp.next())?;
let Some(failp) = failp else {
return Ok(Self(level, generics, pat, vec![body]));
};
// `else` Expr
if p.next_if(TKind::Else)
.allow_eof()?
.is_none_or(|v| v.is_err())
{
return Ok(Self(level, generics, pat, vec![body]));
}
let fail = p.parse(failp.next())?;
Ok(Self(level, generics, pat, vec![body, fail]))
}
}
/// Parses one arm of a constructor expression: an identifier,
/// followed by whatever `opt_if` yields for the `:` token (presumably an optional `: Expr`).
impl<'t> Parse<'t> for MakeArm {
    type Prec = ();

    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
        let name = match p.next_if(TKind::Identifier)? {
            Ok(token) => token,
            Err(found) => {
                return Err(ParseError::Expected(TKind::Identifier, found, p.span()));
            }
        };
        let field = name.lexeme.string().expect("Identifier should have String");
        let value = p.opt_if(Prec::Body.value(), TKind::Colon)?;
        Ok(MakeArm(field, value))
    }
}

171
src/parser/pat.rs Normal file
View File

@@ -0,0 +1,171 @@
use super::{PResult, PResultExt, Parse, ParseError, Parser};
use crate::{
ast::*,
token::{TKind, Token},
};
/// Precedence levels of value and type pattern expressions.
///
/// A pattern parsed at a lower level (toward [Prec::Min]) may contain
/// operators of every higher level (toward [Prec::Max]).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Prec {
    /// The lowest precedence
    Min,
    /// "Alternate" pattern: `Pat | Pat`
    Alt,
    /// Tuple pattern: `Pat,+`
    Tuple,
    /// Type annotation: `Pat : Pat`
    Typed,
    /// Range pattern: `Pat .. Pat`, `Pat ..= Pat`
    Range,
    /// Function pattern: `Pat -> Pat`
    Fn,
    /// The highest precedence
    Max,
}

impl Prec {
    /// Returns the next-higher level of precedence, saturating at [Prec::Max].
    const fn next(self) -> Self {
        match self {
            Self::Min => Self::Alt,
            Self::Alt => Self::Tuple,
            Self::Tuple => Self::Typed,
            Self::Typed => Self::Range,
            Self::Range => Self::Fn,
            // There is nothing above Max, so it maps to itself.
            Self::Fn | Self::Max => Self::Max,
        }
    }
}
/// Looks up the infix [pattern operator](PatOp) that the incoming Token stands for,
/// together with its [precedence level](Prec).
///
/// Returns `None` for a token which is not a pattern operator.
fn from_infix(token: &Token) -> Option<(PatOp, Prec)> {
    let entry = match token.kind {
        TKind::Bar => (PatOp::Alt, Prec::Alt),
        TKind::Comma => (PatOp::Tuple, Prec::Tuple),
        TKind::Colon => (PatOp::Typed, Prec::Typed),
        TKind::DotDot => (PatOp::RangeEx, Prec::Range),
        TKind::DotDotEq => (PatOp::RangeIn, Prec::Range),
        TKind::Arrow => (PatOp::Fn, Prec::Fn),
        _ => return None,
    };
    Some(entry)
}
impl<'t> Parse<'t> for Pat {
type Prec = Prec;
/// Parses a pattern at the given precedence `level`.
///
/// A prefix phase produces the initial `head` from the first token; an infix loop
/// then folds in pattern operators for as long as `level <= prec` holds for the
/// next operator.
///
/// # Errors
/// Fails with [`ParseError::NotPattern`] if the first token cannot begin a pattern.
fn parse(p: &mut Parser<'t>, level: Prec) -> PResult<Self> {
let tok = p.peek()?;
// Prefix
let mut head = match tok.kind {
// A leading `fn` keyword is skipped; the rest is parsed at function-pattern level
// and returned directly, bypassing the infix loop below.
TKind::Fn => return p.consume().parse(Prec::Fn),
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
Pat::Lit(p.parse(())?)
}
// A leading `|` is skipped.
TKind::Bar => p.consume().parse(level)?,
TKind::Bang => p.consume().then(Pat::Never),
TKind::Amp => Pat::Op(PatOp::Ref, vec![p.consume().parse(Prec::Max)?]),
TKind::Star => Pat::Op(PatOp::Ptr, vec![p.consume().parse(Prec::Max)?]),
TKind::Mut => Pat::Op(PatOp::Mut, vec![p.consume().parse(Prec::Max)?]),
TKind::Pub => Pat::Op(PatOp::Pub, vec![p.consume().parse(Prec::Max)?]),
// `&&` is a single token, expanded here into two nested Ref patterns.
TKind::AmpAmp => Pat::Op(
PatOp::Ref,
vec![Pat::Op(PatOp::Ref, vec![p.consume().parse(Prec::Max)?])],
),
TKind::Identifier => match tok.lexeme.str() {
// The identifier `_` is the ignore pattern.
Some("_") => p.consume().then(Pat::Ignore),
_ => {
let mut path: Path = p.parse(())?;
// TODO: make these postfix.
match p.peek().map(|t| t.kind) {
// `Path(...)`: the `(` is not consumed here; the parenthesized part
// is parsed as a pattern of its own.
Ok(TKind::LParen) => Pat::NamedTuple(path, p.parse(Prec::Typed)?),
// `Path { ... }`: only recognized at sufficiently low precedence levels.
// An empty body becomes an empty tuple pattern.
Ok(TKind::LCurly) if level <= Prec::Tuple.next() => Pat::NamedStruct(
path,
p.consume()
.opt(Prec::Tuple, TKind::RCurly)?
.unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))),
),
// A path with a single part is a plain name; anything else stays a path.
// End of input after the path is not an error.
Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() {
1 => Self::Name(path.parts.pop().expect("name has 1 part")),
_ => Self::Path(path),
},
Err(e) => Err(e)?,
}
}
},
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
// Prefix `..`: a rest pattern, with an optional operand.
TKind::DotDot => Pat::Op(
PatOp::Rest,
// Identifier in Rest position always becomes binder
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Identifier) => vec![Pat::Name(
p.take_lexeme()
.expect("should have lexeme")
.string()
.expect("should be string"),
)],
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
// Prefix `..=`: an inclusive range with no lower bound, and an optional upper bound.
TKind::DotDotEq => Pat::Op(
PatOp::RangeIn,
match p.consume().peek().allow_eof()?.map(Token::kind) {
Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?],
_ => vec![],
},
),
// A parenthesized, comma-separated list is a tuple pattern.
TKind::LParen => Pat::Op(
PatOp::Tuple,
p.consume()
.list(vec![], Prec::Typed, TKind::Comma, TKind::RParen)?,
),
TKind::LBrack => parse_array_pat(p)?,
_ => Err(ParseError::NotPattern(tok.kind, tok.span))?,
};
// Infix: continue while the next token is a pattern operator allowed at this `level`.
while let Ok(Some(tok)) = p.peek().allow_eof()
&& let Some((op, prec)) = from_infix(tok)
&& level <= prec
{
let kind = tok.kind;
head = match op {
PatOp::Typed => Pat::Op(PatOp::Typed, vec![head, p.consume().parse(Prec::Max)?]),
PatOp::Fn => Pat::Op(PatOp::Fn, vec![head, p.consume().parse(Prec::Fn.next())?]),
// Ranges have an optional upper bound, parsed only if the next token
// is an integer, character, or identifier.
op @ (PatOp::RangeEx | PatOp::RangeIn) => Pat::Op(
op,
match p.consume().peek().map(|t| t.kind) {
Ok(TKind::Integer | TKind::Character | TKind::Identifier) => {
vec![head, p.parse(prec.next())?]
}
_ => vec![head],
},
),
// Remaining operators gather their operands into one flat list, using the
// operator token itself as the separator passed to `list_bare`.
op => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?),
}
}
Ok(head)
}
}
/// Parses a bracketed pattern: `[]`, `[a, b, c]`, or the repetition form `[item; count]`.
///
/// The current token is consumed unconditionally; the caller dispatches here on `[`.
/// A tuple pattern between the brackets is flattened into the slice's elements.
fn parse_array_pat(p: &mut Parser<'_>) -> PResult<Pat> {
    // Skip the opening bracket; an immediate `]` is the empty slice.
    if matches!(p.consume().peek()?.kind, TKind::RBrack) {
        p.consume();
        return Ok(Pat::Op(PatOp::Slice, vec![]));
    }

    let first: Pat = p.parse(Prec::Tuple)?;
    let count: Option<Pat> = p.opt_if(Prec::Tuple, TKind::Semi)?;
    p.expect(TKind::RBrack)?;

    if let Some(count) = count {
        return Ok(Pat::Op(PatOp::Arrep, vec![first, count]));
    }
    let elements = match first {
        Pat::Op(PatOp::Tuple, items) => items,
        single => vec![single],
    };
    Ok(Pat::Op(PatOp::Slice, elements))
}

View File

@@ -21,9 +21,9 @@ pub const fn Span(head: u32, tail: u32) -> Span {
}
impl Span {
/// Updates `self` to include all but the last byte in `other`
/// Computes the [struct@Span] containing both `self` and `other`
pub fn merge(self, other: Span) -> Span {
Span { head: self.head.min(other.head), tail: self.tail.max(other.head) }
Span { head: self.head.min(other.head), tail: self.tail.max(other.tail) }
}
}