diff --git a/src/ast.rs b/src/ast.rs index 76efe97..f5d9fbc 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -4,184 +4,32 @@ pub mod macro_matcher; pub mod visit; -/// A value with an annotation. -#[derive(Clone, PartialEq, Eq)] -pub struct Anno(pub T, pub A); - -impl std::fmt::Debug for Anno { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - ::fmt(&self.1, f)?; - f.write_str(": ")?; - ::fmt(&self.0, f) - } -} - /// An annotation: extra data added on to important AST nodes. pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {} impl Annotation for T {} -/// A qualified identifier -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct FqPath { - // TODO: Identifier interning - pub parts: Vec, - // TODO: generic parameters -} +/// A value with an annotation. +#[derive(Clone, PartialEq, Eq)] +pub struct Anno(pub T, pub A); -impl From<&str> for FqPath { - fn from(value: &str) -> Self { - Self { parts: vec![value.to_owned()] } - } -} - -/// A literal value (boolean, character, integer, string) -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Literal { - /// A boolean literal: true | false - Bool(bool), - /// A character literal: 'a', '\u{1f988}' - Char(char), - /// An integer literal: 0, 123, 0x10 - Int(u128, u32), - /// A string literal: - Str(String), -} - -/// A compound import declaration -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Use { - /// "*" - Glob, - /// Identifier - Name(String), - /// Identifier :: Use - Path(String, Box), - /// { Use, * } - Tree(Vec), -} - -/// Binding patterns for each kind of matchable value. +/// Expressions: The beating heart of Dough. /// -/// This covers both patterns in Match expressions, and type annotations. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Pat { - /// Matches anything without binding - Ignore, - /// Matches nothing, ever - Never, - /// Matches nothing; used for macro substitution - MetId(String), - /// Matches anything, and binds it to a name - Name(String), - /// Matches against a named const value - Path(FqPath), - /// Matches a Struct Expression `Ident { Pat }` - NamedStruct(FqPath, Box), - /// Matches a Tuple Struct Expression `Ident ( Pat )` - NamedTuple(FqPath, Box), - /// Matches a literal value by equality comparison - Lit(Literal), - /// Matches a compound pattern - Op(PatOp, Vec), -} - -/// Operators on lists of patterns -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum PatOp { - /// Changes the visibility mode to "public" - Pub, - /// Changes the binding mode to "mutable" - Mut, - /// Matches the dereference of a pointer (`&pat`) - Ref, - /// Matches the dereference of a raw pointer (`*pat`) - Ptr, - /// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`) - Rest, - /// Matches an exclusive bounded range (`0..100`) - RangeEx, - /// Matches an inclusive bounded range (`0..=100`) - RangeIn, - /// Matches the elements of a tuple - Tuple, - /// Matches the elements of a slice or array - Slice, - /// Matches a constant-size slice with repeating elements - Arrep, - /// Matches a type annotation or struct member - Typed, - /// Matches a function signature - Fn, - /// Matches one of a list of alternatives - Alt, -} - -/// A pattern binding -/// ```ignore -/// let Pat (= Expr (else Expr)?)? -/// const Pat (= Expr (else Expr)?)? -/// static Pat (= Expr (else Expr)?)? -/// type Pat (= Expr)? -/// struct Pat -/// enum Pat -/// fn Pat Expr -/// mod Pat Expr -/// impl Pat Expr -/// Pat => Expr // in match -/// ``` -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Bind( - pub BindKind, - pub Vec, - pub Pat, - pub Vec, A>>, -); - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum BindKind { - /// A `let Pat (= Expr (else Expr)?)?` binding - Let, - /// A `const Pat = Expr` binding - Const, - /// A `static Pat = Expr` binding - Static, - /// A type-alias binding - Type, - /// A struct definition - Struct, - /// An enum definition - Enum, - /// A `fn Pat Expr` binding - Fn, - /// A `mod Pat Expr` binding - Mod, - /// An `impl Pat Expr` binding - Impl, - /// A `Pat => Expr` binding - Match, -} - -/// A make (constructor) expression -/// ```ignore -/// Expr { (Ident (: Expr)?),* } -/// ``` -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Make(pub Anno, A>, pub Vec>); - -/// A single "arm" of a make expression -/// ```text -/// Identifier (':' Expr)? -/// ``` -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct MakeArm(pub String, pub Option, A>>); - -/// Expressions: The beating heart of Dough +/// A program in Doughlang is a single expression which, at compile time, +/// sets up the state in which a program will run. This expression binds types, +/// functions, and values to names which are exposed at runtime. +/// +/// Whereas in the body of a function, `do` sequences are ordered, in the global +/// scope (or subsequent module scopes, which are children of the global module,) +/// `do` sequences are considered unordered, and subexpressions may be reordered +/// in whichever way the compiler sees fit. This is especially important when +/// performing import resolution, as imports typically depend on the order +/// in which names are bound. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Expr { /// Omitted by semicolon insertion-elision rules Omitted, /// An identifier - Id(FqPath), + Id(Path), /// An escaped token for macro binding MetId(String), /// A literal bool, string, char, or int @@ -197,6 +45,18 @@ pub enum Expr { Op(Op, Vec>), } +/// Doughlang's AST is partitioned by data representation, so it +/// considers any expression which is composed solely of keywords, +/// symbols, and other expressions as operator expressions. +/// +/// This includes: +/// - Do-sequence expressions: `Expr ; Expr ` +/// - Type-cast expressions `Expr as Expr` +/// - Binding-modifier expressions: `pub Expr`, `#[Expr] Expr` +/// - Block and Group expressions: `{Expr?}`, `(Expr?)` +/// - Control flow: `if`, `while`, `loop`, `match`, `break`, `return` +/// - Function calls `Expr (Expr,*)` +/// - Traditional binary and unary operators (add, sub, neg, assign) #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Op { // -- true operators @@ -270,6 +130,170 @@ pub enum Op { XorSet, // Expr ^= Expr OrSet, // Expr |= Expr } +/// A qualified identifier +/// +/// TODO: qualify identifier +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Path { + // TODO: Identifier interning + pub parts: Vec, + // TODO: generic parameters +} + +/// A literal value (boolean, character, integer, string) +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Literal { + /// A boolean literal: true | false + Bool(bool), + /// A character literal: 'a', '\u{1f988}' + Char(char), + /// An integer literal: 0, 123, 0x10 + Int(u128, u32), + /// A string literal: + Str(String), +} + +/// A compound import declaration +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Use { + /// "*" + Glob, + /// Identifier + Name(String), + /// Identifier :: Use + Path(String, Box), + /// { Use, * } + Tree(Vec), +} + +/// A pattern binding +/// ```ignore +/// let Pat (= Expr (else Expr)?)? +/// const Pat (= Expr (else Expr)?)? +/// static Pat (= Expr (else Expr)?)? +/// type Pat (= Expr)? +/// struct Pat +/// enum Pat +/// fn Pat Expr +/// mod Pat Expr +/// impl Pat Expr +/// Pat => Expr // in match +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Bind( + pub BindOp, + pub Vec, + pub Pat, + pub Vec, A>>, +); + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BindOp { + /// A `let Pat (= Expr (else Expr)?)?` binding + Let, + /// A `const Pat = Expr` binding + Const, + /// A `static Pat = Expr` binding + Static, + /// A type-alias binding + Type, + /// A `fn Pat Expr` binding + Fn, + /// A `mod Pat Expr` binding + Mod, + /// An `impl Pat Expr` binding + Impl, + /// A struct definition + Struct, + /// An enum definition + Enum, + /// A `Pat => Expr` binding + Match, +} + +/// A make (constructor) expression +/// ```ignore +/// Expr { (Ident (: Expr)?),* } +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Make(pub Anno, A>, pub Vec>); + +/// A single "arm" of a make expression +/// ```text +/// Identifier (':' Expr)? +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MakeArm(pub String, pub Option, A>>); + +/// Binding patterns for each kind of matchable value. +/// +/// This covers both patterns in Match expressions, and type annotations. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Pat { + /// Matches anything without binding + Ignore, + /// Matches nothing, ever + Never, + /// Matches nothing; used for macro substitution + MetId(String), + /// Matches anything, and binds it to a name + Name(String), + /// Matches against a named const value + Path(Path), + /// Matches a Struct Expression `Ident { Pat }` + NamedStruct(Path, Box), + /// Matches a Tuple Struct Expression `Ident ( Pat )` + NamedTuple(Path, Box), + /// Matches a literal value by equality comparison + Lit(Literal), + /// Matches a compound pattern + Op(PatOp, Vec), +} + +/// Operators on lists of patterns +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum PatOp { + /// Changes the visibility mode to "public" + Pub, + /// Changes the binding mode to "mutable" + Mut, + /// Matches the dereference of a pointer (`&pat`) + Ref, + /// Matches the dereference of a raw pointer (`*pat`) + Ptr, + /// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`) + Rest, + /// Matches an exclusive bounded range (`0..100`) + RangeEx, + /// Matches an inclusive bounded range (`0..=100`) + RangeIn, + /// Matches the elements of a tuple + Tuple, + /// Matches the elements of a slice or array + Slice, + /// Matches a constant-size slice with repeating elements + Arrep, + /// Matches a type annotation or struct member + Typed, + /// Matches a function signature + Fn, + /// Matches one of a list of alternatives + Alt, +} + +impl std::fmt::Debug for Anno { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ::fmt(&self.1, f)?; + f.write_str(": ")?; + ::fmt(&self.0, f) + } +} + +impl From<&str> for Path { + fn from(value: &str) -> Self { + Self { parts: vec![value.to_owned()] } + } +} impl Default for Expr { fn default() -> Self { @@ -313,106 +337,12 @@ impl Expr { use crate::{fmt::FmtAdapter, span::Span}; use std::{fmt::Display, format_args as fmt}; -impl Display for Literal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Bool(v) => v.fmt(f), - Self::Char(c) => write!(f, "'{}'", c.escape_debug()), - Self::Int(i, 2) => write!(f, "0b{i:b}"), - Self::Int(i, 8) => write!(f, "0o{i:o}"), - Self::Int(i, 16) => write!(f, "0x{i:x}"), - Self::Int(i, _) => i.fmt(f), - Self::Str(s) => write!(f, "\"{}\"", s.escape_debug()), - } - } -} - -impl Display for FqPath { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { parts } = self; - f.list(parts, "::") - } -} - impl Display for Anno { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.0) } } -impl Display for Use { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Glob => "*".fmt(f), - Self::Name(name) => name.fmt(f), - Self::Path(segment, rest) => write!(f, "{segment}::{rest}"), - Self::Tree(items) => match items.len() { - 0 => "{}".fmt(f), - 1..=3 => f.delimit("{ ", " }").list(items, ", "), - _ => f - .delimit_indented("{", "}") - .list_wrap("\n", items, ",\n", ",\n"), - }, - } - } -} - -impl Display for Bind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self(op, gens, pat, exprs) = self; - op.fmt(f)?; - if !gens.is_empty() { - f.delimit("<", "> ").list(gens, ", ")?; - } - - match op { - BindKind::Match => f.delimit(fmt!("{pat} => "), "").list(exprs, ",!? "), - BindKind::Fn | BindKind::Mod | BindKind::Impl => { - f.delimit(fmt!("{pat} "), "").list(exprs, ",!? ") - } - BindKind::Struct | BindKind::Enum => match pat { - Pat::NamedStruct(name, bind) => match bind.as_ref() { - Pat::Op(PatOp::Tuple, parts) => f - .delimit_indented(fmt!("{name} {{"), "}") - .list_wrap("\n", parts, ",\n", ",\n"), - other => write!(f, "{name} {{ {other} }}"), - }, - _ => pat.fmt(f), - }, - _ => match exprs.as_slice() { - [] => write!(f, "{pat}"), - [value] => write!(f, "{pat} = {value}"), - [value, fail] => write!(f, "{pat} = {value} else {fail}"), - other => f.delimit(fmt!("{pat} ("), ")").list(other, ", "), - }, - } - } -} - -impl Display for BindKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - Self::Let => "let ", - Self::Const => "const ", - Self::Static => "static ", - Self::Type => "type ", - Self::Struct => "struct ", - Self::Enum => "enum ", - Self::Fn => "fn ", - Self::Mod => "mod ", - Self::Impl => "impl ", - Self::Match => "", - }) - } -} - -impl Display for Make { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self(expr, make_arms) = self; - f.delimit(fmt!("({expr} {{"), "})").list(make_arms, ", ") - } -} - impl Display for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -534,6 +464,100 @@ impl Display for Op { } } +impl Display for Path { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { parts } = self; + f.list(parts, "::") + } +} + +impl Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Bool(v) => v.fmt(f), + Self::Char(c) => write!(f, "'{}'", c.escape_debug()), + Self::Int(i, 2) => write!(f, "0b{i:b}"), + Self::Int(i, 8) => write!(f, "0o{i:o}"), + Self::Int(i, 16) => write!(f, "0x{i:x}"), + Self::Int(i, _) => i.fmt(f), + Self::Str(s) => write!(f, "\"{}\"", s.escape_debug()), + } + } +} + +impl Display for Use { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Glob => "*".fmt(f), + Self::Name(name) => name.fmt(f), + Self::Path(segment, rest) => write!(f, "{segment}::{rest}"), + Self::Tree(items) => match items.len() { + 0 => "{}".fmt(f), + 1..=3 => f.delimit("{ ", " }").list(items, ", "), + _ => f + .delimit_indented("{", "}") + .list_wrap("\n", items, ",\n", ",\n"), + }, + } + } +} + +impl Display for Bind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self(op, gens, pat, exprs) = self; + op.fmt(f)?; + if !gens.is_empty() { + f.delimit("<", "> ").list(gens, ", ")?; + } + + match op { + BindOp::Match => f.delimit(fmt!("{pat} => "), "").list(exprs, ",!? "), + BindOp::Fn | BindOp::Mod | BindOp::Impl => { + f.delimit(fmt!("{pat} "), "").list(exprs, ",!? ") + } + BindOp::Struct | BindOp::Enum => match pat { + Pat::NamedStruct(name, bind) => match bind.as_ref() { + Pat::Op(PatOp::Tuple, parts) => f + .delimit_indented(fmt!("{name} {{"), "}") + .list_wrap("\n", parts, ",\n", ",\n"), + other => write!(f, "{name} {{ {other} }}"), + }, + _ => pat.fmt(f), + }, + _ => match exprs.as_slice() { + [] => write!(f, "{pat}"), + [value] => write!(f, "{pat} = {value}"), + [value, fail] => write!(f, "{pat} = {value} else {fail}"), + other => f.delimit(fmt!("{pat} ("), ")").list(other, ", "), + }, + } + } +} + +impl Display for BindOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + Self::Let => "let ", + Self::Const => "const ", + Self::Static => "static ", + Self::Type => "type ", + Self::Struct => "struct ", + Self::Enum => "enum ", + Self::Fn => "fn ", + Self::Mod => "mod ", + Self::Impl => "impl ", + Self::Match => "", + }) + } +} + +impl Display for Make { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self(expr, make_arms) = self; + f.delimit(fmt!("({expr} {{"), "})").list(make_arms, ", ") + } +} + impl Display for MakeArm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -598,7 +622,7 @@ impl TryFrom> for Pat { fn try_from(value: Expr) -> Result { Ok(match value { - Expr::Id(FqPath { mut parts }) if parts.len() == 1 => { + Expr::Id(Path { mut parts }) if parts.len() == 1 => { match parts.pop().expect("parts should have len 1") { ig if ig == "_" => Self::Ignore, name => Self::Name(name), diff --git a/src/ast/visit.rs b/src/ast/visit.rs index 5fb968b..ebdb91a 100644 --- a/src/ast/visit.rs +++ b/src/ast/visit.rs @@ -12,7 +12,7 @@ pub trait Visit<'a> { fn visit_ident(&mut self, name: &'a str) -> Result<(), Self::Error> { name.children(self) } - fn visit_path(&mut self, path: &'a FqPath) -> Result<(), Self::Error> { + fn visit_path(&mut self, path: &'a Path) -> Result<(), Self::Error> { path.children(self) } fn visit_literal(&mut self, lit: &'a Literal) -> Result<(), Self::Error> { @@ -52,7 +52,7 @@ impl<'a> Walk<'a> for str { } } -impl<'a> Walk<'a> for FqPath { +impl<'a> Walk<'a> for Path { fn visit_in + ?Sized>(&'a self, v: &mut V) -> Result<(), V::Error> { v.visit_path(self) } diff --git a/src/main.rs b/src/main.rs index ef46cd0..81c8dbe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ //! Tests the lexer use doughlang::{ ast::{Anno, Pat}, - parser::PPrec, + parser::pat::Prec as PPrec, }; #[allow(unused_imports)] use doughlang::{ diff --git a/src/parser.rs b/src/parser.rs index 7d1422c..986f763 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,74 +6,21 @@ use crate::{ span::Span, token::{Lexeme, TKind, Token}, }; -use std::{error::Error, fmt::Display, iter, vec}; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ParseError { - /// Reached the expected end of input. - EOF(Span), - /// Unexpectedly reached end of input. - UnexpectedEOF(Span), - FromLexer(LexError), - Expected(TKind, TKind, Span), - NotLiteral(TKind, Span), - NotUse(TKind, Span), - NotPattern(TKind, Span), - NotType(TKind, Span), - NotPrefix(TKind, Span), - NotInfix(TKind, Span), - NotPostfix(TKind, Span), +pub trait Parse<'t> { + type Prec: Copy; + + fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult + where Self: Sized; } -pub use ParseError::EOF; +pub mod expr; +pub mod pat; -impl Error for ParseError {} -impl Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::EOF(loc) => write!(f, "{loc}: Reached end of input."), - Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."), - Self::FromLexer(e) => e.fmt(f), - Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."), - Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), - Self::NotUse(tk, loc) => write!(f, "{loc}: {tk:?} is no use!"), - Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), - Self::NotType(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a type."), - Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."), - Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."), - Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."), - } - } -} - -pub type PResult = Result; - -trait PResultExt { - fn no_eof(self) -> PResult; - fn allow_eof(self) -> PResult>; -} - -impl PResultExt for PResult { - fn no_eof(self) -> Self { - match self { - Err(ParseError::EOF(span)) => Err(ParseError::UnexpectedEOF(span)), - other => other, - } - } - fn allow_eof(self) -> PResult> { - match self { - Ok(t) => Ok(Some(t)), - Err(ParseError::EOF(_)) => Ok(None), - Err(e) => Err(e), - } - } -} - -/// Opens a scope where [`ParseError::EOF`] is unexpected (See [`PResultExt::no_eof`]) -fn no_eof(f: impl FnOnce() -> PResult) -> PResult { - f().no_eof() -} +pub mod error; +pub use error::{EOF, PResult, PResultExt, ParseError, no_eof}; +/// Handles stateful extraction from a [Lexer], with single-[Token] lookahead. #[derive(Debug)] pub struct Parser<'t> { pub lexer: Lexer<'t>, @@ -93,6 +40,7 @@ impl<'t> Parser<'t> { t } + /// Gets the [struct@Span] of the last-consumed [Token] pub const fn span(&self) -> Span { self.last_loc } @@ -116,11 +64,6 @@ impl<'t> Parser<'t> { }, }; let next_tok = self.next_tok.insert(next_tok); - - if let Ok(tok) = next_tok { - self.last_loc = tok.span; - } - next_tok.as_ref().map_err(|e| *e) } @@ -139,7 +82,12 @@ impl<'t> Parser<'t> { .next_tok .take() .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc))); - self.elide_do = matches!(tok, Ok(Token { kind: TKind::RCurly | TKind::Semi, .. })); + + if let Ok(tok) = &tok { + self.last_loc = tok.span; + self.elide_do = matches!(tok.kind, TKind::RCurly | TKind::Semi) + } + tok } @@ -216,7 +164,7 @@ impl<'t> Parser<'t> { }) } - /// Parses a P unless the next token is `end` + /// Parses a P unless the next [Token]'s [TKind] is `end` pub fn opt>(&mut self, level: P::Prec, end: TKind) -> PResult> { let out = match self.peek_if(end)? { None => Some(self.parse(level).no_eof()?), @@ -226,6 +174,7 @@ impl<'t> Parser<'t> { Ok(out) } + /// Ensures the next [Token]'s [TKind] is `next` pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> { self.next_if(next)? .map_err(|tk| ParseError::Expected(next, tk, self.span()))?; @@ -239,14 +188,7 @@ impl<'t> Parser<'t> { } } -pub trait Parse<'t> { - type Prec: Copy; - - fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult - where Self: Sized; -} - -impl<'t> Parse<'t> for FqPath { +impl<'t> Parse<'t> for Path { type Prec = (); fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { @@ -261,7 +203,7 @@ impl<'t> Parse<'t> for FqPath { } } - Ok(FqPath { parts }) + Ok(Path { parts }) } } @@ -273,7 +215,7 @@ impl<'t> Parse<'t> for Literal { TKind::True => p.consume().then(Literal::Bool(true)), TKind::False => p.consume().then(Literal::Bool(false)), TKind::Character | TKind::Integer | TKind::String => { - match p.take().expect("should have Token").lexeme { + match p.take().expect("should have Token after peek").lexeme { Lexeme::String(str) => Literal::Str(str), Lexeme::Integer(int, base) => Literal::Int(int, base), Lexeme::Char(chr) => Literal::Char(chr), @@ -304,734 +246,6 @@ impl<'t> Parse<'t> for Use { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum PPrec { - Min, - Alt, - Tuple, - Typed, - Range, - Fn, - Max, -} - -impl PPrec { - const fn next(self) -> Self { - match self { - Self::Min => Self::Alt, - Self::Alt => Self::Tuple, - Self::Tuple => Self::Typed, - Self::Typed => Self::Range, - Self::Range => Self::Fn, - Self::Fn => Self::Max, - Self::Max => Self::Max, - } - } -} - -enum PatPs { - Op(PatOp), -} - -fn pat_from_infix(token: &Token) -> Option<(PatPs, PPrec)> { - Some(match token.kind { - TKind::DotDot => (PatPs::Op(PatOp::RangeEx), PPrec::Range), - TKind::DotDotEq => (PatPs::Op(PatOp::RangeIn), PPrec::Range), - TKind::Colon => (PatPs::Op(PatOp::Typed), PPrec::Typed), - TKind::Comma => (PatPs::Op(PatOp::Tuple), PPrec::Tuple), - TKind::Arrow => (PatPs::Op(PatOp::Fn), PPrec::Fn), - TKind::Bar => (PatPs::Op(PatOp::Alt), PPrec::Alt), - _ => None?, - }) -} - -impl<'t> Parse<'t> for Pat { - type Prec = PPrec; - - fn parse(p: &mut Parser<'t>, level: PPrec) -> PResult { - let tok = p.peek()?; - - // Prefix - let mut head = match tok.kind { - TKind::Fn => return p.consume().parse(PPrec::Fn), - TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { - Pat::Lit(p.parse(())?) - } - TKind::Bar => p.consume().parse(level)?, - TKind::Bang => p.consume().then(Pat::Never), - TKind::Amp => Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?]), - TKind::Star => Pat::Op(PatOp::Ptr, vec![p.consume().parse(PPrec::Max)?]), - TKind::Mut => Pat::Op(PatOp::Mut, vec![p.consume().parse(PPrec::Max)?]), - TKind::Pub => Pat::Op(PatOp::Pub, vec![p.consume().parse(PPrec::Max)?]), - TKind::AmpAmp => Pat::Op( - PatOp::Ref, - vec![Pat::Op(PatOp::Ref, vec![p.consume().parse(PPrec::Max)?])], - ), - TKind::Identifier => match tok.lexeme.str() { - Some("_") => p.consume().then(Pat::Ignore), - _ => { - let mut path: FqPath = p.parse(())?; - // TODO: make these postfix. - match p.peek().map(|t| t.kind) { - Ok(TKind::LParen) => Pat::NamedTuple(path, p.parse(PPrec::Typed)?), - Ok(TKind::LCurly) if level <= PPrec::Tuple.next() => Pat::NamedStruct( - path, - p.consume() - .opt(PPrec::Tuple, TKind::RCurly)? - .unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))), - ), - Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() { - 1 => Self::Name(path.parts.pop().expect("name has 1 part")), - _ => Self::Path(path), - }, - Err(e) => Err(e)?, - } - } - }, - TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()), - TKind::DotDot => Pat::Op( - PatOp::Rest, - // Identifier in Rest position always becomes binder - match p.consume().peek().allow_eof()?.map(Token::kind) { - Some(TKind::Identifier) => vec![Pat::Name( - p.take_lexeme() - .expect("should have lexeme") - .string() - .expect("should be string"), - )], - Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], - _ => vec![], - }, - ), - TKind::DotDotEq => Pat::Op( - PatOp::RangeIn, - match p.consume().peek().allow_eof()?.map(Token::kind) { - Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], - _ => vec![], - }, - ), - TKind::LParen => Pat::Op( - PatOp::Tuple, - p.consume() - .list(vec![], PPrec::Typed, TKind::Comma, TKind::RParen)?, - ), - TKind::LBrack => parse_array_pat(p)?, - _ => Err(ParseError::NotPattern(tok.kind, tok.span))?, - }; - - while let Ok(Some(tok)) = p.peek().allow_eof() - && let Some((op, prec)) = pat_from_infix(tok) - && level <= prec - { - let kind = tok.kind; - head = match op { - PatPs::Op(PatOp::Typed) => { - Pat::Op(PatOp::Typed, vec![head, p.consume().parse(PPrec::Max)?]) - } - PatPs::Op(PatOp::Fn) => { - Pat::Op(PatOp::Fn, vec![head, p.consume().parse(PPrec::Fn.next())?]) - } - PatPs::Op(op @ (PatOp::RangeEx | PatOp::RangeIn)) => Pat::Op( - op, - match p.consume().peek().map(|t| t.kind) { - Ok(TKind::Integer | TKind::Character | TKind::Identifier) => { - vec![head, p.parse(prec.next())?] - } - _ => vec![head], - }, - ), - PatPs::Op(op) => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?), - } - } - Ok(head) - } -} - -fn parse_array_pat(p: &mut Parser<'_>) -> PResult { - if p.consume().peek()?.kind == TKind::RBrack { - p.consume(); - return Ok(Pat::Op(PatOp::Slice, vec![])); - } - - let item = p.parse(PPrec::Tuple)?; - let repeat = p.opt_if(PPrec::Tuple, TKind::Semi)?; - p.expect(TKind::RBrack)?; - - Ok(match (repeat, item) { - (Some(repeat), item) => Pat::Op(PatOp::Arrep, vec![item, repeat]), - (None, Pat::Op(PatOp::Tuple, items)) => Pat::Op(PatOp::Slice, items), - (None, item) => Pat::Op(PatOp::Slice, vec![item]), - }) -} - -/// Organizes the precedence hierarchy for syntactic elements -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum Prec { - Min, - /// The Semicolon Operator gets its own precedence level - Do, - /// An assignment - Assign, - /// Constructor for a tuple - Tuple, - /// The body of a function, conditional, etc. - Body, - /// Constructor for a struct - Make, - /// The conditional of an `if` or `while` (which is really an `if`) - Logical, - /// The short-circuiting "boolean or" operator - LogOr, - /// The short-circuiting "boolean and" operator - LogAnd, - /// Value comparison operators - Compare, - /// Constructor for a Range - Range, - /// Binary/bitwise operators - Binary, - /// Bit-shifting operators - Shift, - /// Addition and Subtraction operators - Factor, - /// Multiplication, Division, and Remainder operators - Term, - /// Negation, (De)reference, Try - Unary, - /// Place-projection operators - Project, - /// Array/Call subscripting and reference - Extend, - Max, -} - -impl Prec { - pub const MIN: usize = Prec::Min.value(); - - pub const fn value(self) -> usize { - self as usize * 2 - } - - pub const fn prev(self) -> usize { - match self { - Self::Assign => self.value() + 1, - _ => self.value(), - } - } - - pub const fn next(self) -> usize { - match self { - Self::Assign => self.value(), - _ => self.value() + 1, - } - } -} - -/// `PseudoOperator`: fake operators used to give certain tokens special behavior. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Ps { - Id, // Identifier - Mid, // MetaIdentifier - Lit, // Literal - Use, // use Use - Def, // any definition (let, const, static, struct, enum, fn, ...) - For, // for Pat in Expr Expr else Expr - Lambda0, // || Expr - Lambda, // | Pat,* | Expr - DoubleRef, // && Expr - Make, // Expr{ Expr,* } - ImplicitDo, // An implicit semicolon - End, // Produces an empty value. - Op(Op), // A normal [ast::Op] -} - -fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { - Ok(match token.kind { - TKind::Do => (Ps::Op(Op::Do), Prec::Do), - TKind::Semi => (Ps::End, Prec::Body), - - TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max), - TKind::Grave => (Ps::Mid, Prec::Max), - TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { - (Ps::Lit, Prec::Max) - } - TKind::Use => (Ps::Use, Prec::Max), - - TKind::Pub => (Ps::Op(Op::Pub), Prec::Body), - TKind::For => (Ps::For, Prec::Body), - TKind::Match => (Ps::Op(Op::Match), Prec::Body), - TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign), - - TKind::Fn - | TKind::Mod - | TKind::Impl - | TKind::Let - | TKind::Const - | TKind::Static - | TKind::Type - | TKind::Struct - | TKind::Enum => (Ps::Def, Prec::Body), - - TKind::Loop => (Ps::Op(Op::Loop), Prec::Body), - TKind::If => (Ps::Op(Op::If), Prec::Body), - TKind::While => (Ps::Op(Op::While), Prec::Body), - TKind::Break => (Ps::Op(Op::Break), Prec::Body), - TKind::Return => (Ps::Op(Op::Return), Prec::Body), - - TKind::LCurly => (Ps::Op(Op::Block), Prec::Min), - TKind::RCurly => (Ps::End, Prec::Do), - TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple), - TKind::RBrack => (Ps::End, Prec::Tuple), - TKind::LParen => (Ps::Op(Op::Group), Prec::Min), - TKind::RParen => (Ps::End, Prec::Tuple), - TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend), - TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend), - TKind::Bang => (Ps::Op(Op::Not), Prec::Unary), - TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary), - TKind::Bar => (Ps::Lambda, Prec::Body), - TKind::BarBar => (Ps::Lambda0, Prec::Body), - TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), - TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), - TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary), - TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary), - TKind::Star => (Ps::Op(Op::Deref), Prec::Unary), - TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary), - - kind => Err(ParseError::NotPrefix(kind, token.span))?, - }) -} - -const fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { - Ok(match token.kind { - TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration - TKind::In => (Ps::Op(Op::Do), Prec::Do), - - TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), - TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign), - TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign), - TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign), - TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign), - TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign), - TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign), - TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign), - TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign), - TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign), - TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign), - TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), - TKind::LCurly => (Ps::Make, Prec::Make), - TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), - TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), - TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), - TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare), - TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare), - TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare), - TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare), - TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare), - TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare), - TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), - TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), - TKind::Amp => (Ps::Op(Op::And), Prec::Binary), - TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary), - TKind::Bar => (Ps::Op(Op::Or), Prec::Binary), - TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift), - TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift), - TKind::Plus => (Ps::Op(Op::Add), Prec::Factor), - TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor), - TKind::Star => (Ps::Op(Op::Mul), Prec::Term), - TKind::Slash => (Ps::Op(Op::Div), Prec::Term), - TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), - - TKind::Question => (Ps::Op(Op::Try), Prec::Unary), - TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), - TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), - TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), - - TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), - TKind::As => (Ps::Op(Op::As), Prec::Max), - _ => (Ps::ImplicitDo, Prec::Do), - }) -} - -impl<'t> Parse<'t> for BindKind { - type Prec = (); - - fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { - let bk = match p.peek()?.kind { - TKind::Let => BindKind::Let, - TKind::Const => BindKind::Const, - TKind::Static => BindKind::Static, - TKind::Type => BindKind::Type, - TKind::Struct => BindKind::Struct, - TKind::Enum => BindKind::Enum, - TKind::Fn => BindKind::Fn, - TKind::Mod => BindKind::Mod, - TKind::Impl => BindKind::Impl, - TKind::Bar => BindKind::Match, - // no consume! - _ => return Ok(BindKind::Match), - }; - p.consume(); - Ok(bk) - } -} - -impl<'t> Parse<'t> for Bind { - type Prec = (); - - fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult { - let level = p.parse(())?; - let generics = match p.next_if(TKind::Lt)? { - Ok(_) => p.list(vec![], (), TKind::Comma, TKind::Gt)?, - Err(_) => vec![], - }; - - match level { - BindKind::Match => { - // |? Pat => Expr - Ok(Self( - level, - generics, - p.parse(PPrec::Alt)?, - vec![p.expect(TKind::FatArrow)?.parse(Prec::Body.next())?], - )) - } - BindKind::Mod | BindKind::Impl => Ok(Self( - level, - generics, - p.parse(PPrec::Max)?, - vec![p.parse(Prec::Body.next())?], - )), - BindKind::Fn => Ok(Self( - level, - generics, - p.parse(PPrec::Fn)?, - vec![p.parse(Prec::Body.next())?], - )), - _ => { - // let Pat - let pat = p.parse(PPrec::Tuple)?; - if p.next_if(TKind::Eq).allow_eof()?.is_none_or(|v| v.is_err()) { - return Ok(Self(level, generics, pat, vec![])); - } - - // = Expr - let body = p.parse(Prec::Tuple.value())?; - if p.next_if(TKind::Else) - .allow_eof()? - .is_none_or(|v| v.is_err()) - { - return Ok(Self(level, generics, pat, vec![body])); - } - - // else Expr - Ok(Self( - level, - generics, - pat, - vec![body, p.parse(Prec::Body.next())?], - )) - } - } - } -} - -impl<'t> Parse<'t> for MakeArm { - type Prec = (); - - fn parse(p: &mut Parser<'t>, _level: ()) -> PResult { - let name = p - .next_if(TKind::Identifier)? - .map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?; - Ok(MakeArm( - name.lexeme.string().expect("Identifier should have String"), - p.opt_if(Prec::Body.value(), TKind::Colon)?, - )) - } -} - -impl<'t> Parse<'t> for Expr { - type Prec = usize; - - /// Parses an [Expr]ession. - /// - /// The `level` parameter indicates the operator binding level of the expression. - fn parse(p: &mut Parser<'t>, level: usize) -> PResult { - const MIN: usize = Prec::MIN; - - // TODO: in-tree doc comments - while p.next_if(TKind::Doc)?.is_ok() {} - - // Prefix - let tok @ &Token { kind, span, .. } = p.peek()?; - let ((op, prec), span) = (from_prefix(tok)?, span); - no_eof(move || { - let mut head = match op { - // "End" is produced when an "empty" expression is syntactically required. - // This happens when a semi or closing delimiter begins an expression. - // The token which emitted "End" cannot be consumed, as it is expected elsewhere. - Ps::End if level <= prec.next() => Expr::Omitted, - Ps::End => Err(ParseError::NotPrefix(kind, span))?, - - Ps::Id => Expr::Id(p.parse(())?), - Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()), - Ps::Lit => Expr::Lit(p.parse(())?), - Ps::Use => Expr::Use(p.consume().parse(())?), - Ps::Def => Expr::Bind(p.parse(())?), - Ps::Lambda | Ps::Lambda0 => { - p.consume(); - - let args = if op == Ps::Lambda { - p.opt(PPrec::Tuple, TKind::Bar)? - .unwrap_or(Pat::Op(PatOp::Tuple, vec![])) - } else { - Pat::Op(PatOp::Tuple, vec![]) - }; - - let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore); - - Expr::Bind(Box::new(Bind( - BindKind::Fn, - vec![], - Pat::Op(PatOp::Fn, vec![args, rety]), - vec![p.parse(Prec::Body.next())?], - ))) - } - Ps::For => parse_for(p, ())?, - Ps::Op(Op::Match) => parse_match(p)?, - Ps::Op(Op::Meta) => Expr::Op( - Op::Meta, - vec![ - p.consume() - .expect(TKind::LBrack)? - .opt(MIN, TKind::RBrack)? - .unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)), - p.parse(level)?, - ], - ), - Ps::Op(Op::Block) => Expr::Op( - Op::Block, - p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), - ), - Ps::Op(Op::Array) => parse_array(p)?, - Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? { - Some(value) => Expr::Op(Op::Group, vec![value]), - None => Expr::Op(Op::Tuple, vec![]), - }, - Ps::Op(op @ (Op::If | Op::While)) => { - p.consume(); - let exprs = vec![ - // conditional restricted to Logical operators or above - p.parse(Prec::Logical.value())?, - p.parse(prec.next())?, - match p.peek() { - Ok(Token { kind: TKind::Else, .. }) => { - p.consume().parse(prec.next())? - } - _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), - }, - ]; - Expr::Op(op, exprs) - } - Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| { - Expr::Op( - Op::Refer, - vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)], - ) - })?, - - Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]), - _ => unimplemented!("prefix {op:?}"), - }; - - // Infix and Postfix - while let Ok(Some(tok)) = p.peek().allow_eof() - && let Ok((op, prec)) = from_infix(tok) - && level <= prec.prev() - && op != Ps::End - { - let kind = tok.kind; - let span = span.merge(p.span()); - - head = match op { - // Make (structor expressions) are context-sensitive - Ps::Make => match &head { - Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make( - head.anno(span), - p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, - ))), - _ => break, - }, - // As is ImplicitDo (semicolon elision) - Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?), - Ps::ImplicitDo => break, - // Allow `;` at end of file - Ps::Op(Op::Do) => head.and_do( - span, - match p.consume().peek().allow_eof()? { - Some(_) => p.parse(prec.next())?, - None => Anno(Default::default(), span), - }, - ), - Ps::Op(Op::Index) => Expr::Op( - Op::Index, - p.consume() - .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?, - ), - Ps::Op(Op::Call) => Expr::Op( - Op::Call, - vec![ - head.anno(span), - p.consume() - .opt(0, TKind::RParen)? - .unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)), - ], - ), - Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op( - op, - p.consume() - .list_bare(vec![head.anno(span)], prec.next(), kind)?, - ), - Ps::Op(op @ Op::Try) => { - p.consume(); - Expr::Op(op, vec![head.anno(span)]) - } - Ps::Op(op) => { - Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]) - } - _ => Err(ParseError::NotInfix(kind, span))?, - } - } - - Ok(head) - }) - } -} - -/// Parses an array with 0 or more elements, or an array-repetition -fn parse_array(p: &mut Parser<'_>) -> PResult { - if p.consume().peek()?.kind == TKind::RBrack { - p.consume(); - return Ok(Expr::Op(Op::Array, vec![])); - } - - let prec = Prec::Tuple; - let item = p.parse(prec.value())?; - let repeat = p.opt_if(prec.next(), TKind::Semi)?; - p.expect(TKind::RBrack)?; - - Ok(match (repeat, item) { - (Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]), - (None, Anno(Expr::Op(Op::Tuple, items), _)) => Expr::Op(Op::Array, items), - (None, item) => Expr::Op(Op::Array, vec![item]), - }) -} - -fn parse_match(p: &mut Parser<'_>) -> PResult { - let scrutinee = p.consume().parse(Prec::Logical.value())?; - - let arms = p - .expect(TKind::LCurly)? - .list(vec![], (), TKind::Comma, TKind::RCurly)? - .into_iter() - .map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span)); - - let expr = Expr::Op(Op::Match, iter::once(scrutinee).chain(arms).collect()); - - Ok(expr) -} - -fn parse_for(p: &mut Parser<'_>, _level: ()) -> PResult { - // for Pat - let pat = p.consume().parse(PPrec::Tuple)?; - // in Expr - let iter: Anno = p.expect(TKind::In)?.parse(Prec::Logical.next())?; - let cspan = iter.1; - // Expr - let pass: Anno = p.parse(Prec::Body.next())?; - let pspan = pass.1; - // else Expr? - let fail = match p.next_if(TKind::Else).allow_eof()? { - Some(Ok(_)) => p.parse(Prec::Body.next())?, - _ => Expr::Op(Op::Tuple, vec![]).anno(pspan), - }; - let fspan = fail.1; - /* - for `pat in `iter `pass else `fail - ==> - match (`iter).into_iter() { - #iter => loop match #iter.next() { - None => break `fail, - Some(`pat) => `pass, - }, - } - */ - - // TODO: A better way to do this kind of substitution desugaring - // without losing span information! - Ok(Expr::Op( - Op::Match, - vec![ - Expr::Op( - Op::Dot, - vec![ - iter, - Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan), - ], - ) - .anno(cspan), - Expr::Bind(Box::new(Bind( - BindKind::Match, - vec![], - Pat::Name("#iter".into()), - vec![ - Expr::Op( - Op::Loop, - vec![ - Expr::Op( - Op::Match, - vec![ - Expr::Op( - Op::Dot, - vec![ - Expr::Id("#iter".into()).anno(cspan), - Expr::Op( - Op::Call, - vec![Expr::Id("next".into()).anno(cspan)], - ) - .anno(cspan), - ], - ) - .anno(cspan), - Expr::Bind(Box::new(Bind( - BindKind::Match, - vec![], - Pat::Name("None".into()), - vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)], - ))) - .anno(fspan), - Expr::Bind(Box::new(Bind( - BindKind::Match, - vec![], - Pat::NamedTuple( - "Some".into(), - Box::new(Pat::Op(PatOp::Tuple, vec![pat])), - ), - vec![pass], - ))) - .anno(pspan), - ], - ) - .anno(pspan), - ], - ) - .anno(pspan), - ], - ))) - .anno(pspan), - ], - )) -} - impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno

{ type Prec = P::Prec; fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult diff --git a/src/parser/error.rs b/src/parser/error.rs new file mode 100644 index 0000000..7326fd2 --- /dev/null +++ b/src/parser/error.rs @@ -0,0 +1,70 @@ +use crate::{ast::BindOp, lexer::LexError, span::Span, token::TKind}; +use std::{error::Error, fmt::Display}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseError { + /// Reached the expected end of input. + EOF(Span), + /// Unexpectedly reached end of input. + UnexpectedEOF(Span), + FromLexer(LexError), + Expected(TKind, TKind, Span), + NotLiteral(TKind, Span), + NotUse(TKind, Span), + NotPattern(TKind, Span), + NotMatch(BindOp, BindOp, Span), + NotPrefix(TKind, Span), + NotInfix(TKind, Span), + NotPostfix(TKind, Span), +} + +pub use ParseError::EOF; + +impl Error for ParseError {} +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::EOF(loc) => write!(f, "{loc}: Reached end of input."), + Self::UnexpectedEOF(loc) => write!(f, "{loc}: Unexpected end of input."), + Self::FromLexer(e) => e.fmt(f), + Self::Expected(e, tk, loc) => write!(f, "{loc}: Expected {e:?}, got {tk:?}."), + Self::NotLiteral(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a literal."), + Self::NotUse(tk, loc) => write!(f, "{loc}: {tk:?} is no use!"), + Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), + Self::NotMatch(bk, ex, loc) => { + write!(f, "{loc}: {bk:?} is not valid in a {ex:?} expression.") + } + Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."), + Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."), + Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."), + } + } +} + +pub type PResult = Result; + +pub trait PResultExt { + fn no_eof(self) -> PResult; + fn allow_eof(self) -> PResult>; +} + +impl PResultExt for PResult { + fn no_eof(self) -> Self { + match self { + Err(ParseError::EOF(span)) => Err(ParseError::UnexpectedEOF(span)), + other => other, + } + } + fn allow_eof(self) -> PResult> { + match self { + Ok(t) => Ok(Some(t)), + Err(ParseError::EOF(_)) => Ok(None), + Err(e) => Err(e), + } + } +} + +/// Opens a scope where [`ParseError::EOF`] is unexpected (See [`PResultExt::no_eof`]) +pub fn no_eof(f: impl FnOnce() -> PResult) -> PResult { + f().no_eof() +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..52f06d7 --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,592 @@ +use super::{PResult, PResultExt, Parse, ParseError, Parser, no_eof, pat::Prec as PPrec}; +use crate::{ + ast::*, + token::{TKind, Token}, +}; +use std::iter; + +/// Organizes the precedence hierarchy for syntactic elements +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Prec { + Min, + /// The Semicolon Operator gets its own precedence level + Do, + /// An assignment + Assign, + /// Constructor for a tuple + Tuple, + /// The body of a function, conditional, etc. + Body, + /// Constructor for a struct + Make, + /// The conditional of an `if` or `while` (which is really an `if`) + Logical, + /// The short-circuiting "boolean or" operator + LogOr, + /// The short-circuiting "boolean and" operator + LogAnd, + /// Value comparison operators + Compare, + /// Constructor for a Range + Range, + /// Binary/bitwise operators + Binary, + /// Bit-shifting operators + Shift, + /// Addition and Subtraction operators + Factor, + /// Multiplication, Division, and Remainder operators + Term, + /// Negation, (De)reference, Try + Unary, + /// Place-projection operators + Project, + /// Array/Call subscripting and reference + Extend, + Max, +} + +impl Prec { + pub const MIN: usize = Prec::Min.value(); + + pub const fn value(self) -> usize { + self as usize * 2 + } + + pub const fn prev(self) -> usize { + match self { + Self::Assign => self.value() + 1, + _ => self.value(), + } + } + + pub const fn next(self) -> usize { + match self { + Self::Assign => self.value(), + _ => self.value() + 1, + } + } +} + +/// `PseudoOperator`: fake operators used to give certain tokens special behavior. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Ps { + Id, // Identifier + Mid, // MetaIdentifier + Lit, // Literal + Use, // use Use + Def, // any definition (let, const, static, struct, enum, fn, ...) + For, // for Pat in Expr Expr else Expr + Lambda0, // || Expr + Lambda, // | Pat,* | Expr + DoubleRef, // && Expr + Make, // Expr{ Expr,* } + ImplicitDo, // An implicit semicolon + End, // Produces an empty value. + Op(Op), // A normal [ast::Op] +} + +/// Tries to map the incoming [Token] to a prefix [expression operator](Op) +/// and its [precedence level](Prec) +fn from_prefix(token: &Token) -> PResult<(Ps, Prec)> { + Ok(match token.kind { + TKind::Do => (Ps::Op(Op::Do), Prec::Do), + TKind::Semi => (Ps::End, Prec::Body), + + TKind::Identifier | TKind::ColonColon => (Ps::Id, Prec::Max), + TKind::Grave => (Ps::Mid, Prec::Max), + TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { + (Ps::Lit, Prec::Max) + } + TKind::Use => (Ps::Use, Prec::Max), + + TKind::Pub => (Ps::Op(Op::Pub), Prec::Body), + TKind::For => (Ps::For, Prec::Body), + TKind::Match => (Ps::Op(Op::Match), Prec::Body), + TKind::Macro => (Ps::Op(Op::Macro), Prec::Assign), + + TKind::Fn + | TKind::Mod + | TKind::Impl + | TKind::Let + | TKind::Const + | TKind::Static + | TKind::Type + | TKind::Struct + | TKind::Enum => (Ps::Def, Prec::Body), + + TKind::Loop => (Ps::Op(Op::Loop), Prec::Body), + TKind::If => (Ps::Op(Op::If), Prec::Body), + TKind::While => (Ps::Op(Op::While), Prec::Body), + TKind::Break => (Ps::Op(Op::Break), Prec::Body), + TKind::Return => (Ps::Op(Op::Return), Prec::Body), + + TKind::LCurly => (Ps::Op(Op::Block), Prec::Min), + TKind::RCurly => (Ps::End, Prec::Do), + TKind::LBrack => (Ps::Op(Op::Array), Prec::Tuple), + TKind::RBrack => (Ps::End, Prec::Tuple), + TKind::LParen => (Ps::Op(Op::Group), Prec::Min), + TKind::RParen => (Ps::End, Prec::Tuple), + TKind::Amp => (Ps::Op(Op::Refer), Prec::Extend), + TKind::AmpAmp => (Ps::DoubleRef, Prec::Extend), + TKind::Bang => (Ps::Op(Op::Not), Prec::Unary), + TKind::BangBang => (Ps::Op(Op::Identity), Prec::Unary), + TKind::Bar => (Ps::Lambda, Prec::Body), + TKind::BarBar => (Ps::Lambda0, Prec::Body), + TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), + TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), + TKind::Minus => (Ps::Op(Op::Neg), Prec::Unary), + TKind::Plus => (Ps::Op(Op::Identity), Prec::Unary), + TKind::Star => (Ps::Op(Op::Deref), Prec::Unary), + TKind::Hash => (Ps::Op(Op::Meta), Prec::Unary), + + kind => Err(ParseError::NotPrefix(kind, token.span))?, + }) +} + +/// Tries to map the incoming [Token] to an infix [expression operator](Op) +/// and its [precedence level](Prec) +const fn from_infix(token: &Token) -> PResult<(Ps, Prec)> { + Ok(match token.kind { + TKind::Semi => (Ps::Op(Op::Do), Prec::Do), // the inspiration + TKind::In => (Ps::Op(Op::Do), Prec::Do), + + TKind::Eq => (Ps::Op(Op::Set), Prec::Assign), + TKind::StarEq => (Ps::Op(Op::MulSet), Prec::Assign), + TKind::SlashEq => (Ps::Op(Op::DivSet), Prec::Assign), + TKind::RemEq => (Ps::Op(Op::RemSet), Prec::Assign), + TKind::PlusEq => (Ps::Op(Op::AddSet), Prec::Assign), + TKind::MinusEq => (Ps::Op(Op::SubSet), Prec::Assign), + TKind::LtLtEq => (Ps::Op(Op::ShlSet), Prec::Assign), + TKind::GtGtEq => (Ps::Op(Op::ShrSet), Prec::Assign), + TKind::AmpEq => (Ps::Op(Op::AndSet), Prec::Assign), + TKind::XorEq => (Ps::Op(Op::XorSet), Prec::Assign), + TKind::BarEq => (Ps::Op(Op::OrSet), Prec::Assign), + TKind::Comma => (Ps::Op(Op::Tuple), Prec::Tuple), + TKind::LCurly => (Ps::Make, Prec::Make), + TKind::XorXor => (Ps::Op(Op::LogXor), Prec::Logical), + TKind::BarBar => (Ps::Op(Op::LogOr), Prec::LogOr), + TKind::AmpAmp => (Ps::Op(Op::LogAnd), Prec::LogAnd), + TKind::Lt => (Ps::Op(Op::Lt), Prec::Compare), + TKind::LtEq => (Ps::Op(Op::Leq), Prec::Compare), + TKind::EqEq => (Ps::Op(Op::Eq), Prec::Compare), + TKind::BangEq => (Ps::Op(Op::Neq), Prec::Compare), + TKind::GtEq => (Ps::Op(Op::Geq), Prec::Compare), + TKind::Gt => (Ps::Op(Op::Gt), Prec::Compare), + TKind::DotDot => (Ps::Op(Op::RangeEx), Prec::Range), + TKind::DotDotEq => (Ps::Op(Op::RangeIn), Prec::Range), + TKind::Amp => (Ps::Op(Op::And), Prec::Binary), + TKind::Xor => (Ps::Op(Op::Xor), Prec::Binary), + TKind::Bar => (Ps::Op(Op::Or), Prec::Binary), + TKind::LtLt => (Ps::Op(Op::Shl), Prec::Shift), + TKind::GtGt => (Ps::Op(Op::Shr), Prec::Shift), + TKind::Plus => (Ps::Op(Op::Add), Prec::Factor), + TKind::Minus => (Ps::Op(Op::Sub), Prec::Factor), + TKind::Star => (Ps::Op(Op::Mul), Prec::Term), + TKind::Slash => (Ps::Op(Op::Div), Prec::Term), + TKind::Rem => (Ps::Op(Op::Rem), Prec::Term), + + TKind::Question => (Ps::Op(Op::Try), Prec::Unary), + TKind::Dot => (Ps::Op(Op::Dot), Prec::Project), + TKind::LParen => (Ps::Op(Op::Call), Prec::Extend), + TKind::LBrack => (Ps::Op(Op::Index), Prec::Extend), + + TKind::RParen | TKind::RBrack | TKind::RCurly => (Ps::End, Prec::Max), + TKind::As => (Ps::Op(Op::As), Prec::Max), + _ => (Ps::ImplicitDo, Prec::Do), + }) +} + +impl<'t> Parse<'t> for Expr { + type Prec = usize; + + /// Parses an [Expr]ession. + /// + /// The `level` parameter indicates the operator binding level of the expression. + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + const MIN: usize = Prec::MIN; + + // TODO: in-tree doc comments + while p.next_if(TKind::Doc)?.is_ok() {} + + // Prefix + let tok @ &Token { kind, span, .. } = p.peek()?; + let ((op, prec), span) = (from_prefix(tok)?, span); + no_eof(move || { + let mut head = match op { + // "End" is produced when an "empty" expression is syntactically required. + // This happens when a semi or closing delimiter begins an expression. + // The token which emitted "End" cannot be consumed, as it is expected + // elsewhere. + Ps::End if level <= prec.next() => Expr::Omitted, + Ps::End => Err(ParseError::NotPrefix(kind, span))?, + + Ps::Id => Expr::Id(p.parse(())?), + Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()), + Ps::Lit => Expr::Lit(p.parse(())?), + Ps::Use => Expr::Use(p.consume().parse(())?), + Ps::Def => Expr::Bind(p.parse(None)?), + Ps::Lambda | Ps::Lambda0 => { + p.consume(); + + let args = if op == Ps::Lambda { + p.opt(PPrec::Tuple, TKind::Bar)? + .unwrap_or(Pat::Op(PatOp::Tuple, vec![])) + } else { + Pat::Op(PatOp::Tuple, vec![]) + }; + + let rety = p.opt_if(PPrec::Max, TKind::Arrow)?.unwrap_or(Pat::Ignore); + + Expr::Bind(Box::new(Bind( + BindOp::Fn, + vec![], + Pat::Op(PatOp::Fn, vec![args, rety]), + vec![p.parse(Prec::Body.next())?], + ))) + } + Ps::For => parse_for(p, ())?, + Ps::Op(Op::Match) => parse_match(p)?, + Ps::Op(Op::Meta) => Expr::Op( + Op::Meta, + vec![ + p.consume() + .expect(TKind::LBrack)? + .opt(MIN, TKind::RBrack)? + .unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)), + p.parse(level)?, + ], + ), + Ps::Op(Op::Block) => Expr::Op( + Op::Block, + p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), + ), + Ps::Op(Op::Array) => parse_array(p)?, + Ps::Op(Op::Group) => match p.consume().opt(MIN, TKind::RParen)? { + Some(value) => Expr::Op(Op::Group, vec![value]), + None => Expr::Op(Op::Tuple, vec![]), + }, + Ps::Op(op @ (Op::If | Op::While)) => { + p.consume(); + let exprs = vec![ + // conditional restricted to Logical operators or above + p.parse(Prec::Logical.value())?, + p.parse(prec.next())?, + match p.peek() { + Ok(Token { kind: TKind::Else, .. }) => { + p.consume().parse(prec.next())? + } + _ => Expr::Op(Op::Tuple, vec![]).anno(span.merge(p.span())), + }, + ]; + Expr::Op(op, exprs) + } + Ps::DoubleRef => p.consume().parse(prec.next()).map(|Anno(expr, span)| { + Expr::Op( + Op::Refer, + vec![Anno(Expr::Op(Op::Refer, vec![Anno(expr, span)]), span)], + ) + })?, + + Ps::Op(op) => Expr::Op(op, vec![p.consume().parse(prec.next())?]), + _ => unimplemented!("prefix {op:?}"), + }; + + // Infix and Postfix + while let Ok(Some(tok)) = p.peek().allow_eof() + && let Ok((op, prec)) = from_infix(tok) + && level <= prec.prev() + && op != Ps::End + { + let kind = tok.kind; + let span = span.merge(p.span()); + + head = match op { + // Make (structor expressions) are context-sensitive + Ps::Make => match &head { + Expr::Id(_) | Expr::MetId(_) => Expr::Make(Box::new(Make( + head.anno(span), + p.consume().list(vec![], (), TKind::Comma, TKind::RCurly)?, + ))), + _ => break, + }, + // As is ImplicitDo (semicolon elision) + Ps::ImplicitDo if p.elide_do => head.and_do(span, p.parse(prec.next())?), + Ps::ImplicitDo => break, + // Allow `;` at end of file + Ps::Op(Op::Do) => head.and_do( + span, + match p.consume().peek().allow_eof()? { + Some(_) => p.parse(prec.next())?, + None => Anno(Default::default(), span), + }, + ), + Ps::Op(Op::Index) => Expr::Op( + Op::Index, + p.consume() + .list(vec![head.anno(span)], 0, TKind::Comma, TKind::RBrack)?, + ), + Ps::Op(Op::Call) => Expr::Op( + Op::Call, + vec![ + head.anno(span), + p.consume() + .opt(0, TKind::RParen)? + .unwrap_or_else(|| Expr::Op(Op::Tuple, vec![]).anno(span)), + ], + ), + Ps::Op(op @ (Op::Tuple | Op::Dot | Op::LogAnd | Op::LogOr)) => Expr::Op( + op, + p.consume() + .list_bare(vec![head.anno(span)], prec.next(), kind)?, + ), + Ps::Op(op @ Op::Try) => { + p.consume(); + Expr::Op(op, vec![head.anno(span)]) + } + Ps::Op(op) => { + Expr::Op(op, vec![head.anno(span), p.consume().parse(prec.next())?]) + } + _ => Err(ParseError::NotInfix(kind, span))?, + } + } + + Ok(head) + }) + } +} + +/// Parses an array with 0 or more elements, or an array-repetition +fn parse_array(p: &mut Parser<'_>) -> PResult { + if p.consume().peek()?.kind == TKind::RBrack { + p.consume(); + return Ok(Expr::Op(Op::Array, vec![])); + } + + let prec = Prec::Tuple; + let item = p.parse(prec.value())?; + let repeat = p.opt_if(prec.next(), TKind::Semi)?; + p.expect(TKind::RBrack)?; + + Ok(match (repeat, item) { + (Some(repeat), item) => Expr::Op(Op::ArRep, vec![item, repeat]), + (None, Anno(Expr::Op(Op::Tuple, items), _)) => Expr::Op(Op::Array, items), + (None, item) => Expr::Op(Op::Array, vec![item]), + }) +} + +/// Parses a `match` expression +/// +/// ```ignore +/// match scrutinee { +/// (Pat => Expr),* +/// } +/// ``` +fn parse_match(p: &mut Parser<'_>) -> PResult { + let scrutinee = p.consume().parse(Prec::Logical.value())?; + + let arms = p + .expect(TKind::LCurly)? + .list(vec![], Some(BindOp::Match), TKind::Comma, TKind::RCurly)? + .into_iter() + .map(|Anno(arm, span)| Anno(Expr::Bind(Box::new(arm)), span)); + + let expr = Expr::Op(Op::Match, iter::once(scrutinee).chain(arms).collect()); + + Ok(expr) +} + +/// Parses and desugars a `for` loop expression +/// +/// Assumes the existence of the following items: +/// +/// 1. `enum Option { None, Some(T) }` +/// 2. `fn T::into_iter(&mut self) -> U` +/// 3. `U::next() -> Option` +fn parse_for(p: &mut Parser<'_>, _level: ()) -> PResult { + // for Pat + let pat = p.consume().parse(PPrec::Tuple)?; + // in Expr + let iter: Anno = p.expect(TKind::In)?.parse(Prec::Logical.next())?; + let cspan = iter.1; + // Expr + let pass: Anno = p.parse(Prec::Body.next())?; + let pspan = pass.1; + // else Expr? + let fail = match p.next_if(TKind::Else).allow_eof()? { + Some(Ok(_)) => p.parse(Prec::Body.next())?, + _ => Expr::Op(Op::Tuple, vec![]).anno(pspan), + }; + let fspan = fail.1; + /* + for `pat in `iter `pass else `fail + ==> + match (`iter).into_iter() { + #iter => loop match #iter.next() { + None => break `fail, + Some(`pat) => `pass, + }, + } + */ + + // TODO: A better way to do this kind of substitution desugaring + // without losing span information! + Ok(Expr::Op( + Op::Match, + vec![ + Expr::Op( + Op::Dot, + vec![ + iter, + Expr::Op(Op::Call, vec![Expr::Id("into_iter".into()).anno(cspan)]).anno(cspan), + ], + ) + .anno(cspan), + Expr::Bind(Box::new(Bind( + BindOp::Match, + vec![], + Pat::Name("#iter".into()), + vec![ + Expr::Op( + Op::Loop, + vec![ + Expr::Op( + Op::Match, + vec![ + Expr::Op( + Op::Dot, + vec![ + Expr::Id("#iter".into()).anno(cspan), + Expr::Op( + Op::Call, + vec![Expr::Id("next".into()).anno(cspan)], + ) + .anno(cspan), + ], + ) + .anno(cspan), + Expr::Bind(Box::new(Bind( + BindOp::Match, + vec![], + Pat::Name("None".into()), + vec![Expr::Op(Op::Break, vec![fail]).anno(fspan)], + ))) + .anno(fspan), + Expr::Bind(Box::new(Bind( + BindOp::Match, + vec![], + Pat::NamedTuple( + "Some".into(), + Box::new(Pat::Op(PatOp::Tuple, vec![pat])), + ), + vec![pass], + ))) + .anno(pspan), + ], + ) + .anno(pspan), + ], + ) + .anno(pspan), + ], + ))) + .anno(pspan), + ], + )) +} + +/// Returns the [BindOp], [pattern precedence](PPrec), [arrow TKind](TKind), [body precedence](Prec), +/// and [else precedence](Prec), (if applicable,) which controls the parsing of Bind expressions. +/// +/// The returned expression [Prec]edences are expected to be [`Prec::next`]ed, so they may +/// be one level of precedence lower than would be intuitive (i.e. [Prec::Assign] instead of [Prec::Tuple]) +#[rustfmt::skip] +#[allow(clippy::type_complexity)] +fn from_bind(p: &mut Parser<'_>) -> PResult<(BindOp, PPrec, Option, Option, Option)> { + let bk = match p.peek()?.kind { + // Token Operator Pat prec Body Token Body prec Else prec + TKind::Let => (BindOp::Let, PPrec::Tuple, Some(TKind::Eq), Some(Prec::Compare), Some(Prec::Body)), + TKind::Const => (BindOp::Const, PPrec::Tuple, Some(TKind::Eq), Some(Prec::Assign), None), + TKind::Static => (BindOp::Static, PPrec::Tuple, Some(TKind::Eq), Some(Prec::Assign), None), + TKind::Type => (BindOp::Type, PPrec::Tuple, Some(TKind::Eq), Some(Prec::Project), None), + TKind::Struct => (BindOp::Struct, PPrec::Tuple, None, None, None), + TKind::Enum => (BindOp::Enum, PPrec::Tuple, None, None, None), + TKind::Fn => (BindOp::Fn, PPrec::Fn, None, Some(Prec::Body), None), + TKind::Mod => (BindOp::Mod, PPrec::Max, None, Some(Prec::Body), None), + TKind::Impl => (BindOp::Impl, PPrec::Max, None, Some(Prec::Body), None), + TKind::Bar => (BindOp::Match, PPrec::Alt, Some(TKind::FatArrow), Some(Prec::Body), None), + // no consume! + _ => return Ok((BindOp::Match, PPrec::Alt, Some(TKind::FatArrow), Some(Prec::Body), None)), + }; + + p.consume(); + Ok(bk) +} + +impl<'t> Parse<'t> for Bind { + type Prec = Option; + + fn parse(p: &mut Parser<'t>, expected_level: Self::Prec) -> PResult { + // let + let (level, patp, arrow, bodyp, failp) = from_bind(p)?; + + if let Some(expected) = expected_level + && level != expected + { + Err(ParseError::NotMatch(level, expected, p.span()))? + } + + // + let generics = match p.next_if(TKind::Lt)? { + Ok(_) => p.list(vec![], (), TKind::Comma, TKind::Gt)?, + Err(_) => vec![], + }; + + // Pat + let pat = p.parse(patp)?; + + let Some(bodyp) = bodyp else { + return Ok(Self(level, generics, pat, vec![])); + }; + + // `=>` for match, `=`? for everything else + if let Some(arrow) = arrow + && p.next_if(arrow).allow_eof()?.is_none_or(|v| v.is_err()) + { + return Ok(Self(level, generics, pat, vec![])); + } + + // `=` Expr + let body = p.parse(bodyp.next())?; + + let Some(failp) = failp else { + return Ok(Self(level, generics, pat, vec![body])); + }; + + // `else` Expr + if p.next_if(TKind::Else) + .allow_eof()? + .is_none_or(|v| v.is_err()) + { + return Ok(Self(level, generics, pat, vec![body])); + } + + let fail = p.parse(failp.next())?; + + Ok(Self(level, generics, pat, vec![body, fail])) + } +} + +impl<'t> Parse<'t> for MakeArm { + type Prec = (); + + fn parse(p: &mut Parser<'t>, _level: ()) -> PResult { + let name = p + .next_if(TKind::Identifier)? + .map_err(|tk| ParseError::Expected(TKind::Identifier, tk, p.span()))?; + Ok(MakeArm( + name.lexeme.string().expect("Identifier should have String"), + p.opt_if(Prec::Body.value(), TKind::Colon)?, + )) + } +} diff --git a/src/parser/pat.rs b/src/parser/pat.rs new file mode 100644 index 0000000..2b3d5f6 --- /dev/null +++ b/src/parser/pat.rs @@ -0,0 +1,171 @@ +use super::{PResult, PResultExt, Parse, ParseError, Parser}; +use crate::{ + ast::*, + token::{TKind, Token}, +}; + +/// Precedence levels of value and type pattern expressions. +/// +/// Lower (toward [Prec::Min]) precedence levels can contain +/// all higher (toward [Prec::Max]) precedence levels. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Prec { + /// The lowest precedence + Min, + /// "Alternate" pattern: `Pat | Pat` + Alt, + /// Tuple pattern: `Pat,+` + Tuple, + /// Type annotation: `Pat : Pat` + Typed, + /// Range pattern: `Pat .. Pat`, `Pat ..= Pat` + Range, + /// Function pattern: `Pat -> Pat` + Fn, + /// The highest precedence + Max, +} + +impl Prec { + /// Returns the level of precedence higher than this one + const fn next(self) -> Self { + match self { + Self::Min => Self::Alt, + Self::Alt => Self::Tuple, + Self::Tuple => Self::Typed, + Self::Typed => Self::Range, + Self::Range => Self::Fn, + Self::Fn => Self::Max, + Self::Max => Self::Max, + } + } +} + +/// Tries to map the incoming Token to a [pattern operator](PatOp) +/// and its [precedence level](Prec) +fn from_infix(token: &Token) -> Option<(PatOp, Prec)> { + Some(match token.kind { + TKind::DotDot => (PatOp::RangeEx, Prec::Range), + TKind::DotDotEq => (PatOp::RangeIn, Prec::Range), + TKind::Colon => (PatOp::Typed, Prec::Typed), + TKind::Comma => (PatOp::Tuple, Prec::Tuple), + TKind::Arrow => (PatOp::Fn, Prec::Fn), + TKind::Bar => (PatOp::Alt, Prec::Alt), + _ => None?, + }) +} + +impl<'t> Parse<'t> for Pat { + type Prec = Prec; + + fn parse(p: &mut Parser<'t>, level: Prec) -> PResult { + let tok = p.peek()?; + + // Prefix + let mut head = match tok.kind { + TKind::Fn => return p.consume().parse(Prec::Fn), + TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { + Pat::Lit(p.parse(())?) + } + TKind::Bar => p.consume().parse(level)?, + TKind::Bang => p.consume().then(Pat::Never), + TKind::Amp => Pat::Op(PatOp::Ref, vec![p.consume().parse(Prec::Max)?]), + TKind::Star => Pat::Op(PatOp::Ptr, vec![p.consume().parse(Prec::Max)?]), + TKind::Mut => Pat::Op(PatOp::Mut, vec![p.consume().parse(Prec::Max)?]), + TKind::Pub => Pat::Op(PatOp::Pub, vec![p.consume().parse(Prec::Max)?]), + TKind::AmpAmp => Pat::Op( + PatOp::Ref, + vec![Pat::Op(PatOp::Ref, vec![p.consume().parse(Prec::Max)?])], + ), + TKind::Identifier => match tok.lexeme.str() { + Some("_") => p.consume().then(Pat::Ignore), + _ => { + let mut path: Path = p.parse(())?; + // TODO: make these postfix. + match p.peek().map(|t| t.kind) { + Ok(TKind::LParen) => Pat::NamedTuple(path, p.parse(Prec::Typed)?), + Ok(TKind::LCurly) if level <= Prec::Tuple.next() => Pat::NamedStruct( + path, + p.consume() + .opt(Prec::Tuple, TKind::RCurly)? + .unwrap_or_else(|| Box::new(Pat::Op(PatOp::Tuple, vec![]))), + ), + Ok(_) | Err(ParseError::EOF(_)) => match path.parts.len() { + 1 => Self::Name(path.parts.pop().expect("name has 1 part")), + _ => Self::Path(path), + }, + Err(e) => Err(e)?, + } + } + }, + TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()), + TKind::DotDot => Pat::Op( + PatOp::Rest, + // Identifier in Rest position always becomes binder + match p.consume().peek().allow_eof()?.map(Token::kind) { + Some(TKind::Identifier) => vec![Pat::Name( + p.take_lexeme() + .expect("should have lexeme") + .string() + .expect("should be string"), + )], + Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], + _ => vec![], + }, + ), + TKind::DotDotEq => Pat::Op( + PatOp::RangeIn, + match p.consume().peek().allow_eof()?.map(Token::kind) { + Some(TKind::Grave | TKind::Integer | TKind::Character) => vec![p.parse(level)?], + _ => vec![], + }, + ), + TKind::LParen => Pat::Op( + PatOp::Tuple, + p.consume() + .list(vec![], Prec::Typed, TKind::Comma, TKind::RParen)?, + ), + TKind::LBrack => parse_array_pat(p)?, + _ => Err(ParseError::NotPattern(tok.kind, tok.span))?, + }; + + while let Ok(Some(tok)) = p.peek().allow_eof() + && let Some((op, prec)) = from_infix(tok) + && level <= prec + { + let kind = tok.kind; + head = match op { + PatOp::Typed => Pat::Op(PatOp::Typed, vec![head, p.consume().parse(Prec::Max)?]), + PatOp::Fn => Pat::Op(PatOp::Fn, vec![head, p.consume().parse(Prec::Fn.next())?]), + op @ (PatOp::RangeEx | PatOp::RangeIn) => Pat::Op( + op, + match p.consume().peek().map(|t| t.kind) { + Ok(TKind::Integer | TKind::Character | TKind::Identifier) => { + vec![head, p.parse(prec.next())?] + } + _ => vec![head], + }, + ), + op => Pat::Op(op, p.consume().list_bare(vec![head], prec.next(), kind)?), + } + } + Ok(head) + } +} + +fn parse_array_pat(p: &mut Parser<'_>) -> PResult { + if p.consume().peek()?.kind == TKind::RBrack { + p.consume(); + return Ok(Pat::Op(PatOp::Slice, vec![])); + } + + let item = p.parse(Prec::Tuple)?; + let repeat = p.opt_if(Prec::Tuple, TKind::Semi)?; + p.expect(TKind::RBrack)?; + + Ok(match (repeat, item) { + (Some(repeat), item) => Pat::Op(PatOp::Arrep, vec![item, repeat]), + (None, Pat::Op(PatOp::Tuple, items)) => Pat::Op(PatOp::Slice, items), + (None, item) => Pat::Op(PatOp::Slice, vec![item]), + }) +} diff --git a/src/span.rs b/src/span.rs index b1624d9..2a54553 100644 --- a/src/span.rs +++ b/src/span.rs @@ -21,9 +21,9 @@ pub const fn Span(head: u32, tail: u32) -> Span { } impl Span { - /// Updates `self` to include all but the last byte in `other` + /// Computes the [struct@Span] containing both `self` and `other` pub fn merge(self, other: Span) -> Span { - Span { head: self.head.min(other.head), tail: self.tail.max(other.head) } + Span { head: self.head.min(other.head), tail: self.tail.max(other.tail) } } }