src: Split parser into modules, reorganize AST

This commit is contained in:
2025-10-28 22:15:06 -04:00
parent a3cab92b35
commit 8e2bc5ad85
8 changed files with 1146 additions and 1075 deletions

View File

@@ -4,184 +4,32 @@ pub mod macro_matcher;
pub mod visit;
/// A value with an annotation.
///
/// Field `0` is the annotated value and field `1` is its annotation,
/// whose type defaults to [`Span`]. `Debug` is implemented by hand
/// rather than derived.
#[derive(Clone, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
impl<T: Annotation, A: Annotation> std::fmt::Debug for Anno<T, A> {
    /// Writes the annotation, then `": "`, then the annotated value.
    ///
    /// Both parts receive the caller's formatter, so its flags apply to each.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, annotation) = self;
        std::fmt::Debug::fmt(annotation, f)?;
        f.write_str(": ")?;
        std::fmt::Debug::fmt(value, f)
    }
}
/// An annotation: extra data added on to important AST nodes.
///
/// The trait declares no methods; it only bundles the bounds every
/// annotation type must satisfy.
pub trait Annotation: Clone + PartialEq + Eq + std::fmt::Debug + std::fmt::Display {}

/// Every type that meets the bounds is automatically an [`Annotation`].
impl<T> Annotation for T where T: Clone + PartialEq + Eq + std::fmt::Debug + std::fmt::Display {}
/// A qualified identifier
///
/// A bare `&str` converts into a single-part path through `From<&str>`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FqPath {
// TODO: Identifier interning
/// The identifiers that make up the path; `Display` hands them to
/// `list` with a `::` separator.
pub parts: Vec<String>,
// TODO: generic parameters
}
/// A value with an annotation.
///
/// Field `0` is the annotated value and field `1` is its annotation,
/// whose type defaults to [`Span`]. `Debug` is implemented by hand
/// rather than derived.
#[derive(Clone, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
impl From<&str> for FqPath {
fn from(value: &str) -> Self {
Self { parts: vec![value.to_owned()] }
}
}
/// A literal value (boolean, character, integer, string)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
Bool(bool),
/// A character literal: 'a', '\u{1f988}'
Char(char),
/// An integer literal: 0, 123, 0x10
///
/// Field `0` is the value. Field `1` selects the prefix used by the
/// `Display` impl: 2 prints `0b`, 8 prints `0o`, 16 prints `0x`, and any
/// other number prints plain decimal (presumably this is the radix the
/// literal was written in; confirm against the lexer).
Int(u128, u32),
/// A string literal: "hello"
Str(String),
}
/// A compound import declaration
///
/// The variants nest: a `Path` wraps the `Use` that follows its `::`,
/// and a `Tree` holds a list of further `Use` items.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Use {
/// A glob import: `*`
Glob,
/// A single name: `Identifier`
Name(String),
/// A path segment followed by the rest of the import: `Identifier :: Use`
Path(String, Box<Use>),
/// A brace-delimited list of imports: `{ Use, * }`
Tree(Vec<Use>),
}
/// Binding patterns for each kind of matchable value.
///
/// This covers both patterns in Match expressions, and type annotations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// Matches anything without binding
Ignore,
/// Matches nothing, ever
Never,
/// Matches nothing; used for macro substitution
MetId(String),
/// Matches anything, and binds it to a name
Name(String),
/// Matches against a named const value
Path(FqPath),
/// Matches a Struct Expression `Ident { Pat }`
///
/// The boxed pattern is the content of the braces.
NamedStruct(FqPath, Box<Pat>),
/// Matches a Tuple Struct Expression `Ident ( Pat )`
///
/// The boxed pattern is the content of the parentheses.
NamedTuple(FqPath, Box<Pat>),
/// Matches a literal value by equality comparison
Lit(Literal),
/// Matches a compound pattern: a [`PatOp`] together with a list of sub-patterns
Op(PatOp, Vec<Pat>),
}
/// Operators on lists of patterns
///
/// Used as the first field of [`Pat::Op`]; the `Vec<Pat>` beside it holds
/// the sub-patterns the operator applies to.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PatOp {
/// Changes the visibility mode to "public"
Pub,
/// Changes the binding mode to "mutable"
Mut,
/// Matches the dereference of a pointer (`&pat`)
Ref,
/// Matches the dereference of a raw pointer (`*pat`)
Ptr,
/// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`)
Rest,
/// Matches an exclusive bounded range (`0..100`)
RangeEx,
/// Matches an inclusive bounded range (`0..=100`)
RangeIn,
/// Matches the elements of a tuple
Tuple,
/// Matches the elements of a slice or array
Slice,
/// Matches a constant-size slice with repeating elements
Arrep,
/// Matches a type annotation or struct member
Typed,
/// Matches a function signature
Fn,
/// Matches one of a list of alternatives
Alt,
}
/// A pattern binding
/// ```ignore
/// let Pat (= Expr (else Expr)?)?
/// const Pat (= Expr (else Expr)?)?
/// static Pat (= Expr (else Expr)?)?
/// type Pat (= Expr)?
/// struct Pat
/// enum Pat
/// fn Pat Expr
/// mod Pat Expr
/// impl Pat Expr
/// Pat => Expr // in match
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Bind<A: Annotation = Span>(
/// Which of the forms above this binding is
pub BindKind,
/// Paths printed as a `<a, b> ` list after the keyword when non-empty
/// (presumably generic parameters: the `Display` impl calls them `gens`)
pub Vec<FqPath>,
/// The pattern being bound
pub Pat,
/// The expressions that follow the pattern, e.g. the value and optional
/// `else` of a `let`, or the body of a `fn`/`mod`/`impl`
pub Vec<Anno<Expr<A>, A>>,
);
/// The kind of binding a [`Bind`] represents.
///
/// The `Display` impl prints the matching keyword followed by a space,
/// or nothing at all for `Match`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindKind {
/// A `let Pat (= Expr (else Expr)?)?` binding
Let,
/// A `const Pat = Expr` binding
Const,
/// A `static Pat = Expr` binding
Static,
/// A type-alias binding
Type,
/// A struct definition
Struct,
/// An enum definition
Enum,
/// A `fn Pat Expr` binding
Fn,
/// A `mod Pat Expr` binding
Mod,
/// An `impl Pat Expr` binding
Impl,
/// A `Pat => Expr` binding
Match,
}
/// A make (constructor) expression
/// ```ignore
/// Expr { (Ident (: Expr)?),* }
/// ```
///
/// Field `0` is the expression written before the braces; field `1` is the
/// list of arms inside them.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Make<A: Annotation = Span>(pub Anno<Expr<A>, A>, pub Vec<MakeArm<A>>);
/// A single "arm" of a make expression
/// ```text
/// Identifier (':' Expr)?
/// ```
///
/// Field `0` is the identifier; field `1` is the optional expression after
/// the `:`, and is `None` when the arm is just an identifier.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);
/// Expressions: The beating heart of Dough.
///
/// A program in Doughlang is a single expression which, at compile time,
/// sets up the state in which a program will run. This expression binds types,
/// functions, and values to names which are exposed at runtime.
///
/// Whereas in the body of a function, `do` sequences are ordered, in the global
/// scope (or subsequent module scopes, which are children of the global module,)
/// `do` sequences are considered unordered, and subexpressions may be reordered
/// in whichever way the compiler sees fit. This is especially important when
/// performing import resolution, as imports typically depend on the order
/// in which names are bound.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr<A: Annotation = Span> {
/// Omitted by semicolon insertion-elision rules
Omitted,
/// An identifier
Id(FqPath),
Id(Path),
/// An escaped token for macro binding
MetId(String),
/// A literal bool, string, char, or int
@@ -197,6 +45,18 @@ pub enum Expr<A: Annotation = Span> {
Op(Op, Vec<Anno<Self, A>>),
}
/// Doughlang's AST is partitioned by data representation, so it
/// considers any expression which is composed solely of keywords,
/// symbols, and other expressions as operator expressions.
///
/// This includes:
/// - Do-sequence expressions: `Expr ; Expr`
/// - Type-cast expressions: `Expr as Expr`
/// - Binding-modifier expressions: `pub Expr`, `#[Expr] Expr`
/// - Block and Group expressions: `{Expr?}`, `(Expr?)`
/// - Control flow: `if`, `while`, `loop`, `match`, `break`, `return`
/// - Function calls: `Expr (Expr,*)`
/// - Traditional binary and unary operators (add, sub, neg, assign)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
// -- true operators
@@ -270,6 +130,170 @@ pub enum Op {
XorSet, // Expr ^= Expr
OrSet, // Expr |= Expr
}
/// A qualified identifier
///
/// A bare `&str` converts into a single-part path through `From<&str>`.
///
/// TODO: qualify identifier
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path {
// TODO: Identifier interning
/// The identifiers that make up the path; `Display` hands them to
/// `list` with a `::` separator.
pub parts: Vec<String>,
// TODO: generic parameters
}
/// A literal value (boolean, character, integer, string)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
Bool(bool),
/// A character literal: 'a', '\u{1f988}'
Char(char),
/// An integer literal: 0, 123, 0x10
///
/// Field `0` is the value. Field `1` selects the prefix used by the
/// `Display` impl: 2 prints `0b`, 8 prints `0o`, 16 prints `0x`, and any
/// other number prints plain decimal (presumably this is the radix the
/// literal was written in; confirm against the lexer).
Int(u128, u32),
/// A string literal: "hello"
Str(String),
}
/// A compound import declaration
///
/// The variants nest: a `Path` wraps the `Use` that follows its `::`,
/// and a `Tree` holds a list of further `Use` items.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Use {
/// A glob import: `*`
Glob,
/// A single name: `Identifier`
Name(String),
/// A path segment followed by the rest of the import: `Identifier :: Use`
Path(String, Box<Use>),
/// A brace-delimited list of imports: `{ Use, * }`
Tree(Vec<Use>),
}
/// A pattern binding
/// ```ignore
/// let Pat (= Expr (else Expr)?)?
/// const Pat (= Expr (else Expr)?)?
/// static Pat (= Expr (else Expr)?)?
/// type Pat (= Expr)?
/// struct Pat
/// enum Pat
/// fn Pat Expr
/// mod Pat Expr
/// impl Pat Expr
/// Pat => Expr // in match
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Bind<A: Annotation = Span>(
/// Which of the forms above this binding is
pub BindOp,
/// Paths printed as a `<a, b> ` list after the keyword when non-empty
/// (presumably generic parameters: the `Display` impl calls them `gens`)
pub Vec<Path>,
/// The pattern being bound
pub Pat,
/// The expressions that follow the pattern, e.g. the value and optional
/// `else` of a `let`, or the body of a `fn`/`mod`/`impl`
pub Vec<Anno<Expr<A>, A>>,
);
/// The kind of binding a [`Bind`] represents.
///
/// The `Display` impl prints the matching keyword followed by a space,
/// or nothing at all for `Match`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BindOp {
/// A `let Pat (= Expr (else Expr)?)?` binding
Let,
/// A `const Pat = Expr` binding
Const,
/// A `static Pat = Expr` binding
Static,
/// A type-alias binding
Type,
/// A `fn Pat Expr` binding
Fn,
/// A `mod Pat Expr` binding
Mod,
/// An `impl Pat Expr` binding
Impl,
/// A struct definition
Struct,
/// An enum definition
Enum,
/// A `Pat => Expr` binding
Match,
}
/// A make (constructor) expression
/// ```ignore
/// Expr { (Ident (: Expr)?),* }
/// ```
///
/// Field `0` is the expression written before the braces; field `1` is the
/// list of arms inside them.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Make<A: Annotation = Span>(pub Anno<Expr<A>, A>, pub Vec<MakeArm<A>>);
/// A single "arm" of a make expression
/// ```text
/// Identifier (':' Expr)?
/// ```
///
/// Field `0` is the identifier; field `1` is the optional expression after
/// the `:`, and is `None` when the arm is just an identifier.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);
/// Binding patterns for each kind of matchable value.
///
/// This covers both patterns in Match expressions, and type annotations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// Matches anything without binding
Ignore,
/// Matches nothing, ever
Never,
/// Matches nothing; used for macro substitution
MetId(String),
/// Matches anything, and binds it to a name
Name(String),
/// Matches against a named const value
Path(Path),
/// Matches a Struct Expression `Ident { Pat }`
///
/// The boxed pattern is the content of the braces. When printing a
/// `struct`/`enum` binding, the `Bind` `Display` impl lays out a
/// `PatOp::Tuple` found here as a wrapped member list.
NamedStruct(Path, Box<Pat>),
/// Matches a Tuple Struct Expression `Ident ( Pat )`
///
/// The boxed pattern is the content of the parentheses.
NamedTuple(Path, Box<Pat>),
/// Matches a literal value by equality comparison
Lit(Literal),
/// Matches a compound pattern: a [`PatOp`] together with a list of sub-patterns
Op(PatOp, Vec<Pat>),
}
/// Operators on lists of patterns
///
/// Used as the first field of [`Pat::Op`]; the `Vec<Pat>` beside it holds
/// the sub-patterns the operator applies to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PatOp {
/// Changes the visibility mode to "public"
Pub,
/// Changes the binding mode to "mutable"
Mut,
/// Matches the dereference of a pointer (`&pat`)
Ref,
/// Matches the dereference of a raw pointer (`*pat`)
Ptr,
/// Matches a partial decomposition (`..rest`) or upper-bounded range (`..100`)
Rest,
/// Matches an exclusive bounded range (`0..100`)
RangeEx,
/// Matches an inclusive bounded range (`0..=100`)
RangeIn,
/// Matches the elements of a tuple
Tuple,
/// Matches the elements of a slice or array
Slice,
/// Matches a constant-size slice with repeating elements
Arrep,
/// Matches a type annotation or struct member
Typed,
/// Matches a function signature
Fn,
/// Matches one of a list of alternatives
Alt,
}
impl<T: Annotation, A: Annotation> std::fmt::Debug for Anno<T, A> {
    /// Writes the annotation, then `": "`, then the annotated value.
    ///
    /// Both parts receive the caller's formatter, so its flags apply to each.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, annotation) = self;
        std::fmt::Debug::fmt(annotation, f)?;
        f.write_str(": ")?;
        std::fmt::Debug::fmt(value, f)
    }
}
impl From<&str> for Path {
fn from(value: &str) -> Self {
Self { parts: vec![value.to_owned()] }
}
}
impl<A: Annotation> Default for Expr<A> {
fn default() -> Self {
@@ -313,106 +337,12 @@ impl<A: Annotation> Expr<A> {
use crate::{fmt::FmtAdapter, span::Span};
use std::{fmt::Display, format_args as fmt};
impl Display for Literal {
    /// Prints the literal roughly as it would be written in source.
    ///
    /// Characters and strings are quoted and escaped with `escape_debug`.
    /// Integers tagged with 2, 8 or 16 get a `0b`, `0o` or `0x` prefix; any
    /// other tag prints the value in decimal.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(flag) => Display::fmt(flag, f),
            Self::Char(ch) => write!(f, "'{}'", ch.escape_debug()),
            Self::Str(text) => write!(f, "\"{}\"", text.escape_debug()),
            Self::Int(value, radix) => match *radix {
                2 => write!(f, "0b{value:b}"),
                8 => write!(f, "0o{value:o}"),
                16 => write!(f, "0x{value:x}"),
                // Unprefixed: forward the caller's formatter so width/fill still apply.
                _ => Display::fmt(value, f),
            },
        }
    }
}
impl Display for FqPath {
    /// Hands the path's parts to the formatter's `list` helper with `::`
    /// as the separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.list(&self.parts, "::")
    }
}
impl<T: Display + Annotation, A: Annotation> Display for Anno<T, A> {
    /// Prints only the annotated value; the annotation itself is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(value, _) = self;
        write!(f, "{value}")
    }
}
impl Display for Use {
    /// Prints the import tree.
    ///
    /// A glob is `*`, a name is itself, and a path is `segment::rest`.
    /// An empty tree is `{}`; trees of one to three items go through
    /// `delimit`/`list`, larger ones through `delimit_indented`/`list_wrap`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Glob => Display::fmt("*", f),
            Self::Name(ident) => Display::fmt(ident, f),
            Self::Path(head, tail) => write!(f, "{head}::{tail}"),
            Self::Tree(children) if children.is_empty() => Display::fmt("{}", f),
            Self::Tree(children) if children.len() <= 3 => {
                f.delimit("{ ", " }").list(children, ", ")
            }
            Self::Tree(children) => f
                .delimit_indented("{", "}")
                .list_wrap("\n", children, ",\n", ",\n"),
        }
    }
}
impl<A: Annotation> Display for Bind<A> {
    /// Prints the binding keyword, an optional `<...> ` list, then the
    /// pattern and expressions in the layout that suits the binding kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(kind, generics, pattern, values) = self;
        Display::fmt(kind, f)?;
        if !generics.is_empty() {
            f.delimit("<", "> ").list(generics, ", ")?;
        }
        match kind {
            // `Pat => Expr`
            BindKind::Match => f.delimit(fmt!("{pattern} => "), "").list(values, ",!? "),
            // `fn Pat Expr`, `mod Pat Expr`, `impl Pat Expr`
            BindKind::Fn | BindKind::Mod | BindKind::Impl => {
                f.delimit(fmt!("{pattern} "), "").list(values, ",!? ")
            }
            // Type definitions print the pattern only; the expressions are not shown.
            BindKind::Struct | BindKind::Enum => match pattern {
                Pat::NamedStruct(name, body) => match body.as_ref() {
                    Pat::Op(PatOp::Tuple, members) => f
                        .delimit_indented(fmt!("{name} {{"), "}")
                        .list_wrap("\n", members, ",\n", ",\n"),
                    single => write!(f, "{name} {{ {single} }}"),
                },
                unnamed => Display::fmt(unnamed, f),
            },
            // `let`/`const`/`static`/`type`: layout depends on how many expressions follow.
            BindKind::Let | BindKind::Const | BindKind::Static | BindKind::Type => {
                match values.as_slice() {
                    [] => write!(f, "{pattern}"),
                    [init] => write!(f, "{pattern} = {init}"),
                    [init, fallback] => write!(f, "{pattern} = {init} else {fallback}"),
                    rest => f.delimit(fmt!("{pattern} ("), ")").list(rest, ", "),
                }
            }
        }
    }
}
impl Display for BindKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
Self::Let => "let ",
Self::Const => "const ",
Self::Static => "static ",
Self::Type => "type ",
Self::Struct => "struct ",
Self::Enum => "enum ",
Self::Fn => "fn ",
Self::Mod => "mod ",
Self::Impl => "impl ",
Self::Match => "",
})
}
}
impl<A: Annotation> Display for Make<A> {
    /// Prints the constructed expression and its arms, using `({expr} {`
    /// and `})` as delimiters and `, ` as the arm separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.delimit(fmt!("({} {{", self.0), "})").list(&self.1, ", ")
    }
}
impl<A: Annotation> Display for Expr<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -534,6 +464,100 @@ impl Display for Op {
}
}
impl Display for Path {
    /// Hands the path's parts to the formatter's `list` helper with `::`
    /// as the separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.list(&self.parts, "::")
    }
}
impl Display for Literal {
    /// Prints the literal roughly as it would be written in source.
    ///
    /// Characters and strings are quoted and escaped with `escape_debug`.
    /// Integers tagged with 2, 8 or 16 get a `0b`, `0o` or `0x` prefix; any
    /// other tag prints the value in decimal.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(flag) => Display::fmt(flag, f),
            Self::Char(ch) => write!(f, "'{}'", ch.escape_debug()),
            Self::Str(text) => write!(f, "\"{}\"", text.escape_debug()),
            Self::Int(value, radix) => match *radix {
                2 => write!(f, "0b{value:b}"),
                8 => write!(f, "0o{value:o}"),
                16 => write!(f, "0x{value:x}"),
                // Unprefixed: forward the caller's formatter so width/fill still apply.
                _ => Display::fmt(value, f),
            },
        }
    }
}
impl Display for Use {
    /// Prints the import tree.
    ///
    /// A glob is `*`, a name is itself, and a path is `segment::rest`.
    /// An empty tree is `{}`; trees of one to three items go through
    /// `delimit`/`list`, larger ones through `delimit_indented`/`list_wrap`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Glob => Display::fmt("*", f),
            Self::Name(ident) => Display::fmt(ident, f),
            Self::Path(head, tail) => write!(f, "{head}::{tail}"),
            Self::Tree(children) if children.is_empty() => Display::fmt("{}", f),
            Self::Tree(children) if children.len() <= 3 => {
                f.delimit("{ ", " }").list(children, ", ")
            }
            Self::Tree(children) => f
                .delimit_indented("{", "}")
                .list_wrap("\n", children, ",\n", ",\n"),
        }
    }
}
impl<A: Annotation> Display for Bind<A> {
    /// Prints the binding keyword, an optional `<...> ` list, then the
    /// pattern and expressions in the layout that suits the binding kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(kind, generics, pattern, values) = self;
        Display::fmt(kind, f)?;
        if !generics.is_empty() {
            f.delimit("<", "> ").list(generics, ", ")?;
        }
        match kind {
            // `Pat => Expr`
            BindOp::Match => f.delimit(fmt!("{pattern} => "), "").list(values, ",!? "),
            // `fn Pat Expr`, `mod Pat Expr`, `impl Pat Expr`
            BindOp::Fn | BindOp::Mod | BindOp::Impl => {
                f.delimit(fmt!("{pattern} "), "").list(values, ",!? ")
            }
            // Type definitions print the pattern only; the expressions are not shown.
            BindOp::Struct | BindOp::Enum => match pattern {
                Pat::NamedStruct(name, body) => match body.as_ref() {
                    Pat::Op(PatOp::Tuple, members) => f
                        .delimit_indented(fmt!("{name} {{"), "}")
                        .list_wrap("\n", members, ",\n", ",\n"),
                    single => write!(f, "{name} {{ {single} }}"),
                },
                unnamed => Display::fmt(unnamed, f),
            },
            // `let`/`const`/`static`/`type`: layout depends on how many expressions follow.
            BindOp::Let | BindOp::Const | BindOp::Static | BindOp::Type => {
                match values.as_slice() {
                    [] => write!(f, "{pattern}"),
                    [init] => write!(f, "{pattern} = {init}"),
                    [init, fallback] => write!(f, "{pattern} = {init} else {fallback}"),
                    rest => f.delimit(fmt!("{pattern} ("), ")").list(rest, ", "),
                }
            }
        }
    }
}
impl Display for BindOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
Self::Let => "let ",
Self::Const => "const ",
Self::Static => "static ",
Self::Type => "type ",
Self::Struct => "struct ",
Self::Enum => "enum ",
Self::Fn => "fn ",
Self::Mod => "mod ",
Self::Impl => "impl ",
Self::Match => "",
})
}
}
impl<A: Annotation> Display for Make<A> {
    /// Prints the constructed expression and its arms, using `({expr} {`
    /// and `})` as delimiters and `, ` as the arm separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.delimit(fmt!("({} {{", self.0), "})").list(&self.1, ", ")
    }
}
impl<A: Annotation> Display for MakeArm<A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -598,7 +622,7 @@ impl<A: Annotation> TryFrom<Expr<A>> for Pat {
fn try_from(value: Expr<A>) -> Result<Self, Self::Error> {
Ok(match value {
Expr::Id(FqPath { mut parts }) if parts.len() == 1 => {
Expr::Id(Path { mut parts }) if parts.len() == 1 => {
match parts.pop().expect("parts should have len 1") {
ig if ig == "_" => Self::Ignore,
name => Self::Name(name),