commit c83218d750d3cd8af6ddadf35b1a6b1429d53f2f Author: John Date: Thu Aug 28 02:26:06 2025 -0400 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..7bc6f00 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,179 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags", + "libc", + "parking_lot", +] + +[[package]] +name = "doughlang" +version = "0.1.0" +dependencies = [ + "repline", + "unicode-ident", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + +[[package]] +name = "repline" +version = "0.0.8" +source = "registry+https://git.soft.fish/j/_cargo-index.git" +checksum = "9e0ba602730444faec5566123f0717a61c74275988c82840a29cbda8b970438d" +dependencies = [ + "crossterm", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + 
+[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..2f1088b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "doughlang" +version = "0.1.0" +edition = "2024" + +[dependencies] +repline = { version = "0.0.8", registry = "soft-fish" } +unicode-ident = "1.0.12" diff --git a/dummy.do 
b/dummy.do new file mode 100644 index 0000000..6c7f0c0 --- /dev/null +++ b/dummy.do @@ -0,0 +1,48 @@ +#!/usr/bin/env conlang +// This is a Conlang file. + +// This is a function. It can be called with the call operator. +// The function called `main` is the program's entrypoint +// fn main() -> (&str, bool, i128) + +const main = fn () { + // An if expression is like the ternary conditional operator in C + let y = if 10 < 50 { + "\u{1f988}" + } else { + "x" + }; + + // A `while` expression is like the while-else construct in Python, + // but it returns a value via the `break` keyword + let z = while false { + // do a thing repeatedly + break true + } else { + // If `while` does not `break`, fall through to the `else` expression + false + }; + // The same is true of `for` expressions! + // let w = for idx in 0..100 { + // if idx > 2 * 2 { + // break idx + // } + // } else { + // 12345 + // }; + + + // desugars to + { + let _pass = || if idx > 2 * 2 { break idx }; + let _body = || { 12345 }; + let _it = 0..100; + loop if let idx = _it.next() _pass() else _fail() + }; + + + // A block evaluates to its last expression, + // or Empty if there is none + // (🦈, false, 5) + (y, z, w) +} diff --git a/license.md b/license.md new file mode 100644 index 0000000..967f2c6 --- /dev/null +++ b/license.md @@ -0,0 +1,13 @@ +Copyright (c) 2022 Soft Fish + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. 
+ +The software is provided *"AS IS"* and the author *DISCLAIMS ALL WARRANTIES* with +regard to this software *INCLUDING ALL IMPLIED WARRANTIES of merchantability +and fitness.* In no event shall the author be liable for any special, direct, +indirect, or consequential damages or any damages whatsoever resulting from +loss of use, data or profits, whether in an action of contract, negligence or +other tortious action, arising out of or in connection with the use or +performance of this software. diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..c460cf0 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,16 @@ +unstable_features = true +max_width = 100 +wrap_comments = true +comment_width = 100 +struct_lit_width = 100 + +imports_granularity = "Crate" +# Allow structs to fill an entire line +# use_small_heuristics = "Max" +# Allow small functions on single line +# fn_single_line = true + +# Alignment +enum_discrim_align_threshold = 12 +#struct_field_align_threshold = 12 +where_single_line = true diff --git a/samples/receiver.do b/samples/receiver.do new file mode 100644 index 0000000..9a81277 --- /dev/null +++ b/samples/receiver.do @@ -0,0 +1,59 @@ +#!/usr/bin/env dough + +/* +Type = "type" Identifier ()? '=' TypeSpec + +TypeSpec = ( + | Identifier + | str (StructField),* uct + | tup (TupleField),* le + | cho (ChoiceField),* ice +) + +StructField = Identifier ':' TypeSpec +TupleField = TypeSpec +EnumField = Identifier ('(' TypeSpec ')')? + +*/ + +// Product type with named fields +type Product = { + a: i32, + b: T, + c: { + d: i32, + e: i32, + f: [] + }, +}; + +// Product type with indexed fields +type Tuple = ( + i32, + T, + U, +); + +// Choice/Sum type, which degrades to enumeration +type Sum = Nothing | A(Product) | B(Tuple) ; + + +// Kotlin style? +type Option = { + None, + Some(V) +} + +// fucked up? 
+type Option (None | Some(V)); + +fn x(self: &Sum) -> Product { + match self { + Nothing | B(_) => panic(), + A(value) => + } +} + +fun x(a: T) -> A { + a.get() +} diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..6838d42 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,384 @@ +//! The Abstract Syntax Tree defines an interface between the parser and type checker + +pub mod matcher; + +/// A value with an annotation. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Anno(pub T, pub A); + +/// An annotation: extra data added on to important AST nodes. +pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {} +impl Annotation for T {} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Literal { + /// A boolean literal: true | false + Bool(bool), + /// A character literal: 'a', '\u{1f988}' + Char(char), + /// An integer literal: 0, 123, 0x10 + Int(i128), + /// A string literal: + Str(String), +} + +/// Binding patterns for each kind of matchable [Ty] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Pat { + Ignore, + MetId(String), + Name(String), + Rest(Option>), + Lit(Literal), + Tuple(Vec), + Slice(Vec), +} + +/// The arms of a make expression +/// ```ignore +/// Identifier (':' Expr)? +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MakeArm(pub String, pub Option, A>>); + +/// The arms of a match expression +/// ```ignore +/// (Pat |)* Pat? 
=> Expr +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MatchArm(pub Vec, pub Anno, A>); + +/// In-universe types +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Ty { + /// `_` + Infer, + /// `(..Tys)` + Tuple(Vec), + /// `[Ty]` + Slice(Box), + /// `[Ty; _]` + Array(Box, usize), + /// `[Rety, ..Args]` + Fn(Vec), +} + +/// Expressions: The beating heart of Dough +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Expr { + /// An identifier + Id(String), + /// A meta-identifier + MetId(String), + /// A literal bool, string, char, or int + Lit(Literal), + /// `let Pat (= Expr)?` + Let(Pat, Option>>), + /// `const Pat = Expr` (Basically let rec) + Const(Pat, Box>), + /// `| Pat,* | Expr` | `|| Expr` | `fn (Pat,*) Expr` + Fn(Vec, Box>), + /// Expr { (Ident (: Expr)?),* } + Make(Box>, Vec>), + /// match Expr { MatchArm,* } + Match(Box>, Vec>), + /// Op Expr | Expr Op | Expr (Op Expr)+ | Op Expr Expr else Expr + Op(Op, Vec>), +} + +impl Expr { + pub fn anno(self, annotation: A) -> Anno, A> { + Anno(self, annotation) + } + + pub fn is_place(&self) -> bool { + matches!( + self, + Self::Id(_) + | Self::Op(Op::Index, _) + | Self::Op(Op::Dot, _) + | Self::Op(Op::Path, _) + | Self::Op(Op::Deref, _) + ) + } + + // pub fn is_assignee(&self) -> bool { + // match self { + // Self::Id(_) => todo!(), + // Self::MetId(_) => todo!(), + // Self::Lit(literal) => todo!(), + // Self::Let(pat, anno) => todo!(), + // Self::Const(pat, anno) => todo!(), + // Self::Fn(pats, anno) => todo!(), + // Self::Make(anno, make_arms) => todo!(), + // Self::Match(anno, match_arms) => todo!(), + // Self::Op(Op::Add, annos) => todo!(), + // Self::Op(Op::And, _) => false, + // } + // } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Op { + // -- fake operators used to assign precedences to special forms + Id, // Identifier + Mid, // MetaIdentifier + Lit, // Literal + Let, // let Pat = Expr + Const, // const Pat = Expr + Fn, // fn ( Pat,* ) Expr + Make, // Expr{ Expr,* } + Macro, 
// macro Expr => Expr + Match, // match Expr { MatchArm,* } + End, // Produces an empty value. + + // -- true operators + Do, // Expr ; Expr + Block, // { Expr } + Array, // [ Expr,* ] + Group, // ( Expr ,?) + Tuple, // ( Expr,* ) + + Try, // Expr '?' + Index, // Expr [ Expr,* ] + Call, // Expr ( Expr,* ) + + Lambda, // |Pat?| Expr + + Loop, // loop Expr + If, // if Expr Expr (else Expr)? + While, // while Expr Expr (else Expr)? + Break, // break Expr + Return, // return Expr + + Dot, // Expr . Expr + Path, // Expr :: Expr + + RangeEx, // Expr? ..Expr + RangeIn, // Expr? ..=Expr + Neg, // -Expr + Not, // !Expr + Identity, // !!Expr + Refer, // &Expr + Deref, // *Expr + + Mul, // Expr * Expr + Div, // Expr / Expr + Rem, // Expr % Expr + + Add, // Expr + Expr + Sub, // Expr - Expr + + Shl, // Expr << Expr + Shr, // Expr >> Expr + + And, // Expr & Expr + Xor, // Expr ^ Expr + Or, // Expr | Expr + + Lt, // Expr < Expr + Leq, // Expr <= Expr + Eq, // Expr == Expr + Neq, // Expr != Expr + Geq, // Expr >= Expr + Gt, // Expr > Expr + + LogAnd, // Expr && Expr + LogXor, // Expr ^^ Expr + LogOr, // Expr || Expr + + Set, // Expr = Expr +} + +use crate::{fmt::FmtAdapter, span::Span}; +use std::{fmt::Display, format_args as fmt}; + +impl Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Bool(v) => v.fmt(f), + Self::Char(c) => write!(f, "'{}'", c.escape_debug()), + Self::Int(i) => i.fmt(f), + Self::Str(s) => write!(f, "\"{}\"", s.escape_debug()), + } + } +} + +impl Display for Anno { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Id(id) => id.fmt(f), + Self::MetId(id) => write!(f, "`{id}"), + Self::Lit(literal) => literal.fmt(f), + Self::Let(pat, Some(expr)) => write!(f, "let {pat} = {expr}"), + Self::Let(pat, None) => write!(f, "let 
{pat}"), + Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"), + Self::Make(expr, make_arms) => { + f.delimit(fmt!("make {expr} {{"), "}").list(make_arms, ", ") + } + Self::Match(expr, match_arms) => f + .delimit_indented(fmt!("match {expr} {{\n"), "\n}") + .list_end(match_arms, ",\n", ","), + Self::Fn(pats, expr) => f.delimit("fn (", fmt!(") {expr}")).list(pats, ", "), + + Self::Op(op @ (Op::If | Op::While), exprs) => match exprs.as_slice() { + [cond, pass, fail] => write!(f, "{op}{cond} {pass} else {fail}"), + other => f.delimit(fmt!("({op}, "), ")").list(other, ", "), + }, + Self::Op(Op::Array, exprs) => f.delimit("[", "]").list(exprs, ", "), + Self::Op(Op::Block, exprs) => f.delimit_indented("{\n", "\n}").list(exprs, ", "), + Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "), + + Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() { + [callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "), + [] => write!(f, "{op}"), + }, + Self::Op(op @ Op::Index, exprs) => match exprs.as_slice() { + [callee, args @ ..] 
=> f.delimit(fmt!("{callee}["), "]").list(args, ", "), + [] => write!(f, "{op}"), + }, + + Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"), + Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "), + Self::Op(op @ Op::Try, exprs) => f.delimit("", op).list(exprs, ", "), + Self::Op(op, exprs) => match exprs.as_slice() { + [_] => f.delimit(op, "").list(exprs, ", "), + many => f.delimit("(", ")").list(many, op), + }, + } + } +} + +impl Display for Op { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Op::Do => "; ".fmt(f), + Op::Id => "_".fmt(f), + Op::Mid => "`".fmt(f), + Op::Lit => "##".fmt(f), + Op::Let => "let ".fmt(f), + Op::Const => "const ".fmt(f), + Op::Fn => "fn ".fmt(f), + Op::Macro => "macro ".fmt(f), + Op::Match => "match ".fmt(f), + Op::End => "()".fmt(f), + Op::Block => "{}".fmt(f), + Op::Array => "[]".fmt(f), + Op::Group => "()".fmt(f), + Op::Tuple => "()".fmt(f), + Op::Try => "?".fmt(f), + Op::Index => "".fmt(f), + Op::Call => "".fmt(f), + Op::Make => "".fmt(f), + Op::Lambda => "".fmt(f), + Op::Loop => "loop ".fmt(f), + Op::If => "if ".fmt(f), + Op::While => "while ".fmt(f), + Op::Break => "break ".fmt(f), + Op::Return => "return ".fmt(f), + Op::Dot => ".".fmt(f), + Op::Path => "::".fmt(f), + Op::RangeEx => " .. 
".fmt(f), + Op::RangeIn => " ..= ".fmt(f), + Op::Neg => "-".fmt(f), + Op::Not => "!".fmt(f), + Op::Identity => "!!".fmt(f), + Op::Refer => "&".fmt(f), + Op::Deref => "*".fmt(f), + Op::Mul => " * ".fmt(f), + Op::Div => " / ".fmt(f), + Op::Rem => " % ".fmt(f), + Op::Add => " + ".fmt(f), + Op::Sub => " - ".fmt(f), + Op::Shl => " << ".fmt(f), + Op::Shr => " >> ".fmt(f), + Op::And => " & ".fmt(f), + Op::Xor => " ^ ".fmt(f), + Op::Or => " | ".fmt(f), + Op::Lt => " < ".fmt(f), + Op::Leq => " <= ".fmt(f), + Op::Eq => " == ".fmt(f), + Op::Neq => " != ".fmt(f), + Op::Geq => " >= ".fmt(f), + Op::Gt => " > ".fmt(f), + Op::LogAnd => " && ".fmt(f), + Op::LogXor => " ^^ ".fmt(f), + Op::LogOr => " || ".fmt(f), + Op::Set => " = ".fmt(f), + } + } +} + +impl Display for MakeArm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self(name, Some(body)) => write!(f, "{name}: {body}"), + Self(name, None) => write!(f, "{name}"), + } + } +} + +impl Display for MatchArm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self(pats, expr) = self; + f.delimit("", fmt!(" => {expr}")).list(pats, " | ") + } +} + +impl Display for Pat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Ignore => "_".fmt(f), + Self::Lit(literal) => literal.fmt(f), + Self::MetId(name) => name.fmt(f), + Self::Name(name) => name.fmt(f), + Self::Rest(Some(rest)) => write!(f, "..{rest}"), + Self::Rest(None) => write!(f, ".."), + Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "), + Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "), + } + } +} + +impl TryFrom> for Pat { + type Error = Expr; + + fn try_from(value: Expr) -> Result { + Ok(match value { + Expr::Id(name) if name == "_" => Self::Ignore, + Expr::Id(name) => Self::Name(name), + Expr::MetId(name) => Self::MetId(name), + Expr::Lit(literal) => Self::Lit(literal), + Expr::Op(Op::RangeEx, exprs) if exprs.is_empty() => Self::Rest(None), + 
Expr::Op(Op::RangeEx, mut exprs) if exprs.len() == 1 => { + Self::Rest(Some(Box::new(Self::try_from(exprs.remove(0))?))) + } + Expr::Op(Op::Tuple, exprs) => Self::Tuple( + exprs + .into_iter() + .map(Self::try_from) + .collect::>()?, + ), + Expr::Op(Op::Array, exprs) => Self::Slice( + exprs + .into_iter() + .map(Self::try_from) + .collect::>()?, + ), + other => Err(other)?, + }) + } +} +impl TryFrom, A>> for Pat { + type Error = Expr; + + fn try_from(value: Anno, A>) -> Result { + Self::try_from(value.0) + } +} diff --git a/src/ast/matcher.rs b/src/ast/matcher.rs new file mode 100644 index 0000000..8483d5d --- /dev/null +++ b/src/ast/matcher.rs @@ -0,0 +1,260 @@ +//! Implements pattern matching + +use super::*; +use std::collections::HashMap; + +/// Stores a substitution from meta-identifiers to values +#[derive(Clone, Debug)] +pub struct Subst { + pub exp: HashMap>, + pub pat: HashMap, +} + +impl Default for Subst { + fn default() -> Self { + Self { exp: Default::default(), pat: Default::default() } + } +} + +pub trait Match { + /// Applies a substitution rule from `pat` to `template` on `self` + fn apply_rule(&mut self, pat: &Self, template: &Self) -> bool + where Self: Sized + Clone { + let Some(sub) = self.construct(pat) else { + return false; + }; + + *self = template.clone(); + self.apply(&sub); + + true + } + + /// With self as the pattern, recursively applies the Subst + fn apply(&mut self, sub: &Subst); + + /// Implements recursive Subst-building for Self + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool; + + /// Constructs a Subst + fn construct(&self, pat: &Self) -> Option> { + let mut sub = Subst::default(); + Match::recurse(&mut sub, pat, self).then_some(sub) + } + + /// Matches self against the provided pattern + fn match_with(&self, pat: &Self, sub: &mut Subst) -> bool { + Match::recurse(sub, pat, self) + } +} + +impl + Annotation, A: Annotation> Match for Anno { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + 
Match::recurse(sub, &pat.0, &expr.0) + } + + fn apply(&mut self, sub: &Subst) { + self.0.apply(sub); + } +} + +impl Match for Expr { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + match (pat, expr) { + (Expr::MetId(name), _) if name == "_" => true, + (Expr::MetId(name), _) => sub + .exp + .insert(name.clone(), expr.clone()) + .filter(|v| v != expr) + .is_none(), + (Expr::Id(pat), Expr::Id(expr)) => pat == expr, + (Expr::Id(_), _) => false, + (Expr::Lit(pat), Expr::Lit(expr)) => pat == expr, + (Expr::Lit(_), _) => false, + (Expr::Let(pat_pat, pat_expr), Expr::Let(expr_pat, expr_expr)) => { + Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr) + } + (Expr::Let(..), _) => false, + (Expr::Const(pat_pat, pat_expr), Expr::Const(expr_pat, expr_expr)) => { + Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr) + } + (Expr::Const(..), _) => false, + (Expr::Make(pat, pat_arms), Expr::Make(expr, expr_arms)) => { + Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms) + } + (Expr::Make(..), _) => false, + (Expr::Match(pat, pat_arms), Expr::Match(expr, expr_arms)) => { + Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms) + } + (Expr::Match(..), _) => false, + (Expr::Fn(pat_pats, pat_expr), Expr::Fn(expr_pats, expr_expr)) => { + Match::recurse(sub, pat_pats, expr_pats) && Match::recurse(sub, pat_expr, expr_expr) + } + (Expr::Fn(..), _) => false, + (Expr::Op(pat_op, pat_exprs), Expr::Op(expr_op, expr_exprs)) => { + Match::recurse(sub, pat_op, expr_op) && Match::recurse(sub, pat_exprs, expr_exprs) + } + (Expr::Op(..), _) => false, + } + } + + fn apply(&mut self, sub: &Subst) { + match self { + Expr::MetId(id) => { + if let Some(expr) = sub.exp.get(id) { + *self = expr.clone() + } + } + Expr::Id(_) | Expr::Lit(_) => {} + Expr::Let(pat, expr) => { + pat.apply(sub); + expr.apply(sub); + } + Expr::Const(pat, expr) => { + pat.apply(sub); + expr.apply(sub); + } + 
Expr::Make(expr, make_arms) => { + expr.apply(sub); + make_arms.apply(sub); + } + Expr::Match(expr, match_arms) => { + expr.apply(sub); + match_arms.apply(sub); + } + Expr::Fn(pats, expr) => { + pats.apply(sub); + expr.apply(sub); + } + Expr::Op(op, exprs) => { + op.apply(sub); + exprs.apply(sub); + } + }; + } +} + +impl Match for MakeArm { + // TODO: order-independent matching for MakeArm specifically. + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + pat.0 == expr.0 && Match::recurse(sub, &pat.1, &expr.1) + } + + fn apply(&mut self, sub: &Subst) { + let Self(_, expr) = self; + expr.apply(sub); + } +} + +impl Match for MatchArm { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + Match::recurse(sub, &pat.0, &expr.0) && Match::recurse(sub, &pat.1, &expr.1) + } + + fn apply(&mut self, sub: &Subst) { + let Self(pats, expr) = self; + pats.apply(sub); + expr.apply(sub); + } +} + +impl Match for Pat { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + match (pat, expr) { + (Pat::MetId(name), _) if name == "_" => true, + (Pat::MetId(name), _) => sub + .pat + .insert(name.clone(), expr.clone()) + .filter(|v| v != expr) + .is_none(), + (Pat::Ignore, Pat::Ignore) => true, + (Pat::Ignore, _) => false, + (Pat::Name(pat), Pat::Name(expr)) => pat == expr, + (Pat::Name(_), _) => false, + (Pat::Rest(pat), Pat::Rest(expr)) => Match::recurse(sub, pat, expr), + (Pat::Rest(_), _) => false, + (Pat::Lit(pat), Pat::Lit(expr)) => pat == expr, + (Pat::Lit(_), _) => false, + (Pat::Tuple(pat), Pat::Tuple(expr)) => Match::recurse(sub, pat, expr), + (Pat::Tuple(_), _) => false, + (Pat::Slice(pat), Pat::Slice(expr)) => Match::recurse(sub, pat, expr), + (Pat::Slice(_), _) => false, + } + } + + fn apply(&mut self, sub: &Subst) { + match self { + Pat::Ignore | Pat::Name(_) | Pat::Lit(_) => {} + Pat::MetId(id) => { + if let Some(expr) = sub.pat.get(id) { + *self = expr.clone() + } + } + Pat::Rest(pat) => pat.apply(sub), + Pat::Tuple(pats) => 
pats.apply(sub), + Pat::Slice(pats) => pats.apply(sub), + } + } +} + +impl Match for Op { + fn recurse(_: &mut Subst, pat: &Self, expr: &Self) -> bool { + pat == expr + } + + fn apply(&mut self, _sub: &Subst) {} +} + +impl> Match for [T] { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + if pat.len() != expr.len() { + return false; + } + for (pat, expr) in pat.iter().zip(expr.iter()) { + if !Match::recurse(sub, pat, expr) { + return false; + } + } + true + } + + fn apply(&mut self, sub: &Subst) { + for item in self { + item.apply(sub); + } + } +} + +impl> Match for Box { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + Match::recurse(sub, pat.as_ref(), expr.as_ref()) + } + + fn apply(&mut self, sub: &Subst) { + self.as_mut().apply(sub); + } +} + +impl> Match for Vec { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + Match::recurse(sub, pat.as_slice(), expr.as_slice()) + } + + fn apply(&mut self, sub: &Subst) { + self.as_mut_slice().apply(sub); + } +} + +impl> Match for Option { + fn recurse(sub: &mut Subst, pat: &Self, expr: &Self) -> bool { + match (pat, expr) { + (Some(pat), Some(expr)) => Match::recurse(sub, pat, expr), + (None, None) => true, + _ => false, + } + } + + fn apply(&mut self, sub: &Subst) { + self.as_mut_slice().apply(sub); + } +} diff --git a/src/fmt.rs b/src/fmt.rs new file mode 100644 index 0000000..2839dc5 --- /dev/null +++ b/src/fmt.rs @@ -0,0 +1,139 @@ +//! The Conlang format extensions + +use std::fmt::{Display, Write}; + +impl FmtAdapter for W {} +pub trait FmtAdapter: Write { + /// Indents by one level. + fn indent(&mut self) -> Indent<'_, Self> { + Indent::new(self, " ") + } + + /// Pastes `indent` after each newline. + fn indent_with(&mut self, indent: &'static str) -> Indent<'_, Self> { + Indent::new(self, indent) + } + + /// Delimits a section with `open` and `close`. 
+ fn delimit(&mut self, open: O, close: E) -> Delimit<'_, Self, E> { + Delimit::new(self, open, close) + } + + /// Delimits a section with `open` and `close`, raising the indent level within. + fn delimit_indented( + &mut self, + open: O, + close: E, + ) -> DelimitIndent<'_, Self, E> { + DelimitIndent::new(self, open, close) + } + + /// Formats bracketed lists of the kind (Item (Comma Item)*)? + #[inline] + fn list(&mut self, items: &[Item], sep: Sep) -> std::fmt::Result { + self.list_end(items, sep, "") + } + + /// Formats `items` with `sep` between neighbours (none after the final item), then writes `end`. + fn list_end( + &mut self, + items: &[Item], + sep: Sep, + end: End, + ) -> std::fmt::Result { + let mut pats = items; + while let [pat, rest @ ..] = pats { + write!(self, "{pat}")?; + if !rest.is_empty() { + write!(self, "{sep}")?; + } + pats = rest + } + write!(self, "{end}") + } +} + +/// Pads text with leading indentation after every newline +pub struct Indent<'f, F: Write + ?Sized> { + indent: &'static str, + needs_indent: bool, + f: &'f mut F, +} + +impl<'f, F: Write + ?Sized> Indent<'f, F> { + /// Wraps `f`; nothing is indented until a newline has been written through the wrapper. + pub fn new(f: &'f mut F, indent: &'static str) -> Self { + Indent { f, needs_indent: false, indent } + } +} + +impl Write for Indent<'_, F> { + /// Writes `s` piece by piece, emitting the indent before each piece that follows a newline. + fn write_str(&mut self, s: &str) -> std::fmt::Result { + for s in s.split_inclusive('\n') { + if self.needs_indent { + self.f.write_str(self.indent)?; + } + self.f.write_str(s)?; + self.needs_indent = s.ends_with('\n'); + } + Ok(()) + } + /// Writes one character, emitting the indent first if the previous write ended with a newline. + fn write_char(&mut self, c: char) -> std::fmt::Result { + if self.needs_indent { + // Use the configured indent (not a fixed literal) so `indent_with` behaves the same as in `write_str`. + self.f.write_str(self.indent)?; + } + self.needs_indent = c == '\n'; + self.f.write_char(c) + } +} + +/// Prints delimiters around anything formatted with this. 
Does not indent; see [DelimitIndent] +pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> { + f: &'f mut F, + close: E, +} + +impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> { + pub fn new(f: &'f mut F, open: O, close: E) -> Self { + let _ = write!(f, "{open}"); + Self { f, close } + } +} + +impl Drop for Delimit<'_, F, E> { + fn drop(&mut self) { + let Self { f, close, .. } = self; + let _ = write!(f, "{close}"); + } +} + +impl Write for Delimit<'_, F, E> { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.f.write_str(s) + } +} + +/// Prints delimiters around anything formatted with this. Implies [Indent] +pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> { + f: Indent<'f, F>, + close: E, +} + +impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> { + pub fn new(f: &'f mut F, open: O, close: E) -> Self { + let mut f = f.indent(); + let _ = write!(f, "{open}"); + Self { f, close } + } +} + +impl Drop for DelimitIndent<'_, F, E> { + fn drop(&mut self) { + let Self { f: Indent { f, .. }, close, .. } = self; + let _ = write!(f, "{}", close); + } +} + +impl Write for DelimitIndent<'_, F, E> { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.f.write_str(s) + } +} diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..e54044c --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,323 @@ +//! 
A lobster +use std::ops::Range; +#[allow(dead_code)] +use std::{iter::Peekable, str::CharIndices}; +use unicode_ident::{is_xid_continue, is_xid_start}; + +use crate::{span::Span, token::*}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LexError { + pub pos: u32, + pub res: &'static str, +} +impl std::error::Error for LexError {} +impl std::fmt::Display for LexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { pos, res } = self; + write!(f, "{pos}: {res}") + } +} + +#[derive(Clone, Debug)] +pub struct Lexer<'t> { + /// The source text + text: &'t str, + /// A peekable iterator over the source text + iter: Peekable>, + /// The start of the current token + head: u32, + /// The end of the current token + tail: u32, +} + +impl<'t> Lexer<'t> { + /// Constructs a new Lexer from some text + pub fn new(text: &'t str) -> Self { + let iter = text.char_indices().peekable(); + Self { text, iter, head: 0, tail: 0 } + } + + /// Peeks the next character without advancing the lexer + pub fn peek(&mut self) -> Option { + self.iter.peek().map(|&(_, c)| c) + } + + fn advance_tail(&mut self) { + match self.iter.peek() { + Some(&(idx, _)) => self.tail = idx as u32, + None => { + self.tail = self.text.len() as _; + } + } + } + + /// Takes the next character, advancing the tail + pub fn take(&mut self) -> Option { + let (_, c) = self.iter.next()?; + self.advance_tail(); + Some(c) + } + + pub fn next_if(&mut self, expected: char) -> Option { + let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?; + self.advance_tail(); + Some(c) + } + + /// Consumes the last-peeked character, advancing the tail + pub fn consume(&mut self) -> &mut Self { + self.iter.next(); + self.advance_tail(); + self + } + + /// Produces a LexError at the start of the current token + pub fn error(&self, res: &'static str) -> LexError { + LexError { pos: self.head, res } + } + + /// Produces a Token + pub fn produce(&mut self, kind: TKind) -> Token { + self.advance_tail(); + let span 
= Span(self.head, self.tail); + self.head = self.tail; + Token { lexeme: self.text[Range::from(span)].to_owned(), kind, span } + } + + pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: String) -> Token { + self.advance_tail(); + let span = Span(self.head, self.tail); + self.head = self.tail; + Token { lexeme, kind, span } + } + + /// Consumes 0 or more whitespace + pub fn skip_whitespace(&mut self) -> &mut Self { + while self.peek().is_some_and(char::is_whitespace) { + let _ = self.consume(); + } + self + } + + pub fn start_token(&mut self) -> &mut Self { + self.head = self.tail; + self + } + + /// Scans forward until it finds the next Token in the input + pub fn scan(&mut self) -> Result { + use TKind::*; + // !"#%&'()*+,-./:;<=>?@[\\]^`{|}~ + let tok = match self + .skip_whitespace() + .start_token() + .peek() + .ok_or_else(|| self.error("EOF"))? + { + '!' => Bang, + '"' => return self.string(), + '#' => Hash, + '%' => Rem, + '&' => Amp, + '\'' => return self.character(), + '(' => LParen, + ')' => RParen, + '*' => Star, + '+' => Plus, + ',' => Comma, + '-' => Minus, + '.' => Dot, + '/' => Slash, + '0' => Integer, + '1'..='9' => return self.digits::<10>(), + ':' => Colon, + ';' => Semi, + '<' => Lt, + '=' => Eq, + '>' => Gt, + '?' 
=> Question, + '@' => At, + '[' => LBrack, + '\\' => Backslash, + ']' => RBrack, + '^' => Xor, + '`' => Grave, + '{' => LCurly, + '|' => Bar, + '}' => RCurly, + '~' => Tilde, + '_' => return self.identifier(), + c if is_xid_start(c) => return self.identifier(), + _ => Err(self.error("Invalid"))?, + }; + + // Handle digraphs + let tok = match (tok, self.consume().peek()) { + (Integer, Some('b')) => return self.consume().digits::<2>(), + (Integer, Some('d')) => return self.consume().digits::<10>(), + (Integer, Some('o')) => return self.consume().digits::<8>(), + (Integer, Some('x')) => return self.consume().digits::<16>(), + (Integer, Some('z')) => return self.consume().digits::<36>(), + (Integer, _) => return self.digits::<10>(), + (Amp, Some('&')) => AmpAmp, + (Amp, Some('=')) => AmpEq, + (Bang, Some('!')) => BangBang, + (Bang, Some('=')) => BangEq, + (Bar, Some('|')) => BarBar, + (Bar, Some('=')) => BarEq, + (Colon, Some(':')) => ColonColon, + (Dot, Some('.')) => DotDot, + (Eq, Some('=')) => EqEq, + (Eq, Some('>')) => FatArrow, + (Gt, Some('=')) => GtEq, + (Gt, Some('>')) => GtGt, + (Hash, Some('!')) => HashBang, + (Lt, Some('=')) => LtEq, + (Lt, Some('<')) => LtLt, + (Minus, Some('=')) => MinusEq, + (Minus, Some('>')) => Arrow, + (Plus, Some('=')) => PlusEq, + (Rem, Some('=')) => RemEq, + (Slash, Some('*')) => return Ok(self.block_comment()?.produce(Comment)), + (Slash, Some('=')) => SlashEq, + (Slash, Some('/')) => return self.line_comment(), + (Star, Some('=')) => StarEq, + (Xor, Some('=')) => XorEq, + (Xor, Some('^')) => XorXor, + _ => return Ok(self.produce(tok)), + }; + + // Handle trigraphs + let tok = match (tok, self.consume().peek()) { + (HashBang, Some('/')) => return self.line_comment(), + (DotDot, Some('=')) => DotDotEq, + (GtGt, Some('=')) => GtGtEq, + (LtLt, Some('=')) => LtLtEq, + _ => return Ok(self.produce(tok)), + }; + + Ok(self.consume().produce(tok)) + } + + pub fn line_comment(&mut self) -> Result { + while 
self.consume().peek().is_some_and(|c| c != '\n') {} + Ok(self.produce(TKind::Comment)) + } + + pub fn block_comment(&mut self) -> Result<&mut Self, LexError> { + self.consume(); + while let Some(c) = self.take() { + match (c, self.peek()) { + ('/', Some('*')) => self.block_comment()?, + ('*', Some('/')) => return Ok(self.consume()), + _ => continue, + }; + } + Err(self.error("Unterminated block comment")) + } + + pub fn identifier(&mut self) -> Result { + while self.consume().peek().is_some_and(is_xid_continue) {} + let token = self.produce(TKind::Identifier); + Ok(Token { + kind: match token.lexeme.as_str() { + "break" => TKind::Break, + "const" => TKind::Const, + "do" => TKind::Do, + "else" => TKind::Else, + "false" => TKind::False, + "fn" => TKind::Fn, + "if" => TKind::If, + "let" => TKind::Let, + "loop" => TKind::Loop, + "macro" => TKind::Macro, + "match" => TKind::Match, + "return" => TKind::Return, + "then" => TKind::Do, + "true" => TKind::True, + "while" => TKind::While, + _ => token.kind, + }, + ..token + }) + } + + pub fn character(&mut self) -> Result { + let c = match self.consume().take() { + Some('\\') => self.escape()?, + Some(c) => c, + None => '\0', + }; + if self.take().is_some_and(|c| c == '\'') { + Ok(self.produce_with_lexeme(TKind::Character, c.into())) + } else { + Err(self.error("Unterminated character")) + } + } + + pub fn string(&mut self) -> Result { + let mut lexeme = String::new(); + self.consume(); + loop { + lexeme.push(match self.take() { + None => Err(self.error("Unterminated string"))?, + Some('\\') => self.escape()?, + Some('"') => break, + Some(c) => c, + }) + } + lexeme.shrink_to_fit(); + Ok(self.produce_with_lexeme(TKind::String, lexeme)) + } + + pub fn escape(&mut self) -> Result { + Ok(match self.take().ok_or_else(|| self.error("EOF"))? 
{ + ' ' => '\u{a0}', + '0' => '\0', + 'a' => '\x07', + 'b' => '\x08', + 'e' => '\x1b', + 'f' => '\x0c', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'u' => self.unicode_escape()?, + 'x' => self.hex_escape()?, + c => c, + }) + } + + pub fn hex_escape(&mut self) -> Result { + let out = (self.digit::<16>()? << 4) + self.digit::<16>()?; + char::from_u32(out).ok_or(self.error("Invalid digit")) + } + + pub fn unicode_escape(&mut self) -> Result { + self.next_if('{') + .ok_or_else(|| self.error("No unicode escape opener"))?; + let mut out = 0; + while let Some(c) = self.take() { + if c == '}' { + return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value")); + } + out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?; + } + Err(self.error("Unterminated unicode escape")) + } + + pub fn digits(&mut self) -> Result { + while self.peek().is_some_and(|c| c.is_digit(BASE)) { + self.consume(); + } + Ok(self.produce(TKind::Integer)) + } + + pub fn digit(&mut self) -> Result { + if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) { + Ok(digit) + } else { + Err(self.error("Invalid digit")) + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f0a92e4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,78 @@ +//! The Dough Programming Language +//! +//! A simpler programming language + +pub mod fmt; + +pub mod span; + +pub mod token; + +pub mod lexer; + +pub mod ast; + +pub mod parser; + +pub mod typed_ast { + //! 
The Typed AST defines an interface between the type checker and code generator + + use crate::span::Span; + use std::collections::HashMap; + + pub struct Table { + /// Fully qualified names, for debugging + pub names: Vec, + /// The unprojected relative stack offset + pub local: HashMap, + } + + /// DefID annotation + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct Defn { + pub span: Span, + /// The index of this name in the associated Table + pub defid: usize, + } +} + +pub mod typeck {} + +pub mod ir { + //! The IR defines an interface between the code generator and interpreter(?) +} + +pub mod interpreter { + //! The Doughlang interpreter interprets an AST + + use std::sync::{Arc, Mutex}; + + #[derive(Clone, Debug)] + pub enum Value { + Bool(bool), + ISize(isize), + Adt(Arc>), + } + impl Value { + pub fn cast(self, as_type: &str) -> Self { + match (self, as_type) { + (Self::ISize(v), "isize") => Self::ISize(v), // redundant cast + (v, _) => v, // invalid cast! + } + } + } + + pub enum Adt { + Array(Vec), + Tuple(Vec), + } + + impl std::fmt::Debug for Adt { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Array(elem) => f.debug_list().entries(elem).finish(), + Self::Tuple(elem) => f.debug_list().entries(elem).finish(), + } + } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..03f7288 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,121 @@ +//! 
Tests the lexer +#[allow(unused_imports)] +use doughlang::{ + ast::{ + Expr, + matcher::{Match, Subst}, + }, + lexer::{LexError, Lexer}, + parser::{ParseError, Parser}, + span::Span, + token::{TKind, Token}, +}; +use repline::prebaked::*; +use std::{ + error::Error, + io::{IsTerminal, stdin}, +}; + +fn main() -> Result<(), Box> { + if stdin().is_terminal() { + read_and("\x1b[32m", " >", "?>", |line| match line.trim_end() { + "" => Ok(Response::Continue), + "exit" => Ok(Response::Break), + "clear" => { + print!("\x1b[H\x1b[2J"); + Ok(Response::Deny) + } + "pat" => { + if let Err(e) = subst() { + println!("\x1b[31m{e}\x1b[0m"); + } + Ok(Response::Deny) + } + _ => { + parse(line); + Ok(Response::Accept) + } + })?; + } else { + let doc = std::io::read_to_string(stdin())?; + lex(&doc); + parse(&doc); + } + Ok(()) +} + +fn lex(document: &str) { + let mut lexer = Lexer::new(document); + loop { + match lexer.scan() { + Ok(Token { lexeme, kind, span: Span { head, tail } }) => { + println!( + "{kind:?}\x1b[11G {head:<4} {tail:<4} {}", + lexeme.escape_debug() + ) + } + Err(e) => { + eprintln!("{e}"); + break; + } + } + } +} + +fn subst() -> Result<(), Box> { + let mut rl = repline::Repline::new("\x1b[35mexp", " >", "?>"); + let exp = rl.read()?; + let mut exp: Expr = Parser::new(Lexer::new(&exp)).parse(0)?; + println!("\x1b[G\x1b[J{exp}"); + + rl.accept(); + + loop { + rl.set_color("\x1b[36mpat"); + let pat = rl.read()?; + rl.accept(); + print!("\x1b[G\x1b[J"); + let mut p = Parser::new(Lexer::new(&pat)); + + let Ok(pat) = p.parse::(0) else { + println!("{exp}"); + continue; + }; + + if p.next_if(TKind::Colon).is_err() { + let Some(Subst { exp, pat }) = exp.construct(&pat) else { + continue; + }; + for (name, pat) in pat.iter() { + println!("{name}: {pat}") + } + for (name, expr) in exp.iter() { + println!("{name}: {expr}") + } + continue; + } + + let sub: Expr = p.parse(0)?; + if exp.apply_rule(&pat, &sub) { + println!("{exp}"); + } else { + println!("No match: {pat} in 
{exp}\n") + } + } +} + +fn parse(document: &str) { + let mut parser = Parser::new(Lexer::new(document)); + loop { + match parser.parse::(0) { + // Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break, + Err(e) => { + println!("\x1b[31m{e}\x1b[0m"); + break; + } + Ok(v) => { + println!("{v}"); + } + } + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..a6085ec --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,516 @@ +//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes. +use crate::{ + ast::*, + lexer::{LexError, Lexer}, + span::Span, + token::{TKind, Token}, +}; +use std::{error::Error, fmt::Display, vec}; + +pub mod numeric; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ParseError { + FromLexer(LexError), + Expected(TKind, Span), + NotPattern(TKind, Span), + NotPrefix(TKind, Span), + NotInfix(TKind, Span), + NotPostfix(TKind, Span), +} +impl Error for ParseError {} +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::FromLexer(e) => e.fmt(f), + Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."), + Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."), + Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."), + Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a infix operator."), + Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."), + } + } +} + +pub type PResult = Result; + +#[derive(Debug)] +pub struct Parser<'t> { + pub lexer: Lexer<'t>, + pub next_tok: Option, + pub last_loc: Span, +} + +impl<'t> Parser<'t> { + /// Constructs a new Parser + pub fn new(lexer: Lexer<'t>) -> Self { + Self { lexer, next_tok: None, last_loc: Span::default() } + } + + /// The identity function. This exists to make production chaining easier. 
+ pub fn then(&self, t: T) -> T { + t + } + + pub fn span(&self) -> Span { + self.last_loc + } + + /// Parses a value that implements the [Parse] trait. + pub fn parse>(&mut self, level: usize) -> PResult { + Parse::parse(self, level) + } + + /// Peeks the next [Token]. Returns [ParseError::FromLexer] on lexer error. + pub fn peek(&mut self) -> PResult<&Token> { + let next_tok = match self.next_tok.take() { + Some(tok) => tok, + None => match self.lexer.scan() { + Ok(tok) => tok, + Err(e) => Err(ParseError::FromLexer(e))?, + }, + }; + self.last_loc = next_tok.span; + self.next_tok = Some(next_tok); + Ok(self.next_tok.as_ref().expect("should have token")) + } + + /// Peeks the next token if it matches the `expected` [TKind] + pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> { + self.peek().into_iter().find(|tok| tok.kind == expected) + } + + /// Consumes and returns the currently-peeked [Token]. + pub fn take(&mut self) -> Option { + self.next_tok.take() + } + + /// Consumes the currently-peeked [Token], returning its lexeme without cloning. + pub fn take_lexeme(&mut self) -> Option { + self.take().map(|tok| tok.lexeme) + } + + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> PResult { + self.peek()?; + Ok(self.take().expect("should have token here")) + } + + /// Consumes and returns the next [Token] if it matches the `expected` [TKind] + pub fn next_if(&mut self, expected: TKind) -> PResult { + let token = self.peek()?; + if token.kind == expected { + Ok(self.take().expect("should have token here")) + } else { + Err(ParseError::Expected(expected, token.span)) + } + } + + /// Parses a list of P separated by `sep` tokens, ending in an `end` token. + /// ```nobnf + /// List = (T `sep`)* T? `end` ; + /// ``` + pub fn list>( + &mut self, + mut elems: Vec

, + sep: TKind, + end: TKind, + ) -> PResult> { + while self.peek_if(end).is_none() { + elems.push(self.parse(0)?); + if self.next_if(sep).is_err() { + break; + } + } + self.next_if(end)?; + Ok(elems) + } + + /// Parses into an [`Option

`] if the next token is `next` + pub fn opt_if>(&mut self, level: usize, next: TKind) -> PResult> { + Ok(match self.next_if(next) { + Ok(_) => Some(self.parse(level)?), + Err(_) => None, + }) + } + + /// Parses an expression into a vec unless the next token is `end` + pub fn opt>(&mut self, level: usize, end: TKind) -> PResult> { + let out = match self.peek_if(end) { + None => Some(self.parse(level)?), + Some(_) => None, + }; + self.next_if(end)?; + Ok(out) + } + + /// Consumes the currently peeked token without returning it. + pub fn consume(&mut self) -> &mut Self { + self.next_tok = None; + self + } +} + +pub trait Parse<'t> { + fn parse(p: &mut Parser<'t>, level: usize) -> PResult + where Self: Sized; +} + +impl<'t> Parse<'t> for Literal { + fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { + let tok = p.peek()?; + Ok(match tok.kind { + TKind::True => p.consume().then(Literal::Bool(true)), + TKind::False => p.consume().then(Literal::Bool(false)), + TKind::Character => { + Literal::Char(p.take_lexeme().expect("should have Token").remove(0)) + } + TKind::Integer => { + let Token { lexeme, kind: _, span } = p.take().expect("should have Token"); + // TODO: more complex int parsing + let int = lexeme + .parse() + .map_err(|_| ParseError::Expected(TKind::Integer, span))?; + Literal::Int(int) + } + TKind::String => Literal::Str(p.take_lexeme().expect("should have Token")), + _ => Err(ParseError::Expected(TKind::Integer, tok.span))?, + }) + } +} + +impl<'t> Parse<'t> for Pat { + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + let tok = p.peek()?; + match tok.kind { + TKind::Comment => p.consume().parse(level), + TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { + Ok(Pat::Lit(p.parse(0)?)) + } + TKind::Identifier => match tok.lexeme.as_str() { + "_" => Ok(p.consume().then(Pat::Ignore)), + _ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))), + }, + TKind::Grave => 
Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)), + TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) { + Some(_) => Some(p.parse(level)?), + None => None, + })), + TKind::LParen => Ok(Pat::Tuple(p.consume().list( + vec![], + TKind::Comma, + TKind::RParen, + )?)), + TKind::LBrack => Ok(Pat::Slice(p.consume().list( + vec![], + TKind::Comma, + TKind::RBrack, + )?)), + _ => Err(ParseError::NotPattern(tok.kind, tok.span)), + } + } +} + +impl<'t> Parse<'t> for MatchArm { + fn parse(p: &mut Parser<'t>, _level: usize) -> PResult { + p.next_if(TKind::Bar).ok(); + Ok(MatchArm( + p.list(vec![], TKind::Bar, TKind::FatArrow)?, + p.parse(0)?, + )) + } +} + +impl<'t> Parse<'t> for MakeArm { + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + Ok(MakeArm(p.next_if(TKind::Identifier)?.lexeme, { + p.next_if(TKind::Colon) + .ok() + .map(|_| p.parse(level)) + .transpose()? + })) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum Prec { + Min, + Do, + Assign, + Tuple, + Make, + Body, + Logical, + LogOr, + LogAnd, + Compare, + Range, + Binary, + Shift, + Factor, + Term, + Project, + Unary, + Extend, + Max, +} + +impl Prec { + pub const MIN: usize = Prec::Min.value(); + pub const fn value(self) -> usize { + self as usize * 2 + } + pub const fn prev(self) -> usize { + match self { + Self::Assign => self.value() + 1, + _ => self.value(), + } + } + pub const fn next(self) -> usize { + match self { + Self::Assign => self.value(), + _ => self.value() + 1, + } + } +} + +fn from_prefix(token: &Token) -> PResult<(Op, Prec)> { + Ok(match token.kind { + TKind::Do => (Op::Do, Prec::Do), + TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => { + (Op::Lit, Prec::Max) + } + TKind::Identifier => (Op::Id, Prec::Max), + TKind::Grave => (Op::Mid, Prec::Max), + TKind::Fn => (Op::Fn, Prec::Body), + + TKind::Match => (Op::Match, Prec::Body), + TKind::Macro => (Op::Macro, Prec::Assign), + TKind::Let => 
(Op::Let, Prec::Body), + TKind::Const => (Op::Const, Prec::Body), + TKind::Loop => (Op::Loop, Prec::Body), + TKind::If => (Op::If, Prec::Body), + TKind::While => (Op::While, Prec::Body), + TKind::Break => (Op::Break, Prec::Body), + TKind::Return => (Op::Return, Prec::Body), + + TKind::LBrack => (Op::Array, Prec::Min), + TKind::RBrack => (Op::End, Prec::Min), + TKind::LCurly => (Op::Block, Prec::Min), + TKind::RCurly => (Op::End, Prec::Min), + TKind::LParen => (Op::Group, Prec::Min), + TKind::RParen => (Op::End, Prec::Min), + TKind::Amp => (Op::Refer, Prec::Max), + // TKind::AmpAmp => todo!("addraddr"), + TKind::Bang => (Op::Not, Prec::Unary), + TKind::BangBang => (Op::Identity, Prec::Unary), + TKind::Bar => (Op::Lambda, Prec::Min), + TKind::BarBar => (Op::Lambda, Prec::Max), + TKind::DotDot => (Op::RangeEx, Prec::Range), + TKind::DotDotEq => (Op::RangeIn, Prec::Range), + TKind::Minus => (Op::Neg, Prec::Unary), + TKind::Plus => (Op::Identity, Prec::Unary), + TKind::Star => (Op::Deref, Prec::Unary), + + kind => Err(ParseError::NotPrefix(kind, token.span))?, + }) +} + +fn from_infix(token: &Token) -> PResult<(Op, Prec)> { + Ok(match token.kind { + TKind::Semi => (Op::Do, Prec::Do), // the inspiration + TKind::RParen => (Op::End, Prec::Do), + TKind::Comma => (Op::Tuple, Prec::Tuple), + TKind::Eq => (Op::Set, Prec::Assign), + TKind::XorXor => (Op::LogXor, Prec::Logical), + TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd), + TKind::BarBar => (Op::LogOr, Prec::LogOr), + TKind::Lt => (Op::Lt, Prec::Compare), + TKind::LtEq => (Op::Leq, Prec::Compare), + TKind::EqEq => (Op::Eq, Prec::Compare), + TKind::BangEq => (Op::Neq, Prec::Compare), + TKind::GtEq => (Op::Geq, Prec::Compare), + TKind::Gt => (Op::Gt, Prec::Compare), + TKind::DotDot => (Op::RangeEx, Prec::Range), + TKind::DotDotEq => (Op::RangeIn, Prec::Range), + TKind::Amp => (Op::And, Prec::Binary), + TKind::Xor => (Op::Xor, Prec::Binary), + TKind::Bar => (Op::Or, Prec::Binary), + TKind::LtLt => (Op::Shl, Prec::Shift), + 
TKind::GtGt => (Op::Shr, Prec::Shift), + TKind::Plus => (Op::Add, Prec::Factor), + TKind::Minus => (Op::Sub, Prec::Factor), + TKind::Star => (Op::Mul, Prec::Term), + TKind::Slash => (Op::Div, Prec::Term), + TKind::Rem => (Op::Rem, Prec::Term), + TKind::Dot => (Op::Dot, Prec::Project), + TKind::ColonColon => (Op::Path, Prec::Max), + kind => Err(ParseError::NotInfix(kind, token.span))?, + }) +} + +fn from_postfix(token: &Token) -> PResult<(Op, Prec)> { + Ok(match token.kind { + TKind::Question => (Op::Try, Prec::Unary), + TKind::LParen => (Op::Call, Prec::Extend), + TKind::LBrack => (Op::Index, Prec::Extend), + TKind::LCurly => (Op::Make, Prec::Make), + kind => Err(ParseError::NotPostfix(kind, token.span))?, + }) +} + +#[rustfmt::skip] +fn should_coagulate(prev: Op, op: Op) -> bool { + prev == op && (match prev { + Op::Do => true, + Op::Tuple => true, + Op::Dot => false, + Op::Path => true, + Op::Lt => false, + Op::Leq => false, + Op::Eq => false, + Op::Neq => false, + Op::Geq => false, + Op::Gt => false, + _ => false, + }) +} + +impl<'t> Parse<'t> for Expr { + /// Parses an [Expr]ession. + /// + /// The `level` parameter indicates the operator binding level of the expression. + fn parse(p: &mut Parser<'t>, level: usize) -> PResult { + const MIN: usize = Prec::MIN; + while p.next_if(TKind::Comment).is_ok() {} + + // Prefix + let tok = p.peek()?; + let ((op, prec), span) = (from_prefix(tok)?, tok.span); + + let mut head = match op { + // Empty is returned when a block finisher is an expr prefix. + // It's the only expr that doesn't consume. 
+ Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]), + Op::End => Err(ParseError::NotPrefix(tok.kind, span))?, + + Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")), + Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme), + Op::Lit => Expr::Lit(p.parse(MIN)?), + Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?), + Op::Const => Expr::Const(p.consume().parse(prec.next())?, { + p.next_if(TKind::Eq)?; + p.parse(prec.next())? + }), + Op::Macro => Expr::Op( + op, + vec![p.consume().parse(prec.next())?, { + p.next_if(TKind::FatArrow)?; + p.parse(prec.next())? + }], + ), + Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, { + p.next_if(TKind::LCurly)?; + p.list(vec![], TKind::Comma, TKind::RCurly)? + }), + Op::Block => Expr::Op( + op, + p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(), + ), + Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?), + Op::Group => match p.consume().opt(MIN, TKind::RParen)? { + Some(value) => Expr::Op(Op::Group, vec![value]), + None => Expr::Op(Op::Tuple, vec![]), + }, + Op::If | Op::While => { + p.consume(); + let exprs = vec![ + // conditional restricted to Logical operators or above + p.parse(Prec::Logical.value())?, + p.parse(prec.next())?, + match p.peek() { + Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?, + _ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())), + }, + ]; + Expr::Op(op, exprs) + } + Op::Fn => { + p.consume().next_if(TKind::LParen)?; + Expr::Fn( + p.list(vec![], TKind::Comma, TKind::RParen)?, + p.parse(prec.next())?, + ) + } + // dirty hack: There are two closure operators, signaled by returned prec. 
+ Op::Lambda if prec == Prec::Min => Expr::Fn( + p.consume().list(vec![], TKind::Comma, TKind::Bar)?, + p.parse(Prec::Body.next())?, + ), + Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?), + + _ => Expr::Op(op, vec![p.consume().parse(prec.next())?]), + }; + + // Postfix + while let Ok(tok) = p.peek() + && let Ok((op, prec)) = from_postfix(tok) + && level <= prec.prev() + && op != Op::End + { + let span = span.merge(p.span()); + p.consume(); + head = match op { + Op::Make => Expr::Make( + head.anno(span).into(), + p.consume().list(vec![], TKind::Comma, TKind::RCurly)?, + ), + Op::Index => Expr::Op( + op, + p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?, + ), + Op::Call => Expr::Op( + op, + p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?, + ), + _ => Expr::Op(op, vec![head.anno(span)]), + }; + } + + // Infix + while let Ok(tok) = p.peek() + && let Ok((op, prec)) = from_infix(tok) + && level <= prec.prev() + && op != Op::End + { + let span = span.merge(p.span()); + p.consume(); + + head = match head { + // controls expression chaining vs coagulating + Expr::Op(prev, mut args) if should_coagulate(prev, op) => { + args.push(p.parse(prec.next())?); + Expr::Op(op, args) + } + head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]), + } + } + + Ok(head) + } +} + +impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno

{ + fn parse(p: &mut Parser<'t>, level: usize) -> PResult + where Self: Sized { + let start = p.span(); + Ok(Anno(p.parse(level)?, start.merge(p.span()))) + } +} + +impl<'t, P: Parse<'t>> Parse<'t> for Box

{ + fn parse(p: &mut Parser<'t>, level: usize) -> PResult + where Self: Sized { + Ok(Box::new(p.parse(level)?)) + } +} diff --git a/src/parser/numeric.rs b/src/parser/numeric.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/parser/numeric.rs @@ -0,0 +1 @@ + diff --git a/src/span.rs b/src/span.rs new file mode 100644 index 0000000..206c09a --- /dev/null +++ b/src/span.rs @@ -0,0 +1,42 @@ +use std::ops::Range; + +/// Stores the start and end byte position +#[derive(Clone, Copy, Default, PartialEq, Eq)] +pub struct Span { + pub head: u32, + pub tail: u32, +} + +impl std::fmt::Debug for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { head, tail } = self; + write!(f, "[{head}:{tail}]") + } +} + +#[allow(non_snake_case)] +/// Stores the start and end byte position +pub fn Span(head: u32, tail: u32) -> Span { + Span { head, tail } +} + +impl Span { + /// Updates `self` to include all but the last byte in `other` + pub fn merge(self, other: Span) -> Span { + Span { head: self.head.min(other.head), tail: self.tail.max(other.head) } + } +} + +impl From for Range { + fn from(value: Span) -> Self { + let Span { head, tail } = value; + (head as usize)..(tail as usize) + } +} + +impl std::fmt::Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { head, tail } = self; + write!(f, "{head}:{tail}") + } +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..497743c --- /dev/null +++ b/src/token.rs @@ -0,0 +1,88 @@ +//! 
The Token defines an interface between lexer and parser + +use crate::span::Span; + +#[derive(Clone, Debug)] +pub struct Token { + pub lexeme: String, + pub kind: TKind, + pub span: Span, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TKind { + Comment, + Break, + Const, + Do, + Else, + False, + Fn, + If, + Let, + Loop, + Macro, + Match, + Return, + True, + While, + + Identifier, // or Keyword + Character, + String, + Integer, // 0(x[0-9A-Fa-f]* | d[0-9]* | o[0-7]* | b[0-1]*) | [1-9][0-9]* + LCurly, // { + RCurly, // } + LBrack, // [ + RBrack, // ] + LParen, // ( + RParen, // ) + Amp, // & + AmpAmp, // && + AmpEq, // &= + Arrow, // -> + At, // @ + Backslash, // \ + Bang, // ! + BangBang, // !! + BangEq, // != + Bar, // | + BarBar, // || + BarEq, // |= + Colon, // : + ColonColon, // :: + Comma, // , + Dot, // . + DotDot, // .. + DotDotEq, // ..= + Eq, // = + EqEq, // == + FatArrow, // => + Grave, // ` + Gt, // > + GtEq, // >= + GtGt, // >> + GtGtEq, // >>= + Hash, // # + HashBang, // #! + Lt, // < + LtEq, // <= + LtLt, // << + LtLtEq, // <<= + Minus, // - + MinusEq, // -= + Plus, // + + PlusEq, // += + Question, // ? + Rem, // % + RemEq, // %= + Semi, // ; + Slash, // / + SlashEq, // /= + Star, // * + StarEq, // *= + Tilde, // ~ + Xor, // ^ + XorEq, // ^= + XorXor, // ^^ +}