Initial Commit

This commit is contained in:
John 2025-08-28 02:26:06 -04:00 committed by Val
commit c83218d750
17 changed files with 2276 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

179
Cargo.lock generated Normal file
View File

@ -0,0 +1,179 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "cfg-if"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "crossterm"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
dependencies = [
"bitflags",
"libc",
"parking_lot",
]
[[package]]
name = "doughlang"
version = "0.1.0"
dependencies = [
"repline",
"unicode-ident",
]
[[package]]
name = "libc"
version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "lock_api"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "parking_lot"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "redox_syscall"
version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [
"bitflags",
]
[[package]]
name = "repline"
version = "0.0.8"
source = "registry+https://git.soft.fish/j/_cargo-index.git"
checksum = "9e0ba602730444faec5566123f0717a61c74275988c82840a29cbda8b970438d"
dependencies = [
"crossterm",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "doughlang"
version = "0.1.0"
edition = "2024"
[dependencies]
repline = { version = "0.0.8", registry = "soft-fish" }
unicode-ident = "1.0.12"

48
dummy.do Normal file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env conlang
// This is a Conlang file.
// This is a function. It can be called with the call operator.
// The function called `main` is the program's entrypoint
// fn main() -> (&str, bool, i128)
const main = fn () {
// An if expression is like the ternary conditional operator in C
let y = if 10 < 50 {
"\u{1f988}"
} else {
"x"
};
// A `while` expression is like the while-else construct in Python,
// but it returns a value via the `break` keyword
let z = while false {
// do a thing repeatedly
break true
} else {
// If `while` does not `break`, fall through to the `else` expression
false
};
// The same is true of `for` expressions!
// let w = for idx in 0..100 {
// if idx > 2 * 2 {
// break idx
// }
// } else {
// 12345
// };
// desugars to
{
let _pass = || if idx > 2 * 2 { break idx };
let _fail = || { 12345 };
let _it = 0..100;
loop if let idx = _it.next() _pass() else _fail()
};
// A block evaluates to its last expression,
// or Empty if there is none
// (🦈, false, 5)
(y, z, w)
}

13
license.md Normal file
View File

@ -0,0 +1,13 @@
Copyright (c) 2022 Soft Fish <j@soft.fish>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
The software is provided *"AS IS"* and the author *DISCLAIMS ALL WARRANTIES* with
regard to this software *INCLUDING ALL IMPLIED WARRANTIES of merchantability
and fitness.* In no event shall the author be liable for any special, direct,
indirect, or consequential damages or any damages whatsoever resulting from
loss of use, data or profits, whether in an action of contract, negligence or
other tortious action, arising out of or in connection with the use or
performance of this software.

16
rustfmt.toml Normal file
View File

@ -0,0 +1,16 @@
unstable_features = true
max_width = 100
wrap_comments = true
comment_width = 100
struct_lit_width = 100
imports_granularity = "Crate"
# Allow structs to fill an entire line
# use_small_heuristics = "Max"
# Allow small functions on single line
# fn_single_line = true
# Alignment
enum_discrim_align_threshold = 12
#struct_field_align_threshold = 12
where_single_line = true

59
samples/receiver.do Normal file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env dough
/*
Type = "type" Identifier (<Generics>)? '=' TypeSpec
TypeSpec = (
| Identifier
| str (StructField),* uct
| tup (TupleField),* le
| cho (ChoiceField),* ice
)
StructField = Identifier ':' TypeSpec
TupleField = TypeSpec
ChoiceField = Identifier ('(' TypeSpec ')')?
*/
// Product type with named fields
type Product<T> = {
a: i32,
b: T,
c: {
d: i32,
e: i32,
f: []
},
};
// Product type with indexed fields
type Tuple<T, U> = (
i32,
T,
U,
);
// Choice/Sum type, which degrades to enumeration
type Sum = Nothing | A(Product) | B(Tuple) ;
// Kotlin style?
type <V> Option = {
None,
Some(V)
}
// fucked up?
type Option<V> (None | Some(V));
fn x(self: &Sum) -> Product {
match self {
Nothing | B(_) => panic(),
A(value) =>
}
}
fun x(a: T<A>) -> A {
a.get()
}

384
src/ast.rs Normal file
View File

@ -0,0 +1,384 @@
//! The Abstract Syntax Tree defines an interface between the parser and type checker
pub mod matcher;

/// A value with an annotation.
///
/// Pairs an AST node `T` with an annotation `A` (a [Span] by default).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);

/// An annotation: extra data added on to important AST nodes.
pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {}

// Blanket impl: any type satisfying the bounds can serve as an annotation.
impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation for T {}
/// A literal value appearing directly in source text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
    /// A boolean literal: true | false
    Bool(bool),
    /// A character literal: 'a', '\u{1f988}'
    Char(char),
    /// An integer literal: 0, 123, 0x10
    Int(i128),
    /// A string literal: "hello, world"
    Str(String),
}
/// Binding patterns for each kind of matchable [Ty]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
    /// The wildcard `_`: matches anything, binds nothing.
    Ignore,
    /// A meta-identifier, used by the AST matcher (see [matcher]).
    MetId(String),
    /// A name to bind.
    Name(String),
    /// A rest pattern `..`, optionally with a sub-pattern (`..rest`).
    Rest(Option<Box<Pat>>),
    /// A literal pattern.
    Lit(Literal),
    /// A tuple pattern: `(Pat,*)`.
    Tuple(Vec<Pat>),
    /// A slice pattern: `[Pat,*]`.
    Slice(Vec<Pat>),
}
/// The arms of a make expression: a field name with an optional initializer.
/// ```ignore
/// Identifier (':' Expr)?
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);

/// The arms of a match expression: alternative patterns and a body.
/// ```ignore
/// (Pat |)* Pat? => Expr
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MatchArm<A: Annotation = Span>(pub Vec<Pat>, pub Anno<Expr<A>, A>);
/// In-universe types
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Ty {
    /// `_` — a type to be inferred
    Infer,
    /// `(..Tys)`
    Tuple(Vec<Ty>),
    /// `[Ty]`
    Slice(Box<Ty>),
    /// `[Ty; _]` — element type and length
    Array(Box<Ty>, usize),
    /// `[Rety, ..Args]` — the return type is stored first, then the arguments
    Fn(Vec<Ty>),
}
/// Expressions: The beating heart of Dough
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr<A: Annotation = Span> {
    /// An identifier
    Id(String),
    /// A meta-identifier (printed with a leading backtick; bound by [matcher])
    MetId(String),
    /// A literal bool, string, char, or int
    Lit(Literal),
    /// let pattern = expr (the initializer is optional)
    Let(Pat, Option<Box<Anno<Self, A>>>),
    /// `const Pat (= Expr)?` (Basically let rec)
    Const(Pat, Box<Anno<Self, A>>),
    /// `| Pat,* | Expr` | `|| Expr` | `fn (Pat,*) Expr`
    Fn(Vec<Pat>, Box<Anno<Self, A>>),
    /// Expr { (Ident (: Expr)?),* } — struct construction
    Make(Box<Anno<Self, A>>, Vec<MakeArm<A>>),
    /// match Expr { MatchArm,* }
    Match(Box<Anno<Self, A>>, Vec<MatchArm<A>>),
    /// Op Expr | Expr Op | Expr (Op Expr)+ | Op Expr Expr else Expr
    /// All remaining forms are uniformly an operator applied to operands.
    Op(Op, Vec<Anno<Self, A>>),
}
impl<A: Annotation> Expr<A> {
    /// Attaches `annotation` to this expression.
    pub fn anno(self, annotation: A) -> Anno<Expr<A>, A> {
        Anno(self, annotation)
    }

    /// Returns true for place expressions: identifiers, indexing, field
    /// access, paths, and dereferences.
    pub fn is_place(&self) -> bool {
        match self {
            Self::Id(_) => true,
            Self::Op(Op::Index | Op::Dot | Op::Path | Op::Deref, _) => true,
            _ => false,
        }
    }

    // TODO(review): an `is_assignee` predicate was previously sketched here
    // but never implemented; add it when assignment checking lands.
}
/// Operators and special forms. Variants before the "true operators" marker
/// exist only so special forms can be assigned precedences uniformly.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
    // -- fake operators used to assign precedences to special forms
    Id,    // Identifier
    Mid,   // MetaIdentifier
    Lit,   // Literal
    Let,   // let Pat = Expr
    Const, // const Pat = Expr
    Fn,    // fn ( Pat,* ) Expr
    Make,  // Expr{ Expr,* }
    Macro, // macro Expr => Expr
    Match, // match Expr { MatchArm,* }
    End,   // Produces an empty value.
    // -- true operators
    Do,       // Expr ; Expr
    Block,    // { Expr }
    Array,    // [ Expr,* ]
    Group,    // ( Expr ,?)
    Tuple,    // ( Expr,* )
    Try,      // Expr '?'
    Index,    // Expr [ Expr,* ]
    Call,     // Expr ( Expr,* )
    Lambda,   // |Pat?| Expr
    Loop,     // loop Expr
    If,       // if Expr Expr (else Expr)?
    While,    // while Expr Expr (else Expr)?
    Break,    // break Expr
    Return,   // return Expr
    Dot,      // Expr . Expr
    Path,     // Expr :: Expr
    RangeEx,  // Expr? ..Expr
    RangeIn,  // Expr? ..=Expr
    Neg,      // -Expr
    Not,      // !Expr
    Identity, // !!Expr
    Refer,    // &Expr
    Deref,    // *Expr
    Mul,      // Expr * Expr
    Div,      // Expr / Expr
    Rem,      // Expr % Expr
    Add,      // Expr + Expr
    Sub,      // Expr - Expr
    Shl,      // Expr << Expr
    Shr,      // Expr >> Expr
    And,      // Expr & Expr
    Xor,      // Expr ^ Expr
    Or,       // Expr | Expr
    Lt,       // Expr < Expr
    Leq,      // Expr <= Expr
    Eq,       // Expr == Expr
    Neq,      // Expr != Expr
    Geq,      // Expr >= Expr
    Gt,       // Expr > Expr
    LogAnd,   // Expr && Expr
    LogXor,   // Expr ^^ Expr
    LogOr,    // Expr || Expr
    Set,      // Expr = Expr
}
use crate::{fmt::FmtAdapter, span::Span};
use std::{fmt::Display, format_args as fmt};
impl Display for Literal {
    /// Prints the literal as it would appear in source: chars quoted with
    /// `'`, strings with `"`, both with debug-escaped contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Bool(value) => write!(f, "{value}"),
            Self::Char(value) => write!(f, "'{}'", value.escape_debug()),
            Self::Int(value) => write!(f, "{value}"),
            Self::Str(value) => write!(f, "\"{}\"", value.escape_debug()),
        }
    }
}
impl<T: Display + Annotation, A: Annotation> Display for Anno<T, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl<A: Annotation> Display for Expr<A> {
    /// Pretty-prints the expression. Relies on [crate::fmt::FmtAdapter]:
    /// `delimit`/`delimit_indented` write the opener immediately and the
    /// closer when the returned guard drops, so the `list` call between them
    /// ends up inside the delimiters.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Id(id) => id.fmt(f),
            // Meta-identifiers print with a leading backtick
            Self::MetId(id) => write!(f, "`{id}"),
            Self::Lit(literal) => literal.fmt(f),
            Self::Let(pat, Some(expr)) => write!(f, "let {pat} = {expr}"),
            Self::Let(pat, None) => write!(f, "let {pat}"),
            Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"),
            Self::Make(expr, make_arms) => {
                f.delimit(fmt!("make {expr} {{"), "}").list(make_arms, ", ")
            }
            // Match arms are indented one level and end with a trailing comma
            Self::Match(expr, match_arms) => f
                .delimit_indented(fmt!("match {expr} {{\n"), "\n}")
                .list_end(match_arms, ",\n", ","),
            Self::Fn(pats, expr) => f.delimit("fn (", fmt!(") {expr}")).list(pats, ", "),
            // Ternary if/while print as `if c pass else fail`; any other
            // arity falls back to a parenthesized prefix form
            Self::Op(op @ (Op::If | Op::While), exprs) => match exprs.as_slice() {
                [cond, pass, fail] => write!(f, "{op}{cond} {pass} else {fail}"),
                other => f.delimit(fmt!("({op}, "), ")").list(other, ", "),
            },
            Self::Op(Op::Array, exprs) => f.delimit("[", "]").list(exprs, ", "),
            Self::Op(Op::Block, exprs) => f.delimit_indented("{\n", "\n}").list(exprs, ", "),
            Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
            // Call/Index store the callee as the first operand
            Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
                [callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
                [] => write!(f, "{op}"),
            },
            Self::Op(op @ Op::Index, exprs) => match exprs.as_slice() {
                [callee, args @ ..] => f.delimit(fmt!("{callee}["), "]").list(args, ", "),
                [] => write!(f, "{op}"),
            },
            Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"),
            Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
            // Try is postfix: the operand list is followed by `?`
            Self::Op(op @ Op::Try, exprs) => f.delimit("", op).list(exprs, ", "),
            // Generic operators: unary prints prefix; n-ary joins the
            // operands with the operator inside parentheses
            Self::Op(op, exprs) => match exprs.as_slice() {
                [_] => f.delimit(op, "").list(exprs, ", "),
                many => f.delimit("(", ")").list(many, op),
            },
        }
    }
}
impl Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Op::Do => "; ".fmt(f),
Op::Id => "_".fmt(f),
Op::Mid => "`".fmt(f),
Op::Lit => "##".fmt(f),
Op::Let => "let ".fmt(f),
Op::Const => "const ".fmt(f),
Op::Fn => "fn ".fmt(f),
Op::Macro => "macro ".fmt(f),
Op::Match => "match ".fmt(f),
Op::End => "()".fmt(f),
Op::Block => "{}".fmt(f),
Op::Array => "[]".fmt(f),
Op::Group => "()".fmt(f),
Op::Tuple => "()".fmt(f),
Op::Try => "?".fmt(f),
Op::Index => "".fmt(f),
Op::Call => "".fmt(f),
Op::Make => "".fmt(f),
Op::Lambda => "".fmt(f),
Op::Loop => "loop ".fmt(f),
Op::If => "if ".fmt(f),
Op::While => "while ".fmt(f),
Op::Break => "break ".fmt(f),
Op::Return => "return ".fmt(f),
Op::Dot => ".".fmt(f),
Op::Path => "::".fmt(f),
Op::RangeEx => " .. ".fmt(f),
Op::RangeIn => " ..= ".fmt(f),
Op::Neg => "-".fmt(f),
Op::Not => "!".fmt(f),
Op::Identity => "!!".fmt(f),
Op::Refer => "&".fmt(f),
Op::Deref => "*".fmt(f),
Op::Mul => " * ".fmt(f),
Op::Div => " / ".fmt(f),
Op::Rem => " % ".fmt(f),
Op::Add => " + ".fmt(f),
Op::Sub => " - ".fmt(f),
Op::Shl => " << ".fmt(f),
Op::Shr => " >> ".fmt(f),
Op::And => " & ".fmt(f),
Op::Xor => " ^ ".fmt(f),
Op::Or => " | ".fmt(f),
Op::Lt => " < ".fmt(f),
Op::Leq => " <= ".fmt(f),
Op::Eq => " == ".fmt(f),
Op::Neq => " != ".fmt(f),
Op::Geq => " >= ".fmt(f),
Op::Gt => " > ".fmt(f),
Op::LogAnd => " && ".fmt(f),
Op::LogXor => " ^^ ".fmt(f),
Op::LogOr => " || ".fmt(f),
Op::Set => " = ".fmt(f),
}
}
}
impl<A: Annotation> Display for MakeArm<A> {
    /// Prints `name: value` when an initializer is present, or just `name`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(name, value) = self;
        match value {
            Some(body) => write!(f, "{name}: {body}"),
            None => write!(f, "{name}"),
        }
    }
}
impl<A: Annotation> Display for MatchArm<A> {
    /// Prints `pat | pat | ... => expr`. The ` => expr` tail is emitted by
    /// the delimit guard when it drops, after the pattern list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(pats, expr) = self;
        f.delimit("", fmt!(" => {expr}")).list(pats, " | ")
    }
}
impl Display for Pat {
    /// Prints the pattern in its source syntax.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Ignore => "_".fmt(f),
            Self::Lit(literal) => literal.fmt(f),
            // NOTE(review): unlike Expr::MetId, pattern meta-ids print with
            // no backtick — confirm whether that asymmetry is intentional
            Self::MetId(name) => name.fmt(f),
            Self::Name(name) => name.fmt(f),
            Self::Rest(Some(rest)) => write!(f, "..{rest}"),
            Self::Rest(None) => write!(f, ".."),
            Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "),
            Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "),
        }
    }
}
impl<A: Annotation> TryFrom<Expr<A>> for Pat {
    /// On failure, the unconvertible expression is handed back to the caller.
    type Error = Expr<A>;

    /// Reinterprets an expression as a pattern, where the syntax overlaps:
    /// identifiers become names (or `_`), literals stay literals, ranges
    /// become rest patterns, and tuples/arrays convert element-wise.
    fn try_from(value: Expr<A>) -> Result<Self, Self::Error> {
        Ok(match value {
            Expr::Id(name) if name == "_" => Self::Ignore,
            Expr::Id(name) => Self::Name(name),
            Expr::MetId(name) => Self::MetId(name),
            Expr::Lit(literal) => Self::Lit(literal),
            // `..` with no operand is a bare rest pattern
            Expr::Op(Op::RangeEx, exprs) if exprs.is_empty() => Self::Rest(None),
            // `..expr` binds the rest to a sub-pattern
            Expr::Op(Op::RangeEx, mut exprs) if exprs.len() == 1 => {
                Self::Rest(Some(Box::new(Self::try_from(exprs.remove(0))?)))
            }
            Expr::Op(Op::Tuple, exprs) => Self::Tuple(
                exprs
                    .into_iter()
                    .map(Self::try_from)
                    .collect::<Result<_, _>>()?,
            ),
            Expr::Op(Op::Array, exprs) => Self::Slice(
                exprs
                    .into_iter()
                    .map(Self::try_from)
                    .collect::<Result<_, _>>()?,
            ),
            other => Err(other)?,
        })
    }
}
impl<A: Annotation> TryFrom<Anno<Expr<A>, A>> for Pat {
type Error = Expr<A>;
fn try_from(value: Anno<Expr<A>, A>) -> Result<Self, Self::Error> {
Self::try_from(value.0)
}
}

260
src/ast/matcher.rs Normal file
View File

@ -0,0 +1,260 @@
//! Implements pattern matching
use super::*;
use std::collections::HashMap;

/// Stores a substitution from meta-identifiers to values
#[derive(Clone, Debug)]
pub struct Subst<A: Annotation> {
    /// Bindings from expression meta-identifiers to expressions.
    pub exp: HashMap<String, Expr<A>>,
    /// Bindings from pattern meta-identifiers to patterns.
    pub pat: HashMap<String, Pat>,
}

// Implemented by hand so that `A` itself need not implement `Default`.
impl<A: Annotation> Default for Subst<A> {
    fn default() -> Self {
        Self { exp: Default::default(), pat: Default::default() }
    }
}
/// Matching and substitution over AST nodes, keyed by meta-identifiers.
pub trait Match<A: Annotation> {
    /// Applies a substitution rule from `pat` to `template` on `self`.
    /// If `self` matches `pat`, `self` is replaced with `template` with the
    /// captured bindings substituted in, and true is returned; otherwise
    /// `self` is left unchanged and false is returned.
    fn apply_rule(&mut self, pat: &Self, template: &Self) -> bool
    where Self: Sized + Clone {
        let Some(sub) = self.construct(pat) else {
            return false;
        };
        *self = template.clone();
        self.apply(&sub);
        true
    }
    /// With self as the pattern, recursively applies the Subst
    fn apply(&mut self, sub: &Subst<A>);
    /// Implements recursive Subst-building for Self.
    /// Returns true if `expr` matches `pat`, accumulating bindings in `sub`.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool;
    /// Constructs a Subst by matching `self` against `pat`;
    /// None if the match fails.
    fn construct(&self, pat: &Self) -> Option<Subst<A>> {
        let mut sub = Subst::default();
        Match::recurse(&mut sub, pat, self).then_some(sub)
    }
    /// Matches self against the provided pattern, extending `sub`
    fn match_with(&self, pat: &Self, sub: &mut Subst<A>) -> bool {
        Match::recurse(sub, pat, self)
    }
}
impl<M: Match<A> + Annotation, A: Annotation> Match<A> for Anno<M, A> {
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
Match::recurse(sub, &pat.0, &expr.0)
}
fn apply(&mut self, sub: &Subst<A>) {
self.0.apply(sub);
}
}
impl<A: Annotation> Match<A> for Expr<A> {
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        match (pat, expr) {
            // `_` matches anything without binding
            (Expr::MetId(name), _) if name == "_" => true,
            // Bind name -> expr. `insert` returns the displaced binding, so
            // this succeeds iff there was no previous binding or the previous
            // binding was identical (consistent repeated use of a meta-id).
            (Expr::MetId(name), _) => sub
                .exp
                .insert(name.clone(), expr.clone())
                .filter(|v| v != expr)
                .is_none(),
            (Expr::Id(pat), Expr::Id(expr)) => pat == expr,
            (Expr::Id(_), _) => false,
            (Expr::Lit(pat), Expr::Lit(expr)) => pat == expr,
            (Expr::Lit(_), _) => false,
            // For compound nodes: all corresponding children must match.
            // `&&` short-circuits, so `sub` may hold partial bindings after a
            // failed match; callers discard `sub` on failure.
            (Expr::Let(pat_pat, pat_expr), Expr::Let(expr_pat, expr_expr)) => {
                Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr)
            }
            (Expr::Let(..), _) => false,
            (Expr::Const(pat_pat, pat_expr), Expr::Const(expr_pat, expr_expr)) => {
                Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr)
            }
            (Expr::Const(..), _) => false,
            (Expr::Make(pat, pat_arms), Expr::Make(expr, expr_arms)) => {
                Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms)
            }
            (Expr::Make(..), _) => false,
            (Expr::Match(pat, pat_arms), Expr::Match(expr, expr_arms)) => {
                Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms)
            }
            (Expr::Match(..), _) => false,
            (Expr::Fn(pat_pats, pat_expr), Expr::Fn(expr_pats, expr_expr)) => {
                Match::recurse(sub, pat_pats, expr_pats) && Match::recurse(sub, pat_expr, expr_expr)
            }
            (Expr::Fn(..), _) => false,
            (Expr::Op(pat_op, pat_exprs), Expr::Op(expr_op, expr_exprs)) => {
                Match::recurse(sub, pat_op, expr_op) && Match::recurse(sub, pat_exprs, expr_exprs)
            }
            (Expr::Op(..), _) => false,
        }
    }
    fn apply(&mut self, sub: &Subst<A>) {
        match self {
            // Replace a bound meta-identifier with its captured expression;
            // unbound meta-identifiers are left in place.
            Expr::MetId(id) => {
                if let Some(expr) = sub.exp.get(id) {
                    *self = expr.clone()
                }
            }
            Expr::Id(_) | Expr::Lit(_) => {}
            Expr::Let(pat, expr) => {
                pat.apply(sub);
                expr.apply(sub);
            }
            Expr::Const(pat, expr) => {
                pat.apply(sub);
                expr.apply(sub);
            }
            Expr::Make(expr, make_arms) => {
                expr.apply(sub);
                make_arms.apply(sub);
            }
            Expr::Match(expr, match_arms) => {
                expr.apply(sub);
                match_arms.apply(sub);
            }
            Expr::Fn(pats, expr) => {
                pats.apply(sub);
                expr.apply(sub);
            }
            Expr::Op(op, exprs) => {
                op.apply(sub);
                exprs.apply(sub);
            }
        };
    }
}
impl<A: Annotation> Match<A> for MakeArm<A> {
    // TODO: order-independent matching for MakeArm specifically.
    /// Field names must be equal; the optional initializers are matched
    /// recursively.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        pat.0 == expr.0 && Match::recurse(sub, &pat.1, &expr.1)
    }
    fn apply(&mut self, sub: &Subst<A>) {
        // The field name is never substituted; only the initializer is.
        let Self(_, expr) = self;
        expr.apply(sub);
    }
}
impl<A: Annotation> Match<A> for MatchArm<A> {
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
Match::recurse(sub, &pat.0, &expr.0) && Match::recurse(sub, &pat.1, &expr.1)
}
fn apply(&mut self, sub: &Subst<A>) {
let Self(pats, expr) = self;
pats.apply(sub);
expr.apply(sub);
}
}
impl<A: Annotation> Match<A> for Pat {
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        match (pat, expr) {
            // `_` matches any pattern without binding
            (Pat::MetId(name), _) if name == "_" => true,
            // Bind name -> pattern; succeeds iff no previous, different
            // binding existed (insert returns the displaced value).
            (Pat::MetId(name), _) => sub
                .pat
                .insert(name.clone(), expr.clone())
                .filter(|v| v != expr)
                .is_none(),
            (Pat::Ignore, Pat::Ignore) => true,
            (Pat::Ignore, _) => false,
            (Pat::Name(pat), Pat::Name(expr)) => pat == expr,
            (Pat::Name(_), _) => false,
            (Pat::Rest(pat), Pat::Rest(expr)) => Match::recurse(sub, pat, expr),
            (Pat::Rest(_), _) => false,
            (Pat::Lit(pat), Pat::Lit(expr)) => pat == expr,
            (Pat::Lit(_), _) => false,
            (Pat::Tuple(pat), Pat::Tuple(expr)) => Match::recurse(sub, pat, expr),
            (Pat::Tuple(_), _) => false,
            (Pat::Slice(pat), Pat::Slice(expr)) => Match::recurse(sub, pat, expr),
            (Pat::Slice(_), _) => false,
        }
    }
    fn apply(&mut self, sub: &Subst<A>) {
        match self {
            Pat::Ignore | Pat::Name(_) | Pat::Lit(_) => {}
            // Replace a bound meta-identifier with its captured pattern
            Pat::MetId(id) => {
                if let Some(expr) = sub.pat.get(id) {
                    *self = expr.clone()
                }
            }
            Pat::Rest(pat) => pat.apply(sub),
            Pat::Tuple(pats) => pats.apply(sub),
            Pat::Slice(pats) => pats.apply(sub),
        }
    }
}
impl<A: Annotation> Match<A> for Op {
    /// Operators carry no bindings; they match by equality alone.
    fn recurse(_sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        *pat == *expr
    }

    /// Nothing to substitute inside an operator.
    fn apply(&mut self, _sub: &Subst<A>) {}
}
impl<A: Annotation, T: Match<A>> Match<A> for [T] {
    /// Slices match element-wise and must be the same length.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        pat.len() == expr.len()
            && pat
                .iter()
                .zip(expr.iter())
                .all(|(pat, expr)| Match::recurse(sub, pat, expr))
    }

    fn apply(&mut self, sub: &Subst<A>) {
        self.iter_mut().for_each(|item| item.apply(sub));
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Box<T> {
    /// Boxes are transparent: match the boxed contents.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        // `&Box<T>` deref-coerces to `&T` at the call site.
        T::recurse(sub, pat, expr)
    }

    fn apply(&mut self, sub: &Subst<A>) {
        (**self).apply(sub);
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Vec<T> {
    /// Vectors match exactly like slices.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        <[T]>::recurse(sub, pat, expr)
    }

    fn apply(&mut self, sub: &Subst<A>) {
        self[..].apply(sub);
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Option<T> {
    /// Both absent: trivially matched. Both present: match contents.
    /// Mixed presence: no match.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        match (pat, expr) {
            (None, None) => true,
            (Some(pat), Some(expr)) => Match::recurse(sub, pat, expr),
            _ => false,
        }
    }

    fn apply(&mut self, sub: &Subst<A>) {
        if let Some(inner) = self {
            inner.apply(sub);
        }
    }
}

139
src/fmt.rs Normal file
View File

@ -0,0 +1,139 @@
//! The Conlang format extensions
use std::fmt::{Display, Write};

// Blanket impl: every writer gets the adapter methods for free.
impl<W: Write + ?Sized> FmtAdapter for W {}

/// Extension methods over [Write] for indentation, delimiting, and lists.
pub trait FmtAdapter: Write {
    /// Indents by one level.
    fn indent(&mut self) -> Indent<'_, Self> {
        Indent::new(self, "    ")
    }
    /// Pastes `indent` after each newline.
    fn indent_with(&mut self, indent: &'static str) -> Indent<'_, Self> {
        Indent::new(self, indent)
    }
    /// Delimits a section with `open` and `close`.
    /// `open` is written immediately; `close` when the guard drops.
    fn delimit<O: Display, E: Display>(&mut self, open: O, close: E) -> Delimit<'_, Self, E> {
        Delimit::new(self, open, close)
    }
    /// Delimits a section with `open` and `close`, raising the indent level within.
    fn delimit_indented<O: Display, E: Display>(
        &mut self,
        open: O,
        close: E,
    ) -> DelimitIndent<'_, Self, E> {
        DelimitIndent::new(self, open, close)
    }
    /// Formats bracketed lists of the kind (Item (Comma Item)*)?
    #[inline]
    fn list<Item: Display, Sep: Display>(&mut self, items: &[Item], sep: Sep) -> std::fmt::Result {
        self.list_end(items, sep, "")
    }
    /// Like [FmtAdapter::list], but also writes `end` after the final item
    /// (e.g. a trailing comma).
    fn list_end<Item: Display, Sep: Display, End: Display>(
        &mut self,
        items: &[Item],
        sep: Sep,
        end: End,
    ) -> std::fmt::Result {
        let mut pats = items;
        // Walk the slice head-first so the separator is only written
        // between items, never after the last one.
        while let [pat, rest @ ..] = pats {
            write!(self, "{pat}")?;
            if !rest.is_empty() {
                write!(self, "{sep}")?;
            }
            pats = rest
        }
        write!(self, "{end}")
    }
}
/// Pads text with leading indentation after every newline
pub struct Indent<'f, F: Write + ?Sized> {
    /// The string pasted after each newline.
    indent: &'static str,
    /// True when the next character written must be preceded by the indent
    /// (i.e. the previous write ended with a newline).
    needs_indent: bool,
    /// The underlying writer.
    f: &'f mut F,
}

impl<'f, F: Write + ?Sized> Indent<'f, F> {
    /// Wraps `f`, pasting `indent` after every newline written through the
    /// adapter. The first line is NOT indented.
    pub fn new(f: &'f mut F, indent: &'static str) -> Self {
        Indent { f, needs_indent: false, indent }
    }
}

impl<F: Write + ?Sized> Write for Indent<'_, F> {
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        for s in s.split_inclusive('\n') {
            if self.needs_indent {
                self.f.write_str(self.indent)?;
            }
            self.f.write_str(s)?;
            self.needs_indent = s.ends_with('\n');
        }
        Ok(())
    }

    fn write_char(&mut self, c: char) -> std::fmt::Result {
        // BUG FIX: this path previously wrote a hard-coded "    " instead of
        // `self.indent`, so custom indents from `indent_with` were ignored
        // whenever text arrived via `write_char`. Use the configured indent
        // so both write paths agree.
        if self.needs_indent {
            self.f.write_str(self.indent)?;
        }
        self.needs_indent = c == '\n';
        self.f.write_char(c)
    }
}
/// Prints `open` immediately and `close` on drop, delimiting anything
/// formatted through it. Unlike [DelimitIndent], this does NOT indent.
pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> {
    f: &'f mut F,
    // The closer, written by Drop.
    close: E,
}

impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
    /// Writes `open` to `f` up front; `close` is deferred until drop.
    pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
        // NOTE(review): write errors here (and in Drop) are silently
        // discarded — Drop cannot report them.
        let _ = write!(f, "{open}");
        Self { f, close }
    }
}

impl<F: Write + ?Sized, E: Display> Drop for Delimit<'_, F, E> {
    fn drop(&mut self) {
        let Self { f, close, .. } = self;
        let _ = write!(f, "{close}");
    }
}

impl<F: Write + ?Sized, E: Display> Write for Delimit<'_, F, E> {
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        self.f.write_str(s)
    }
}
/// Prints delimiters around anything formatted with this, raising the
/// indentation level for the delimited contents. Implies [Indent].
pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> {
    // The indenting wrapper around the underlying writer.
    f: Indent<'f, F>,
    // The closer, written by Drop.
    close: E,
}

impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
    /// Writes `open` through the indenter up front; `close` is deferred
    /// until drop. Write errors are discarded (Drop cannot report them).
    pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
        let mut f = f.indent();
        let _ = write!(f, "{open}");
        Self { f, close }
    }
}

impl<F: Write + ?Sized, E: Display> Drop for DelimitIndent<'_, F, E> {
    fn drop(&mut self) {
        // Destructure past the indenter so the closer itself is written
        // un-indented to the underlying writer.
        let Self { f: Indent { f, .. }, close, .. } = self;
        let _ = write!(f, "{}", close);
    }
}

impl<F: Write + ?Sized, E: Display> Write for DelimitIndent<'_, F, E> {
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        self.f.write_str(s)
    }
}

323
src/lexer.rs Normal file
View File

@ -0,0 +1,323 @@
//! A lobster
use std::ops::Range;
#[allow(dead_code)]
use std::{iter::Peekable, str::CharIndices};
use unicode_ident::{is_xid_continue, is_xid_start};
use crate::{span::Span, token::*};
/// A lexing failure: the byte position where the current token started and a
/// static reason string.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LexError {
    pub pos: u32,
    pub res: &'static str,
}

impl std::error::Error for LexError {}

impl std::fmt::Display for LexError {
    /// Formats as `pos: reason`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}: {}", self.pos, self.res)
    }
}
/// Turns source text into [Token]s. `head`/`tail` are byte offsets into
/// `text` (taken from `char_indices`), bracketing the token in progress.
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
    /// The source text
    text: &'t str,
    /// A peekable iterator over the source text
    iter: Peekable<CharIndices<'t>>,
    /// The start of the current token
    head: u32,
    /// The end of the current token
    tail: u32,
}
impl<'t> Lexer<'t> {
    /// Constructs a new Lexer from some text
    pub fn new(text: &'t str) -> Self {
        let iter = text.char_indices().peekable();
        Self { text, iter, head: 0, tail: 0 }
    }
    /// Peeks the next character without advancing the lexer
    pub fn peek(&mut self) -> Option<char> {
        self.iter.peek().map(|&(_, c)| c)
    }
    /// Moves `tail` to the byte offset of the next unconsumed character,
    /// or to the end of the text at EOF.
    fn advance_tail(&mut self) {
        match self.iter.peek() {
            Some(&(idx, _)) => self.tail = idx as u32,
            None => {
                self.tail = self.text.len() as _;
            }
        }
    }
    /// Consumes and returns the next character, advancing the tail
    pub fn take(&mut self) -> Option<char> {
        let (_, c) = self.iter.next()?;
        self.advance_tail();
        Some(c)
    }
    /// Consumes and returns the next character only if it equals `expected`
    pub fn next_if(&mut self, expected: char) -> Option<char> {
        let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
        self.advance_tail();
        Some(c)
    }
    /// Consumes the last-peeked character, advancing the tail
    pub fn consume(&mut self) -> &mut Self {
        self.iter.next();
        self.advance_tail();
        self
    }
    /// Produces a LexError at the start of the current token
    pub fn error(&self, res: &'static str) -> LexError {
        LexError { pos: self.head, res }
    }
    /// Produces a Token spanning `head..tail`, slicing its lexeme out of the
    /// source text, then starts the next token at `tail`
    pub fn produce(&mut self, kind: TKind) -> Token {
        self.advance_tail();
        let span = Span(self.head, self.tail);
        self.head = self.tail;
        Token { lexeme: self.text[Range::from(span)].to_owned(), kind, span }
    }
    /// Like [Lexer::produce], but with a caller-supplied lexeme for tokens
    /// whose lexeme differs from the raw source slice
    pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: String) -> Token {
        self.advance_tail();
        let span = Span(self.head, self.tail);
        self.head = self.tail;
        Token { lexeme, kind, span }
    }
pub fn skip_whitespace(&mut self) -> &mut Self {
while self.peek().is_some_and(char::is_whitespace) {
let _ = self.consume();
}
self
}
/// Marks the start of a new token at the current tail position.
pub fn start_token(&mut self) -> &mut Self {
    self.head = self.tail;
    self
}
/// Scans forward until it finds the next Token in the input.
/// Works in three stages: classify one character, then try to extend it
/// into a digraph, then a trigraph; the longest match wins.
pub fn scan(&mut self) -> Result<Token, LexError> {
    use TKind::*;
    // !"#%&'()*+,-./:;<=>?@[\\]^`{|}~
    // Stage 1: classify by the first character, or dispatch to a
    // sub-lexer for strings, chars, digits, and identifiers.
    let tok = match self
        .skip_whitespace()
        .start_token()
        .peek()
        .ok_or_else(|| self.error("EOF"))?
    {
        '!' => Bang,
        '"' => return self.string(),
        '#' => Hash,
        '%' => Rem,
        '&' => Amp,
        '\'' => return self.character(),
        '(' => LParen,
        ')' => RParen,
        '*' => Star,
        '+' => Plus,
        ',' => Comma,
        '-' => Minus,
        '.' => Dot,
        '/' => Slash,
        // '0' may begin a based literal (0b/0o/0x/...); resolved below
        '0' => Integer,
        '1'..='9' => return self.digits::<10>(),
        ':' => Colon,
        ';' => Semi,
        '<' => Lt,
        '=' => Eq,
        '>' => Gt,
        '?' => Question,
        '@' => At,
        '[' => LBrack,
        '\\' => Backslash,
        ']' => RBrack,
        '^' => Xor,
        '`' => Grave,
        '{' => LCurly,
        '|' => Bar,
        '}' => RCurly,
        '~' => Tilde,
        '_' => return self.identifier(),
        c if is_xid_start(c) => return self.identifier(),
        _ => Err(self.error("Invalid"))?,
    };
    // Stage 2 (digraphs): `consume` eats the first character, then peek at
    // the second to try to extend the token.
    let tok = match (tok, self.consume().peek()) {
        // Leading '0' followed by a radix letter selects the number base
        (Integer, Some('b')) => return self.consume().digits::<2>(),
        (Integer, Some('d')) => return self.consume().digits::<10>(),
        (Integer, Some('o')) => return self.consume().digits::<8>(),
        (Integer, Some('x')) => return self.consume().digits::<16>(),
        (Integer, Some('z')) => return self.consume().digits::<36>(),
        (Integer, _) => return self.digits::<10>(),
        (Amp, Some('&')) => AmpAmp,
        (Amp, Some('=')) => AmpEq,
        (Bang, Some('!')) => BangBang,
        (Bang, Some('=')) => BangEq,
        (Bar, Some('|')) => BarBar,
        (Bar, Some('=')) => BarEq,
        (Colon, Some(':')) => ColonColon,
        (Dot, Some('.')) => DotDot,
        (Eq, Some('=')) => EqEq,
        (Eq, Some('>')) => FatArrow,
        (Gt, Some('=')) => GtEq,
        (Gt, Some('>')) => GtGt,
        (Hash, Some('!')) => HashBang,
        (Lt, Some('=')) => LtEq,
        (Lt, Some('<')) => LtLt,
        (Minus, Some('=')) => MinusEq,
        (Minus, Some('>')) => Arrow,
        (Plus, Some('=')) => PlusEq,
        (Rem, Some('=')) => RemEq,
        // Comments are tokenized by dedicated sub-lexers
        (Slash, Some('*')) => return Ok(self.block_comment()?.produce(Comment)),
        (Slash, Some('=')) => SlashEq,
        (Slash, Some('/')) => return self.line_comment(),
        (Star, Some('=')) => StarEq,
        (Xor, Some('=')) => XorEq,
        (Xor, Some('^')) => XorXor,
        _ => return Ok(self.produce(tok)),
    };
    // Stage 3 (trigraphs): same scheme, one character further
    let tok = match (tok, self.consume().peek()) {
        // A `#!/` shebang line is treated as a line comment
        (HashBang, Some('/')) => return self.line_comment(),
        (DotDot, Some('=')) => DotDotEq,
        (GtGt, Some('=')) => GtGtEq,
        (LtLt, Some('=')) => LtLtEq,
        _ => return Ok(self.produce(tok)),
    };
    Ok(self.consume().produce(tok))
}
pub fn line_comment(&mut self) -> Result<Token, LexError> {
while self.consume().peek().is_some_and(|c| c != '\n') {}
Ok(self.produce(TKind::Comment))
}
    /// Consumes a (possibly nested) block comment after its opening `/*`,
    /// returning the lexer so the caller chooses what token to produce.
    ///
    /// Errors if the input ends before every open comment is closed.
    pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
        // Eat the still-peeked `*` of the opening digraph.
        self.consume();
        while let Some(c) = self.take() {
            match (c, self.peek()) {
                // A nested `/*` recurses so openers and closers balance.
                ('/', Some('*')) => self.block_comment()?,
                ('*', Some('/')) => return Ok(self.consume()),
                _ => continue,
            };
        }
        Err(self.error("Unterminated block comment"))
    }
pub fn identifier(&mut self) -> Result<Token, LexError> {
while self.consume().peek().is_some_and(is_xid_continue) {}
let token = self.produce(TKind::Identifier);
Ok(Token {
kind: match token.lexeme.as_str() {
"break" => TKind::Break,
"const" => TKind::Const,
"do" => TKind::Do,
"else" => TKind::Else,
"false" => TKind::False,
"fn" => TKind::Fn,
"if" => TKind::If,
"let" => TKind::Let,
"loop" => TKind::Loop,
"macro" => TKind::Macro,
"match" => TKind::Match,
"return" => TKind::Return,
"then" => TKind::Do,
"true" => TKind::True,
"while" => TKind::While,
_ => token.kind,
},
..token
})
}
pub fn character(&mut self) -> Result<Token, LexError> {
let c = match self.consume().take() {
Some('\\') => self.escape()?,
Some(c) => c,
None => '\0',
};
if self.take().is_some_and(|c| c == '\'') {
Ok(self.produce_with_lexeme(TKind::Character, c.into()))
} else {
Err(self.error("Unterminated character"))
}
}
pub fn string(&mut self) -> Result<Token, LexError> {
let mut lexeme = String::new();
self.consume();
loop {
lexeme.push(match self.take() {
None => Err(self.error("Unterminated string"))?,
Some('\\') => self.escape()?,
Some('"') => break,
Some(c) => c,
})
}
lexeme.shrink_to_fit();
Ok(self.produce_with_lexeme(TKind::String, lexeme))
}
    /// Decodes one escape-sequence body (the character after `\`).
    ///
    /// Unrecognized escapes evaluate to the escaped character itself.
    pub fn escape(&mut self) -> Result<char, LexError> {
        Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
            // `\ ` -> non-breaking space
            ' ' => '\u{a0}',
            '0' => '\0',
            // bell
            'a' => '\x07',
            // backspace
            'b' => '\x08',
            // ASCII escape
            'e' => '\x1b',
            // form feed
            'f' => '\x0c',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            // `\u{...}`
            'u' => self.unicode_escape()?,
            // `\xNN`
            'x' => self.hex_escape()?,
            c => c,
        })
    }
pub fn hex_escape(&mut self) -> Result<char, LexError> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(self.error("Invalid digit"))
}
pub fn unicode_escape(&mut self) -> Result<char, LexError> {
self.next_if('{')
.ok_or_else(|| self.error("No unicode escape opener"))?;
let mut out = 0;
while let Some(c) = self.take() {
if c == '}' {
return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value"));
}
out = out * 16 + c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?;
}
Err(self.error("Unterminated unicode escape"))
}
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
while self.peek().is_some_and(|c| c.is_digit(BASE)) {
self.consume();
}
Ok(self.produce(TKind::Integer))
}
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
Ok(digit)
} else {
Err(self.error("Invalid digit"))
}
}
}

78
src/lib.rs Normal file
View File

@ -0,0 +1,78 @@
//! The Dough Programming Language
//!
//! A simpler programming language
pub mod fmt;
pub mod span;
pub mod token;
pub mod lexer;
pub mod ast;
pub mod parser;
pub mod typed_ast {
    //! The Typed AST defines an interface between the type checker and code generator
    use crate::span::Span;
    use std::collections::HashMap;
    /// Side table keyed by definition id (`defid`).
    pub struct Table {
        /// Fully qualified names, for debugging
        pub names: Vec<String>,
        /// The unprojected relative stack offset
        pub local: HashMap<usize, isize>,
    }
    /// DefID annotation
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct Defn {
        // Where this definition appears in the source.
        pub span: Span,
        /// The index of this name in the associated Table
        pub defid: usize,
    }
}
/// Type checking (currently an empty stub).
pub mod typeck {}
pub mod ir {
    //! The IR defines an interface between the code generator and interpreter(?)
}
pub mod interpreter {
    //! The Doughlang interpreter interprets an AST
    use std::sync::{Arc, Mutex};
    /// A runtime value.
    #[derive(Clone, Debug)]
    pub enum Value {
        Bool(bool),
        ISize(isize),
        // Aggregates are shared and internally mutable.
        Adt(Arc<Mutex<Adt>>),
    }
    impl Value {
        /// Casts `self` to the type named by `as_type`.
        // NOTE(review): unknown target types currently pass the value through
        // unchanged (see the fallback arm) — confirm whether this should error.
        pub fn cast(self, as_type: &str) -> Self {
            match (self, as_type) {
                (Self::ISize(v), "isize") => Self::ISize(v), // redundant cast
                (v, _) => v, // invalid cast!
            }
        }
    }
    /// Aggregate data: arrays and tuples of values.
    pub enum Adt {
        Array(Vec<Value>),
        Tuple(Vec<Value>),
    }
    impl std::fmt::Debug for Adt {
        // Both variants print with list syntax (`[..]`).
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Self::Array(elem) => f.debug_list().entries(elem).finish(),
                Self::Tuple(elem) => f.debug_list().entries(elem).finish(),
            }
        }
    }
}

121
src/main.rs Normal file
View File

@ -0,0 +1,121 @@
//! Tests the lexer
#[allow(unused_imports)]
use doughlang::{
ast::{
Expr,
matcher::{Match, Subst},
},
lexer::{LexError, Lexer},
parser::{ParseError, Parser},
span::Span,
token::{TKind, Token},
};
use repline::prebaked::*;
use std::{
error::Error,
io::{IsTerminal, stdin},
};
/// Entry point: runs an interactive loop on a terminal; otherwise lexes and
/// parses everything piped in on stdin.
fn main() -> Result<(), Box<dyn Error>> {
    if stdin().is_terminal() {
        read_and("\x1b[32m", " >", "?>", |line| match line.trim_end() {
            "" => Ok(Response::Continue),
            "exit" => Ok(Response::Break),
            "clear" => {
                // ANSI: home the cursor and clear the screen.
                print!("\x1b[H\x1b[2J");
                Ok(Response::Deny)
            }
            // Enter the pattern-substitution sub-REPL.
            "pat" => {
                if let Err(e) = subst() {
                    println!("\x1b[31m{e}\x1b[0m");
                }
                Ok(Response::Deny)
            }
            _ => {
                parse(line);
                Ok(Response::Accept)
            }
        })?;
    } else {
        // Non-interactive: read all of stdin as one document.
        let doc = std::io::read_to_string(stdin())?;
        lex(&doc);
        parse(&doc);
    }
    Ok(())
}
/// Lexes `document` to completion, printing one line per token and stopping
/// at the first lexer error (end of input arrives as an error too).
fn lex(document: &str) {
    let mut lexer = Lexer::new(document);
    loop {
        let token = match lexer.scan() {
            Ok(token) => token,
            Err(e) => {
                eprintln!("{e}");
                break;
            }
        };
        let Token { lexeme, kind, span: Span { head, tail } } = token;
        println!(
            "{kind:?}\x1b[11G {head:<4} {tail:<4} {}",
            lexeme.escape_debug()
        )
    }
}
/// Interactive pattern-matching loop.
///
/// Reads one expression, then repeatedly reads patterns: a bare pattern
/// reports what it captures; `pattern: substitution` rewrites the expression.
fn subst() -> Result<(), Box<dyn Error>> {
    let mut rl = repline::Repline::new("\x1b[35mexp", " >", "?>");
    let exp = rl.read()?;
    let mut exp: Expr = Parser::new(Lexer::new(&exp)).parse(0)?;
    println!("\x1b[G\x1b[J{exp}");
    rl.accept();
    loop {
        rl.set_color("\x1b[36mpat");
        let pat = rl.read()?;
        rl.accept();
        print!("\x1b[G\x1b[J");
        let mut p = Parser::new(Lexer::new(&pat));
        // An unparsable pattern just re-echoes the current expression.
        let Ok(pat) = p.parse::<Expr>(0) else {
            println!("{exp}");
            continue;
        };
        // No trailing `:` means match-only: print the captured bindings.
        if p.next_if(TKind::Colon).is_err() {
            let Some(Subst { exp, pat }) = exp.construct(&pat) else {
                continue;
            };
            for (name, pat) in pat.iter() {
                println!("{name}: {pat}")
            }
            for (name, expr) in exp.iter() {
                println!("{name}: {expr}")
            }
            continue;
        }
        // `pattern: substitution` — rewrite the expression in place.
        let sub: Expr = p.parse(0)?;
        if exp.apply_rule(&pat, &sub) {
            println!("{exp}");
        } else {
            println!("No match: {pat} in {exp}\n")
        }
    }
}
/// Parses and prints expressions from `document` until the parser reports an
/// error; end of input surfaces as a lexer "EOF" error and ends the loop.
fn parse(document: &str) {
    let mut parser = Parser::new(Lexer::new(document));
    loop {
        match parser.parse::<Expr>(0) {
            Ok(v) => println!("{v}"),
            Err(e) => {
                println!("\x1b[31m{e}\x1b[0m");
                break;
            }
        }
    }
}

516
src/parser.rs Normal file
View File

@ -0,0 +1,516 @@
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{
ast::*,
lexer::{LexError, Lexer},
span::Span,
token::{TKind, Token},
};
use std::{error::Error, fmt::Display, vec};
pub mod numeric;
/// Errors produced while parsing; each carries the [Span] where it occurred.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// A lexer error forwarded through the parser.
    FromLexer(LexError),
    /// A specific token kind was required but something else appeared.
    Expected(TKind, Span),
    NotPattern(TKind, Span),
    NotPrefix(TKind, Span),
    NotInfix(TKind, Span),
    NotPostfix(TKind, Span),
}
impl Error for ParseError {}
impl Display for ParseError {
    /// Formats the error with its source location for user-facing diagnostics.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
            // Grammar fix: "a infix" -> "an infix".
            Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not an infix operator."),
            Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
        }
    }
}
/// Shorthand for parser results.
pub type PResult<T> = Result<T, ParseError>;
/// A pull parser over a [Lexer]'s tokens with one token of lookahead.
#[derive(Debug)]
pub struct Parser<'t> {
    pub lexer: Lexer<'t>,
    /// The one-token lookahead buffer.
    pub next_tok: Option<Token>,
    /// The span of the most recently peeked token.
    pub last_loc: Span,
}
impl<'t> Parser<'t> {
    /// Constructs a new Parser
    pub fn new(lexer: Lexer<'t>) -> Self {
        Self { lexer, next_tok: None, last_loc: Span::default() }
    }
    /// The identity function. This exists to make production chaining easier.
    pub fn then<T>(&self, t: T) -> T {
        t
    }
    /// Returns the span of the most recently peeked token.
    pub fn span(&self) -> Span {
        self.last_loc
    }
    /// Parses a value that implements the [Parse] trait.
    pub fn parse<T: Parse<'t>>(&mut self, level: usize) -> PResult<T> {
        Parse::parse(self, level)
    }
    /// Peeks the next [Token]. Returns [ParseError::FromLexer] on lexer error.
    pub fn peek(&mut self) -> PResult<&Token> {
        let next_tok = match self.next_tok.take() {
            Some(tok) => tok,
            // Lookahead buffer empty: pull a fresh token from the lexer.
            None => match self.lexer.scan() {
                Ok(tok) => tok,
                Err(e) => Err(ParseError::FromLexer(e))?,
            },
        };
        // Record the span even when the token was already buffered.
        self.last_loc = next_tok.span;
        self.next_tok = Some(next_tok);
        Ok(self.next_tok.as_ref().expect("should have token"))
    }
    /// Peeks the next token if it matches the `expected` [TKind]
    pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> {
        // `find` over the Result treats a lexer error like a non-match.
        self.peek().into_iter().find(|tok| tok.kind == expected)
    }
    /// Consumes and returns the currently-peeked [Token].
    pub fn take(&mut self) -> Option<Token> {
        self.next_tok.take()
    }
    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
    pub fn take_lexeme(&mut self) -> Option<String> {
        self.take().map(|tok| tok.lexeme)
    }
    /// Consumes and returns the next [Token], lexing one if none is buffered.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> PResult<Token> {
        self.peek()?;
        Ok(self.take().expect("should have token here"))
    }
    /// Consumes and returns the next [Token] if it matches the `expected` [TKind]
    pub fn next_if(&mut self, expected: TKind) -> PResult<Token> {
        let token = self.peek()?;
        if token.kind == expected {
            Ok(self.take().expect("should have token here"))
        } else {
            Err(ParseError::Expected(expected, token.span))
        }
    }
    /// Parses a list of P separated by `sep` tokens, ending in an `end` token.
    /// ```nobnf
    /// List<T> = (T `sep`)* T? `end` ;
    /// ```
    pub fn list<P: Parse<'t>>(
        &mut self,
        mut elems: Vec<P>,
        sep: TKind,
        end: TKind,
    ) -> PResult<Vec<P>> {
        while self.peek_if(end).is_none() {
            elems.push(self.parse(0)?);
            // A missing separator ends the list (so trailing `sep` is optional).
            if self.next_if(sep).is_err() {
                break;
            }
        }
        self.next_if(end)?;
        Ok(elems)
    }
    /// Parses into an [`Option<P>`] if the next token is `next`
    pub fn opt_if<P: Parse<'t>>(&mut self, level: usize, next: TKind) -> PResult<Option<P>> {
        Ok(match self.next_if(next) {
            Ok(_) => Some(self.parse(level)?),
            Err(_) => None,
        })
    }
    /// Parses an expression into a vec unless the next token is `end`
    pub fn opt<P: Parse<'t>>(&mut self, level: usize, end: TKind) -> PResult<Option<P>> {
        let out = match self.peek_if(end) {
            None => Some(self.parse(level)?),
            Some(_) => None,
        };
        // The `end` token is required and consumed in either case.
        self.next_if(end)?;
        Ok(out)
    }
    /// Consumes the currently peeked token without returning it.
    pub fn consume(&mut self) -> &mut Self {
        self.next_tok = None;
        self
    }
}
/// A syntax-tree node that can be parsed from a [Parser]'s token stream.
pub trait Parse<'t> {
    /// Parses a `Self` at the given operator binding `level`.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized;
}
impl<'t> Parse<'t> for Literal {
    /// Parses a literal value out of a single token.
    fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
        let tok = p.peek()?;
        Ok(match tok.kind {
            TKind::True => p.consume().then(Literal::Bool(true)),
            TKind::False => p.consume().then(Literal::Bool(false)),
            TKind::Character => {
                // The lexer stores the decoded character as the entire lexeme.
                Literal::Char(p.take_lexeme().expect("should have Token").remove(0))
            }
            TKind::Integer => {
                let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
                // TODO: more complex int parsing
                let int = lexeme
                    .parse()
                    .map_err(|_| ParseError::Expected(TKind::Integer, span))?;
                Literal::Int(int)
            }
            TKind::String => Literal::Str(p.take_lexeme().expect("should have Token")),
            _ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
        })
    }
}
impl<'t> Parse<'t> for Pat {
    /// Parses a pattern (literals, names, `_`, `` `meta ``, `..rest`, tuples, slices).
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        let tok = p.peek()?;
        match tok.kind {
            // Comments may precede a pattern; skip and retry.
            TKind::Comment => p.consume().parse(level),
            TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
                Ok(Pat::Lit(p.parse(0)?))
            }
            TKind::Identifier => match tok.lexeme.as_str() {
                // `_` ignores the matched value.
                "_" => Ok(p.consume().then(Pat::Ignore)),
                _ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))),
            },
            // `` `name `` binds a metavariable.
            TKind::Grave => Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)),
            // `..` or `..pat` matches the rest of a sequence.
            TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
                Some(_) => Some(p.parse(level)?),
                None => None,
            })),
            TKind::LParen => Ok(Pat::Tuple(p.consume().list(
                vec![],
                TKind::Comma,
                TKind::RParen,
            )?)),
            TKind::LBrack => Ok(Pat::Slice(p.consume().list(
                vec![],
                TKind::Comma,
                TKind::RBrack,
            )?)),
            _ => Err(ParseError::NotPattern(tok.kind, tok.span)),
        }
    }
}
impl<'t> Parse<'t> for MatchArm {
    /// Parses one match arm: an optional leading `|`, `|`-separated patterns
    /// terminated by `=>`, then the arm body.
    fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
        let _ = p.next_if(TKind::Bar);
        let patterns = p.list(vec![], TKind::Bar, TKind::FatArrow)?;
        let body = p.parse(0)?;
        Ok(MatchArm(patterns, body))
    }
}
impl<'t> Parse<'t> for MakeArm {
    /// Parses one constructor field: an identifier, optionally `: value`.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        let field = p.next_if(TKind::Identifier)?.lexeme;
        let value = if p.next_if(TKind::Colon).is_ok() {
            Some(p.parse(level)?)
        } else {
            None
        };
        Ok(MakeArm(field, value))
    }
}
/// Operator precedence tiers, lowest-binding first; the discriminant order
/// is load-bearing, so variants must not be reordered.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Prec {
    Min,
    Do,
    Assign,
    Tuple,
    Make,
    Body,
    Logical,
    LogOr,
    LogAnd,
    Compare,
    Range,
    Binary,
    Shift,
    Factor,
    Term,
    Project,
    Unary,
    Extend,
    Max,
}
impl Prec {
    /// The lowest binding level.
    pub const MIN: usize = Prec::Min.value();
    /// The base binding level of this tier (two levels per tier).
    pub const fn value(self) -> usize {
        2 * self as usize
    }
    /// Binding level used when this operator appears to the left.
    /// Assign binds one tighter here, making it right-associative.
    pub const fn prev(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base + 1 } else { base }
    }
    /// Binding level handed down when parsing the right-hand operand.
    pub const fn next(self) -> usize {
        let base = self.value();
        if matches!(self, Self::Assign) { base } else { base + 1 }
    }
}
/// Maps a token to its prefix operator and binding power.
fn from_prefix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Do => (Op::Do, Prec::Do),
        TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
            (Op::Lit, Prec::Max)
        }
        TKind::Identifier => (Op::Id, Prec::Max),
        TKind::Grave => (Op::Mid, Prec::Max),
        TKind::Fn => (Op::Fn, Prec::Body),
        TKind::Match => (Op::Match, Prec::Body),
        TKind::Macro => (Op::Macro, Prec::Assign),
        TKind::Let => (Op::Let, Prec::Body),
        TKind::Const => (Op::Const, Prec::Body),
        TKind::Loop => (Op::Loop, Prec::Body),
        TKind::If => (Op::If, Prec::Body),
        TKind::While => (Op::While, Prec::Body),
        TKind::Break => (Op::Break, Prec::Body),
        TKind::Return => (Op::Return, Prec::Body),
        // Grouping openers; the matching closers map to Op::End, and the
        // Expr parser decides whether an End in prefix position is legal.
        TKind::LBrack => (Op::Array, Prec::Min),
        TKind::RBrack => (Op::End, Prec::Min),
        TKind::LCurly => (Op::Block, Prec::Min),
        TKind::RCurly => (Op::End, Prec::Min),
        TKind::LParen => (Op::Group, Prec::Min),
        TKind::RParen => (Op::End, Prec::Min),
        TKind::Amp => (Op::Refer, Prec::Max),
        // TKind::AmpAmp => todo!("addraddr"),
        TKind::Bang => (Op::Not, Prec::Unary),
        TKind::BangBang => (Op::Identity, Prec::Unary),
        // `|args| body` vs `|| body`: disambiguated by the returned Prec.
        TKind::Bar => (Op::Lambda, Prec::Min),
        TKind::BarBar => (Op::Lambda, Prec::Max),
        TKind::DotDot => (Op::RangeEx, Prec::Range),
        TKind::DotDotEq => (Op::RangeIn, Prec::Range),
        TKind::Minus => (Op::Neg, Prec::Unary),
        TKind::Plus => (Op::Identity, Prec::Unary),
        TKind::Star => (Op::Deref, Prec::Unary),
        kind => Err(ParseError::NotPrefix(kind, token.span))?,
    })
}
/// Maps a token to its infix operator and binding power.
fn from_infix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Semi => (Op::Do, Prec::Do), // the inspiration
        TKind::RParen => (Op::End, Prec::Do),
        TKind::Comma => (Op::Tuple, Prec::Tuple),
        TKind::Eq => (Op::Set, Prec::Assign),
        TKind::XorXor => (Op::LogXor, Prec::Logical),
        TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd),
        TKind::BarBar => (Op::LogOr, Prec::LogOr),
        TKind::Lt => (Op::Lt, Prec::Compare),
        TKind::LtEq => (Op::Leq, Prec::Compare),
        TKind::EqEq => (Op::Eq, Prec::Compare),
        TKind::BangEq => (Op::Neq, Prec::Compare),
        TKind::GtEq => (Op::Geq, Prec::Compare),
        TKind::Gt => (Op::Gt, Prec::Compare),
        TKind::DotDot => (Op::RangeEx, Prec::Range),
        TKind::DotDotEq => (Op::RangeIn, Prec::Range),
        TKind::Amp => (Op::And, Prec::Binary),
        TKind::Xor => (Op::Xor, Prec::Binary),
        TKind::Bar => (Op::Or, Prec::Binary),
        TKind::LtLt => (Op::Shl, Prec::Shift),
        TKind::GtGt => (Op::Shr, Prec::Shift),
        TKind::Plus => (Op::Add, Prec::Factor),
        TKind::Minus => (Op::Sub, Prec::Factor),
        TKind::Star => (Op::Mul, Prec::Term),
        TKind::Slash => (Op::Div, Prec::Term),
        TKind::Rem => (Op::Rem, Prec::Term),
        TKind::Dot => (Op::Dot, Prec::Project),
        TKind::ColonColon => (Op::Path, Prec::Max),
        kind => Err(ParseError::NotInfix(kind, token.span))?,
    })
}
/// Maps a token to its postfix operator and binding power.
fn from_postfix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Question => (Op::Try, Prec::Unary),
        // Openers in postfix position extend the head: call, index, construct.
        TKind::LParen => (Op::Call, Prec::Extend),
        TKind::LBrack => (Op::Index, Prec::Extend),
        TKind::LCurly => (Op::Make, Prec::Make),
        kind => Err(ParseError::NotPostfix(kind, token.span))?,
    })
}
/// Decides whether a repeated infix operator folds into one n-ary node.
///
/// Only `;`-sequences, tuples, and paths coagulate; projection (`.`) and the
/// comparison operators deliberately chain as nested binary nodes instead.
fn should_coagulate(prev: Op, op: Op) -> bool {
    prev == op && matches!(prev, Op::Do | Op::Tuple | Op::Path)
}
impl<'t> Parse<'t> for Expr {
    /// Parses an [Expr]ession.
    ///
    /// The `level` parameter indicates the operator binding level of the expression.
    /// Pratt-style: parse one prefix form, then fold in postfix and infix
    /// operators while their binding power admits them.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        const MIN: usize = Prec::MIN;
        // Comments may appear wherever an expression begins.
        while p.next_if(TKind::Comment).is_ok() {}
        // Prefix
        let tok = p.peek()?;
        let ((op, prec), span) = (from_prefix(tok)?, tok.span);
        let mut head = match op {
            // Empty is returned when a block finisher is an expr prefix.
            // It's the only expr that doesn't consume.
            Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]),
            Op::End => Err(ParseError::NotPrefix(tok.kind, span))?,
            Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
            Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
            Op::Lit => Expr::Lit(p.parse(MIN)?),
            // `let pat (= init)?`
            Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?),
            // `const pat = init` (initializer required)
            Op::Const => Expr::Const(p.consume().parse(prec.next())?, {
                p.next_if(TKind::Eq)?;
                p.parse(prec.next())?
            }),
            Op::Macro => Expr::Op(
                op,
                vec![p.consume().parse(prec.next())?, {
                    p.next_if(TKind::FatArrow)?;
                    p.parse(prec.next())?
                }],
            ),
            Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, {
                p.next_if(TKind::LCurly)?;
                p.list(vec![], TKind::Comma, TKind::RCurly)?
            }),
            Op::Block => Expr::Op(
                op,
                p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
            ),
            Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?),
            // `()` is the empty tuple; `(e)` is a grouped expression.
            Op::Group => match p.consume().opt(MIN, TKind::RParen)? {
                Some(value) => Expr::Op(Op::Group, vec![value]),
                None => Expr::Op(Op::Tuple, vec![]),
            },
            Op::If | Op::While => {
                p.consume();
                let exprs = vec![
                    // conditional restricted to Logical operators or above
                    p.parse(Prec::Logical.value())?,
                    p.parse(prec.next())?,
                    // Missing `else` becomes an empty End node.
                    match p.peek() {
                        Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
                        _ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())),
                    },
                ];
                Expr::Op(op, exprs)
            }
            Op::Fn => {
                p.consume().next_if(TKind::LParen)?;
                Expr::Fn(
                    p.list(vec![], TKind::Comma, TKind::RParen)?,
                    p.parse(prec.next())?,
                )
            }
            // dirty hack: There are two closure operators, signaled by returned prec.
            Op::Lambda if prec == Prec::Min => Expr::Fn(
                p.consume().list(vec![], TKind::Comma, TKind::Bar)?,
                p.parse(Prec::Body.next())?,
            ),
            Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?),
            _ => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
        };
        // Postfix: calls, indexing, construction, `?`
        while let Ok(tok) = p.peek()
            && let Ok((op, prec)) = from_postfix(tok)
            && level <= prec.prev()
            && op != Op::End
        {
            let span = span.merge(p.span());
            p.consume();
            head = match op {
                Op::Make => Expr::Make(
                    head.anno(span).into(),
                    p.consume().list(vec![], TKind::Comma, TKind::RCurly)?,
                ),
                Op::Index => Expr::Op(
                    op,
                    p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?,
                ),
                Op::Call => Expr::Op(
                    op,
                    p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?,
                ),
                _ => Expr::Op(op, vec![head.anno(span)]),
            };
        }
        // Infix: binary operators at or above the requested binding level
        while let Ok(tok) = p.peek()
            && let Ok((op, prec)) = from_infix(tok)
            && level <= prec.prev()
            && op != Op::End
        {
            let span = span.merge(p.span());
            p.consume();
            head = match head {
                // controls expression chaining vs coagulating
                Expr::Op(prev, mut args) if should_coagulate(prev, op) => {
                    args.push(p.parse(prec.next())?);
                    Expr::Op(op, args)
                }
                head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]),
            }
        }
        Ok(head)
    }
}
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
    /// Parses a `P` and annotates it with the span of source it covered.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized {
        let opening = p.span();
        let inner = p.parse(level)?;
        Ok(Anno(inner, opening.merge(p.span())))
    }
}
impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
    /// Parses a `P` and moves it to the heap.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized {
        p.parse(level).map(Box::new)
    }
}

1
src/parser/numeric.rs Normal file
View File

@ -0,0 +1 @@

42
src/span.rs Normal file
View File

@ -0,0 +1,42 @@
use std::ops::Range;
/// Stores the start and end byte position
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    pub head: u32,
    pub tail: u32,
}
impl std::fmt::Debug for Span {
    /// Formats as `[head:tail]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{}:{}]", self.head, self.tail)
    }
}
/// Constructs a [Span] from start and end byte positions.
#[allow(non_snake_case)]
pub fn Span(head: u32, tail: u32) -> Span {
    Span { head, tail }
}
impl Span {
    /// Updates `self` to include all but the last byte in `other`
    // NOTE(review): the upper bound is `other.head`, not `other.tail`, so this
    // only extends to the *start* of `other` — confirm that is intended.
    pub fn merge(self, other: Span) -> Span {
        let head = self.head.min(other.head);
        let tail = self.tail.max(other.head);
        Span { head, tail }
    }
}
impl From<Span> for Range<usize> {
    /// Converts to a half-open byte range suitable for slicing source text.
    fn from(value: Span) -> Self {
        value.head as usize..value.tail as usize
    }
}
impl std::fmt::Display for Span {
    /// Formats as `head:tail`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}:{}", self.head, self.tail)
    }
}

88
src/token.rs Normal file
View File

@ -0,0 +1,88 @@
//! The Token defines an interface between lexer and parser
use crate::span::Span;
/// A single lexeme with its classification and source location.
#[derive(Clone, Debug)]
pub struct Token {
    // May differ from the raw source text (e.g. decoded escape sequences).
    pub lexeme: String,
    pub kind: TKind,
    pub span: Span,
}
/// The kind of a [Token].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TKind {
    Comment,
    // Keywords
    Break,
    Const,
    Do,
    Else,
    False,
    Fn,
    If,
    Let,
    Loop,
    Macro,
    Match,
    Return,
    True,
    While,
    Identifier, // or Keyword
    // Literals
    Character,
    String,
    Integer, // 0(x[0-9A-Fa-f]* | d[0-9]* | o[0-7]* | b[0-1]*) | [1-9][0-9]*
    // Delimiters
    LCurly, // {
    RCurly, // }
    LBrack, // [
    RBrack, // ]
    LParen, // (
    RParen, // )
    // Punctuation
    Amp,        // &
    AmpAmp,     // &&
    AmpEq,      // &=
    Arrow,      // ->
    At,         // @
    Backslash,  // \
    Bang,       // !
    BangBang,   // !!
    BangEq,     // !=
    Bar,        // |
    BarBar,     // ||
    BarEq,      // |=
    Colon,      // :
    ColonColon, // ::
    Comma,      // ,
    Dot,        // .
    DotDot,     // ..
    DotDotEq,   // ..=
    Eq,         // =
    EqEq,       // ==
    FatArrow,   // =>
    Grave,      // `
    Gt,         // >
    GtEq,       // >=
    GtGt,       // >>
    GtGtEq,     // >>=
    Hash,       // #
    HashBang,   // #!
    Lt,         // <
    LtEq,       // <=
    LtLt,       // <<
    LtLtEq,     // <<=
    Minus,      // -
    MinusEq,    // -=
    Plus,       // +
    PlusEq,     // +=
    Question,   // ?
    Rem,        // %
    RemEq,      // %=
    Semi,       // ;
    Slash,      // /
    SlashEq,    // /=
    Star,       // *
    StarEq,     // *=
    Tilde,      // ~
    Xor,        // ^
    XorEq,      // ^=
    XorXor,     // ^^
}