Initial Commit

This commit is contained in:
2025-08-28 02:26:06 -04:00
committed by Val
commit c83218d750
17 changed files with 2276 additions and 0 deletions

384
src/ast.rs Normal file
View File

@@ -0,0 +1,384 @@
//! The Abstract Syntax Tree defines an interface between the parser and type checker
pub mod matcher;
/// A value with an annotation.
///
/// Pairs an AST node `T` with annotation data `A` (a [Span] by default).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Anno<T: Annotation, A: Annotation = Span>(pub T, pub A);
/// An annotation: extra data added on to important AST nodes.
pub trait Annotation: Clone + std::fmt::Display + std::fmt::Debug + PartialEq + Eq {}
// Blanket impl: any cloneable, printable, comparable type may annotate a node.
impl<T: Clone + std::fmt::Debug + std::fmt::Display + PartialEq + Eq> Annotation for T {}
/// A literal value embedded directly in the source text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
/// A boolean literal: true | false
Bool(bool),
/// A character literal: 'a', '\u{1f988}'
Char(char),
/// An integer literal: 0, 123, 0x10
Int(i128),
/// A string literal: "hello"
Str(String),
}
/// Binding patterns for each kind of matchable [Ty]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pat {
/// `_`: matches anything, binding nothing
Ignore,
/// A meta-identifier (macro-level capture)
MetId(String),
/// A name binding
Name(String),
/// `..` or `..pat`: the remainder of a tuple or slice
Rest(Option<Box<Pat>>),
/// A literal, matched by equality
Lit(Literal),
/// `( Pat,* )`
Tuple(Vec<Pat>),
/// `[ Pat,* ]`
Slice(Vec<Pat>),
}
/// The arms of a make expression
/// ```ignore
/// Identifier (':' Expr)?
/// ```
/// Holds the field name and an optional initializer expression.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MakeArm<A: Annotation = Span>(pub String, pub Option<Anno<Expr<A>, A>>);
/// The arms of a match expression
/// ```ignore
/// (Pat |)* Pat? => Expr
/// ```
/// Holds the alternative patterns and the arm body.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MatchArm<A: Annotation = Span>(pub Vec<Pat>, pub Anno<Expr<A>, A>);
/// In-universe types
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Ty {
/// `_`: a type to be inferred
Infer,
/// `(..Tys)`
Tuple(Vec<Ty>),
/// `[Ty]`
Slice(Box<Ty>),
/// `[Ty; _]`: element type and length
Array(Box<Ty>, usize),
/// `[Rety, ..Args]`: return type first, then argument types
Fn(Vec<Ty>),
}
/// Expressions: The beating heart of Dough
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr<A: Annotation = Span> {
/// An identifier
Id(String),
/// A meta-identifier (capture point for the macro matcher)
MetId(String),
/// A literal bool, string, char, or int
Lit(Literal),
/// let pattern = expr (the initializer is optional)
Let(Pat, Option<Box<Anno<Self, A>>>),
/// `const Pat (= Expr)?` (Basically let rec)
Const(Pat, Box<Anno<Self, A>>),
/// `| Pat,* | Expr` | `|| Expr` | `fn (Pat,*) Expr`
Fn(Vec<Pat>, Box<Anno<Self, A>>),
/// Expr { (Ident (: Expr)?),* }
Make(Box<Anno<Self, A>>, Vec<MakeArm<A>>),
/// match Expr { MatchArm,* }
Match(Box<Anno<Self, A>>, Vec<MatchArm<A>>),
/// Op Expr | Expr Op | Expr (Op Expr)+ | Op Expr Expr else Expr
/// All operator forms share one node; the vector length carries the arity.
Op(Op, Vec<Anno<Self, A>>),
}
impl<A: Annotation> Expr<A> {
    /// Wraps this expression with an annotation, producing an [Anno] node.
    pub fn anno(self, annotation: A) -> Anno<Expr<A>, A> {
        Anno(self, annotation)
    }
    /// Returns true if this expression is a place expression — something
    /// addressable that may appear on the left of an assignment: a bare
    /// identifier, an index, a field access, a path, or a dereference.
    pub fn is_place(&self) -> bool {
        matches!(
            self,
            Self::Id(_)
                | Self::Op(Op::Index, _)
                | Self::Op(Op::Dot, _)
                | Self::Op(Op::Path, _)
                | Self::Op(Op::Deref, _)
        )
    }
}
/// Operators, one variant per syntactic form.
///
/// The "fake" operators exist so special forms can take part in
/// precedence-driven parsing alongside the true operators.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
// -- fake operators used to assign precedences to special forms
Id, // Identifier
Mid, // MetaIdentifier
Lit, // Literal
Let, // let Pat = Expr
Const, // const Pat = Expr
Fn, // fn ( Pat,* ) Expr
Make, // Expr{ Expr,* }
Macro, // macro Expr => Expr
Match, // match Expr { MatchArm,* }
End, // Produces an empty value.
// -- true operators
Do, // Expr ; Expr
Block, // { Expr }
Array, // [ Expr,* ]
Group, // ( Expr ,?)
Tuple, // ( Expr,* )
Try, // Expr '?'
Index, // Expr [ Expr,* ]
Call, // Expr ( Expr,* )
Lambda, // |Pat?| Expr
Loop, // loop Expr
If, // if Expr Expr (else Expr)?
While, // while Expr Expr (else Expr)?
Break, // break Expr
Return, // return Expr
Dot, // Expr . Expr
Path, // Expr :: Expr
RangeEx, // Expr? ..Expr
RangeIn, // Expr? ..=Expr
Neg, // -Expr
Not, // !Expr
Identity, // !!Expr
Refer, // &Expr
Deref, // *Expr
Mul, // Expr * Expr
Div, // Expr / Expr
Rem, // Expr % Expr
Add, // Expr + Expr
Sub, // Expr - Expr
Shl, // Expr << Expr
Shr, // Expr >> Expr
And, // Expr & Expr
Xor, // Expr ^ Expr
Or, // Expr | Expr
Lt, // Expr < Expr
Leq, // Expr <= Expr
Eq, // Expr == Expr
Neq, // Expr != Expr
Geq, // Expr >= Expr
Gt, // Expr > Expr
LogAnd, // Expr && Expr
LogXor, // Expr ^^ Expr
LogOr, // Expr || Expr
Set, // Expr = Expr
}
use crate::{fmt::FmtAdapter, span::Span};
use std::{fmt::Display, format_args as fmt};
impl Display for Literal {
    /// Formats the literal as it would appear in source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Str(text) => write!(f, "\"{}\"", text.escape_debug()),
            Self::Char(ch) => write!(f, "'{}'", ch.escape_debug()),
            // Defer to the primitive Display impls so formatter flags apply.
            Self::Bool(value) => value.fmt(f),
            Self::Int(value) => value.fmt(f),
        }
    }
}
impl<T: Display + Annotation, A: Annotation> Display for Anno<T, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl<A: Annotation> Display for Expr<A> {
/// Pretty-prints the expression in source-like syntax.
///
/// Several operators get dedicated layouts; everything else falls through
/// to the generic unary/n-ary forms at the bottom of the match.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Id(id) => id.fmt(f),
Self::MetId(id) => write!(f, "`{id}"),
Self::Lit(literal) => literal.fmt(f),
Self::Let(pat, Some(expr)) => write!(f, "let {pat} = {expr}"),
Self::Let(pat, None) => write!(f, "let {pat}"),
Self::Const(pat, expr) => write!(f, "const {pat} = {expr}"),
Self::Make(expr, make_arms) => {
f.delimit(fmt!("make {expr} {{"), "}").list(make_arms, ", ")
}
Self::Match(expr, match_arms) => f
.delimit_indented(fmt!("match {expr} {{\n"), "\n}")
.list_end(match_arms, ",\n", ","),
Self::Fn(pats, expr) => f.delimit("fn (", fmt!(") {expr}")).list(pats, ", "),
// if/while with exactly three operands print as cond/pass/else;
// any other arity falls back to a parenthesized list.
Self::Op(op @ (Op::If | Op::While), exprs) => match exprs.as_slice() {
[cond, pass, fail] => write!(f, "{op}{cond} {pass} else {fail}"),
other => f.delimit(fmt!("({op}, "), ")").list(other, ", "),
},
Self::Op(Op::Array, exprs) => f.delimit("[", "]").list(exprs, ", "),
Self::Op(Op::Block, exprs) => f.delimit_indented("{\n", "\n}").list(exprs, ", "),
Self::Op(Op::Tuple, exprs) => f.delimit("(", ")").list(exprs, ", "),
// Call/Index print the callee outside the brackets.
Self::Op(op @ Op::Call, exprs) => match exprs.as_slice() {
[callee, args @ ..] => f.delimit(fmt!("{callee}("), ")").list(args, ", "),
[] => write!(f, "{op}"),
},
Self::Op(op @ Op::Index, exprs) => match exprs.as_slice() {
[callee, args @ ..] => f.delimit(fmt!("{callee}["), "]").list(args, ", "),
[] => write!(f, "{op}"),
},
Self::Op(Op::Do, exprs) => f.list(exprs, ";\n"),
Self::Op(op @ Op::Macro, exprs) => f.delimit(op, "").list(exprs, " => "),
Self::Op(op @ Op::Try, exprs) => f.delimit("", op).list(exprs, ", "),
// Generic fallback: prefix form for one operand, infix list otherwise.
Self::Op(op, exprs) => match exprs.as_slice() {
[_] => f.delimit(op, "").list(exprs, ", "),
many => f.delimit("(", ")").list(many, op),
},
}
}
}
impl Display for Op {
    /// Writes the operator's canonical spelling, including any spacing
    /// used when joining operands.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Map to the textual form first, then defer to str's Display impl
        // so formatter flags (width, fill, alignment) are still honored.
        let text = match self {
            Op::Do => "; ",
            Op::Id => "_",
            Op::Mid => "`",
            Op::Lit => "##",
            Op::Let => "let ",
            Op::Const => "const ",
            Op::Fn => "fn ",
            Op::Macro => "macro ",
            Op::Match => "match ",
            Op::End => "()",
            Op::Block => "{}",
            Op::Array => "[]",
            Op::Group => "()",
            Op::Tuple => "()",
            Op::Try => "?",
            Op::Index => "",
            Op::Call => "",
            Op::Make => "",
            Op::Lambda => "",
            Op::Loop => "loop ",
            Op::If => "if ",
            Op::While => "while ",
            Op::Break => "break ",
            Op::Return => "return ",
            Op::Dot => ".",
            Op::Path => "::",
            Op::RangeEx => " .. ",
            Op::RangeIn => " ..= ",
            Op::Neg => "-",
            Op::Not => "!",
            Op::Identity => "!!",
            Op::Refer => "&",
            Op::Deref => "*",
            Op::Mul => " * ",
            Op::Div => " / ",
            Op::Rem => " % ",
            Op::Add => " + ",
            Op::Sub => " - ",
            Op::Shl => " << ",
            Op::Shr => " >> ",
            Op::And => " & ",
            Op::Xor => " ^ ",
            Op::Or => " | ",
            Op::Lt => " < ",
            Op::Leq => " <= ",
            Op::Eq => " == ",
            Op::Neq => " != ",
            Op::Geq => " >= ",
            Op::Gt => " > ",
            Op::LogAnd => " && ",
            Op::LogXor => " ^^ ",
            Op::LogOr => " || ",
            Op::Set => " = ",
        };
        text.fmt(f)
    }
}
impl<A: Annotation> Display for MakeArm<A> {
    /// Formats as `name` or `name: expr`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(name, body) = self;
        write!(f, "{name}")?;
        match body {
            Some(expr) => write!(f, ": {expr}"),
            None => Ok(()),
        }
    }
}
impl<A: Annotation> Display for MatchArm<A> {
/// Formats as `pat | pat | ... => expr`.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self(pats, expr) = self;
// The ` => expr` suffix is emitted when the Delimit guard drops.
f.delimit("", fmt!(" => {expr}")).list(pats, " | ")
}
}
impl Display for Pat {
/// Formats the pattern in source-like syntax.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Ignore => "_".fmt(f),
Self::Lit(literal) => literal.fmt(f),
Self::MetId(name) => name.fmt(f),
Self::Name(name) => name.fmt(f),
Self::Rest(Some(rest)) => write!(f, "..{rest}"),
Self::Rest(None) => write!(f, ".."),
Self::Tuple(pats) => f.delimit("(", ")").list(pats, ", "),
Self::Slice(pats) => f.delimit("[", "]").list(pats, ", "),
}
}
}
impl<A: Annotation> TryFrom<Expr<A>> for Pat {
type Error = Expr<A>;
fn try_from(value: Expr<A>) -> Result<Self, Self::Error> {
Ok(match value {
Expr::Id(name) if name == "_" => Self::Ignore,
Expr::Id(name) => Self::Name(name),
Expr::MetId(name) => Self::MetId(name),
Expr::Lit(literal) => Self::Lit(literal),
Expr::Op(Op::RangeEx, exprs) if exprs.is_empty() => Self::Rest(None),
Expr::Op(Op::RangeEx, mut exprs) if exprs.len() == 1 => {
Self::Rest(Some(Box::new(Self::try_from(exprs.remove(0))?)))
}
Expr::Op(Op::Tuple, exprs) => Self::Tuple(
exprs
.into_iter()
.map(Self::try_from)
.collect::<Result<_, _>>()?,
),
Expr::Op(Op::Array, exprs) => Self::Slice(
exprs
.into_iter()
.map(Self::try_from)
.collect::<Result<_, _>>()?,
),
other => Err(other)?,
})
}
}
impl<A: Annotation> TryFrom<Anno<Expr<A>, A>> for Pat {
type Error = Expr<A>;
fn try_from(value: Anno<Expr<A>, A>) -> Result<Self, Self::Error> {
Self::try_from(value.0)
}
}

260
src/ast/matcher.rs Normal file
View File

@@ -0,0 +1,260 @@
//! Implements pattern matching
use super::*;
use std::collections::HashMap;
/// Stores a substitution from meta-identifiers to values
#[derive(Clone, Debug)]
pub struct Subst<A: Annotation> {
// Expression captures, keyed by meta-identifier name
pub exp: HashMap<String, Expr<A>>,
// Pattern captures, keyed by meta-identifier name
pub pat: HashMap<String, Pat>,
}
// Manual impl: `derive(Default)` would wrongly require `A: Default`.
impl<A: Annotation> Default for Subst<A> {
fn default() -> Self {
Self { exp: Default::default(), pat: Default::default() }
}
}
/// Structural matching and substitution over AST nodes.
pub trait Match<A: Annotation> {
/// Applies a substitution rule from `pat` to `template` on `self`
///
/// Returns false (leaving `self` untouched) when `pat` does not match.
fn apply_rule(&mut self, pat: &Self, template: &Self) -> bool
where Self: Sized + Clone {
let Some(sub) = self.construct(pat) else {
return false;
};
// Replace self with the template, then fill in the captured bindings.
*self = template.clone();
self.apply(&sub);
true
}
/// With self as the pattern, recursively applies the Subst
fn apply(&mut self, sub: &Subst<A>);
/// Implements recursive Subst-building for Self
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool;
/// Constructs a Subst
///
/// Returns None when `pat` does not match `self`.
fn construct(&self, pat: &Self) -> Option<Subst<A>> {
let mut sub = Subst::default();
Match::recurse(&mut sub, pat, self).then_some(sub)
}
/// Matches self against the provided pattern
fn match_with(&self, pat: &Self, sub: &mut Subst<A>) -> bool {
Match::recurse(sub, pat, self)
}
}
impl<M: Match<A> + Annotation, A: Annotation> Match<A> for Anno<M, A> {
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
Match::recurse(sub, &pat.0, &expr.0)
}
fn apply(&mut self, sub: &Subst<A>) {
self.0.apply(sub);
}
}
impl<A: Annotation> Match<A> for Expr<A> {
/// Matches `expr` against `pat`, recording meta-identifier captures in `sub`.
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
match (pat, expr) {
// `_ matches anything without capturing.
(Expr::MetId(name), _) if name == "_" => true,
// A meta-id captures the expression; repeated captures must agree.
// `insert` returns the previous binding: if one exists and differs
// from this expr, the match fails.
(Expr::MetId(name), _) => sub
.exp
.insert(name.clone(), expr.clone())
.filter(|v| v != expr)
.is_none(),
(Expr::Id(pat), Expr::Id(expr)) => pat == expr,
(Expr::Id(_), _) => false,
(Expr::Lit(pat), Expr::Lit(expr)) => pat == expr,
(Expr::Lit(_), _) => false,
(Expr::Let(pat_pat, pat_expr), Expr::Let(expr_pat, expr_expr)) => {
Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr)
}
(Expr::Let(..), _) => false,
(Expr::Const(pat_pat, pat_expr), Expr::Const(expr_pat, expr_expr)) => {
Match::recurse(sub, pat_pat, expr_pat) && Match::recurse(sub, pat_expr, expr_expr)
}
(Expr::Const(..), _) => false,
(Expr::Make(pat, pat_arms), Expr::Make(expr, expr_arms)) => {
Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms)
}
(Expr::Make(..), _) => false,
(Expr::Match(pat, pat_arms), Expr::Match(expr, expr_arms)) => {
Match::recurse(sub, pat, expr) && Match::recurse(sub, pat_arms, expr_arms)
}
(Expr::Match(..), _) => false,
(Expr::Fn(pat_pats, pat_expr), Expr::Fn(expr_pats, expr_expr)) => {
Match::recurse(sub, pat_pats, expr_pats) && Match::recurse(sub, pat_expr, expr_expr)
}
(Expr::Fn(..), _) => false,
(Expr::Op(pat_op, pat_exprs), Expr::Op(expr_op, expr_exprs)) => {
Match::recurse(sub, pat_op, expr_op) && Match::recurse(sub, pat_exprs, expr_exprs)
}
(Expr::Op(..), _) => false,
}
}
/// Replaces every captured meta-identifier in `self` with its binding.
fn apply(&mut self, sub: &Subst<A>) {
match self {
// An unbound meta-id is left in place.
Expr::MetId(id) => {
if let Some(expr) = sub.exp.get(id) {
*self = expr.clone()
}
}
Expr::Id(_) | Expr::Lit(_) => {}
Expr::Let(pat, expr) => {
pat.apply(sub);
expr.apply(sub);
}
Expr::Const(pat, expr) => {
pat.apply(sub);
expr.apply(sub);
}
Expr::Make(expr, make_arms) => {
expr.apply(sub);
make_arms.apply(sub);
}
Expr::Match(expr, match_arms) => {
expr.apply(sub);
match_arms.apply(sub);
}
Expr::Fn(pats, expr) => {
pats.apply(sub);
expr.apply(sub);
}
Expr::Op(op, exprs) => {
op.apply(sub);
exprs.apply(sub);
}
};
}
}
impl<A: Annotation> Match<A> for MakeArm<A> {
// TODO: order-independent matching for MakeArm specifically.
/// Arm names must be equal; initializers match recursively.
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
pat.0 == expr.0 && Match::recurse(sub, &pat.1, &expr.1)
}
/// The arm name is never substituted; only the initializer is.
fn apply(&mut self, sub: &Subst<A>) {
let Self(_, expr) = self;
expr.apply(sub);
}
}
impl<A: Annotation> Match<A> for MatchArm<A> {
    /// Both the pattern list and the arm body must match.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        let (Self(pat_pats, pat_expr), Self(expr_pats, expr_expr)) = (pat, expr);
        Match::recurse(sub, pat_pats, expr_pats) && Match::recurse(sub, pat_expr, expr_expr)
    }
    fn apply(&mut self, sub: &Subst<A>) {
        self.0.apply(sub);
        self.1.apply(sub);
    }
}
impl<A: Annotation> Match<A> for Pat {
/// Matches one pattern against another, capturing meta-identifiers in `sub`.
fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
match (pat, expr) {
// `_ matches anything without capturing.
(Pat::MetId(name), _) if name == "_" => true,
// Repeated captures must agree: `insert` returns any previous
// binding, and a differing one fails the match.
(Pat::MetId(name), _) => sub
.pat
.insert(name.clone(), expr.clone())
.filter(|v| v != expr)
.is_none(),
(Pat::Ignore, Pat::Ignore) => true,
(Pat::Ignore, _) => false,
(Pat::Name(pat), Pat::Name(expr)) => pat == expr,
(Pat::Name(_), _) => false,
(Pat::Rest(pat), Pat::Rest(expr)) => Match::recurse(sub, pat, expr),
(Pat::Rest(_), _) => false,
(Pat::Lit(pat), Pat::Lit(expr)) => pat == expr,
(Pat::Lit(_), _) => false,
(Pat::Tuple(pat), Pat::Tuple(expr)) => Match::recurse(sub, pat, expr),
(Pat::Tuple(_), _) => false,
(Pat::Slice(pat), Pat::Slice(expr)) => Match::recurse(sub, pat, expr),
(Pat::Slice(_), _) => false,
}
}
/// Replaces captured meta-identifiers with their bound patterns.
fn apply(&mut self, sub: &Subst<A>) {
match self {
Pat::Ignore | Pat::Name(_) | Pat::Lit(_) => {}
// An unbound meta-id is left in place.
Pat::MetId(id) => {
if let Some(expr) = sub.pat.get(id) {
*self = expr.clone()
}
}
Pat::Rest(pat) => pat.apply(sub),
Pat::Tuple(pats) => pats.apply(sub),
Pat::Slice(pats) => pats.apply(sub),
}
}
}
impl<A: Annotation> Match<A> for Op {
    /// Operators carry no bindings; they match purely by equality.
    fn recurse(_sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        expr == pat
    }
    /// Nothing to substitute inside a bare operator.
    fn apply(&mut self, _: &Subst<A>) {}
}
impl<A: Annotation, T: Match<A>> Match<A> for [T] {
    /// Slices match element-wise and must be the same length.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        // `all` short-circuits on the first non-matching pair, just like
        // an early return from a loop would.
        pat.len() == expr.len()
            && pat
                .iter()
                .zip(expr)
                .all(|(pat, expr)| Match::recurse(sub, pat, expr))
    }
    fn apply(&mut self, sub: &Subst<A>) {
        self.iter_mut().for_each(|item| item.apply(sub));
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Box<T> {
    /// Boxes are transparent: match and substitute through the pointer.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        Match::recurse(sub, &**pat, &**expr)
    }
    fn apply(&mut self, sub: &Subst<A>) {
        (**self).apply(sub);
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Vec<T> {
    /// Vectors defer entirely to the slice implementation.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        <[T] as Match<A>>::recurse(sub, pat, expr)
    }
    fn apply(&mut self, sub: &Subst<A>) {
        self[..].apply(sub);
    }
}
impl<A: Annotation, T: Match<A>> Match<A> for Option<T> {
    /// `None` matches only `None`; `Some` matches the contained values.
    fn recurse(sub: &mut Subst<A>, pat: &Self, expr: &Self) -> bool {
        match (pat, expr) {
            (None, None) => true,
            (Some(pat), Some(expr)) => Match::recurse(sub, pat, expr),
            (Some(_), None) | (None, Some(_)) => false,
        }
    }
    fn apply(&mut self, sub: &Subst<A>) {
        if let Some(inner) = self {
            inner.apply(sub);
        }
    }
}

139
src/fmt.rs Normal file
View File

@@ -0,0 +1,139 @@
//! The Conlang format extensions
use std::fmt::{Display, Write};
// Blanket impl: every fmt::Write sink gets the adapter methods for free.
impl<W: Write + ?Sized> FmtAdapter for W {}
/// Formatting helpers layered over any [Write] sink.
pub trait FmtAdapter: Write {
    /// Indents by one level (four spaces).
    fn indent(&mut self) -> Indent<'_, Self> {
        self.indent_with("    ")
    }
    /// Pastes `indent` after each newline.
    fn indent_with(&mut self, indent: &'static str) -> Indent<'_, Self> {
        Indent::new(self, indent)
    }
    /// Delimits a section with `open` and `close`.
    fn delimit<O: Display, E: Display>(&mut self, open: O, close: E) -> Delimit<'_, Self, E> {
        Delimit::new(self, open, close)
    }
    /// Delimits a section with `open` and `close`, raising the indent level within.
    fn delimit_indented<O: Display, E: Display>(
        &mut self,
        open: O,
        close: E,
    ) -> DelimitIndent<'_, Self, E> {
        DelimitIndent::new(self, open, close)
    }
    /// Formats bracketed lists of the kind (Item (Sep Item)*)?
    #[inline]
    fn list<Item: Display, Sep: Display>(&mut self, items: &[Item], sep: Sep) -> std::fmt::Result {
        self.list_end(items, sep, "")
    }
    /// Like [FmtAdapter::list], but writes `end` after the final item.
    fn list_end<Item: Display, Sep: Display, End: Display>(
        &mut self,
        items: &[Item],
        sep: Sep,
        end: End,
    ) -> std::fmt::Result {
        // Separator goes before every item except the first — the same
        // stream as writing it after every item except the last.
        for (index, item) in items.iter().enumerate() {
            if index > 0 {
                write!(self, "{sep}")?;
            }
            write!(self, "{item}")?;
        }
        write!(self, "{end}")
    }
}
/// Pads text with leading indentation after every newline
pub struct Indent<'f, F: Write + ?Sized> {
// The string pasted after each newline
indent: &'static str,
// True when the next write must begin with the indent
needs_indent: bool,
// The underlying sink
f: &'f mut F,
}
impl<'f, F: Write + ?Sized> Indent<'f, F> {
/// Wraps `f` so that `indent` is pasted after every newline written through it.
pub fn new(f: &'f mut F, indent: &'static str) -> Self {
Indent { f, needs_indent: false, indent }
}
}
impl<F: Write + ?Sized> Write for Indent<'_, F> {
    /// Writes `s`, inserting the configured indent after every newline.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        for piece in s.split_inclusive('\n') {
            if self.needs_indent {
                self.f.write_str(self.indent)?;
            }
            self.f.write_str(piece)?;
            // Defer the indent until the next write actually happens, so a
            // trailing newline does not produce a dangling indented line.
            self.needs_indent = piece.ends_with('\n');
        }
        Ok(())
    }
    fn write_char(&mut self, c: char) -> std::fmt::Result {
        if self.needs_indent {
            // BUG FIX: this previously wrote a hard-coded "    ", ignoring
            // the indent string configured via `indent_with`.
            self.f.write_str(self.indent)?;
        }
        self.needs_indent = c == '\n';
        self.f.write_char(c)
    }
}
/// Prints delimiters around anything formatted with this. Implies [Indent]
///
/// The opening delimiter is written eagerly in [Delimit::new]; the closing
/// delimiter is written when the guard drops. Write errors from the
/// delimiters themselves are discarded (`let _`), since neither `new` nor
/// `Drop` can return a Result.
pub struct Delimit<'f, F: Write + ?Sized, E: Display = &'static str> {
f: &'f mut F,
close: E,
}
impl<'f, F: Write + ?Sized, E: Display> Delimit<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
// `open` is consumed here; only `close` must be stored for Drop.
let _ = write!(f, "{open}");
Self { f, close }
}
}
impl<F: Write + ?Sized, E: Display> Drop for Delimit<'_, F, E> {
fn drop(&mut self) {
let Self { f, close, .. } = self;
let _ = write!(f, "{close}");
}
}
impl<F: Write + ?Sized, E: Display> Write for Delimit<'_, F, E> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
self.f.write_str(s)
}
}
/// Prints delimiters around anything formatted with this. Implies [Indent]
///
/// Like [Delimit], but everything written between the delimiters goes
/// through an [Indent] wrapper, raising the indent level by one.
pub struct DelimitIndent<'f, F: Write + ?Sized, E: Display = &'static str> {
f: Indent<'f, F>,
close: E,
}
impl<'f, F: Write + ?Sized, E: Display> DelimitIndent<'f, F, E> {
pub fn new<O: Display>(f: &'f mut F, open: O, close: E) -> Self {
let mut f = f.indent();
let _ = write!(f, "{open}");
Self { f, close }
}
}
impl<F: Write + ?Sized, E: Display> Drop for DelimitIndent<'_, F, E> {
fn drop(&mut self) {
// Destructure past the Indent so the closing delimiter is written at
// the outer (unindented) level.
let Self { f: Indent { f, .. }, close, .. } = self;
let _ = write!(f, "{}", close);
}
}
impl<F: Write + ?Sized, E: Display> Write for DelimitIndent<'_, F, E> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
self.f.write_str(s)
}
}

323
src/lexer.rs Normal file
View File

@@ -0,0 +1,323 @@
//! A lobster
use std::ops::Range;
#[allow(dead_code)]
use std::{iter::Peekable, str::CharIndices};
use unicode_ident::{is_xid_continue, is_xid_start};
use crate::{span::Span, token::*};
/// An error produced by the [Lexer], carrying the position it occurred at.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LexError {
// Byte offset of the start of the token being lexed
pub pos: u32,
// A static description of the failure
pub res: &'static str,
}
impl std::error::Error for LexError {}
impl std::fmt::Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { pos, res } = self;
write!(f, "{pos}: {res}")
}
}
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
/// The source text
text: &'t str,
/// A peekable iterator over the source text
iter: Peekable<CharIndices<'t>>,
/// The start of the current token (byte offset)
head: u32,
/// The end of the current token (byte offset)
tail: u32,
}
impl<'t> Lexer<'t> {
/// Constructs a new Lexer from some text
pub fn new(text: &'t str) -> Self {
let iter = text.char_indices().peekable();
Self { text, iter, head: 0, tail: 0 }
}
/// Peeks the next character without advancing the lexer
pub fn peek(&mut self) -> Option<char> {
self.iter.peek().map(|&(_, c)| c)
}
// Moves `tail` to the byte offset of the next unread character,
// or to the end of input when the iterator is exhausted.
fn advance_tail(&mut self) {
match self.iter.peek() {
Some(&(idx, _)) => self.tail = idx as u32,
None => {
self.tail = self.text.len() as _;
}
}
}
/// Takes the last character
pub fn take(&mut self) -> Option<char> {
let (_, c) = self.iter.next()?;
self.advance_tail();
Some(c)
}
/// Consumes and returns the next character only if it equals `expected`
pub fn next_if(&mut self, expected: char) -> Option<char> {
let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
self.advance_tail();
Some(c)
}
/// Consumes the last-peeked character, advancing the tail
pub fn consume(&mut self) -> &mut Self {
self.iter.next();
self.advance_tail();
self
}
/// Produces a LexError at the start of the current token
pub fn error(&self, res: &'static str) -> LexError {
LexError { pos: self.head, res }
}
/// Produces a Token
///
/// The lexeme is sliced from the source over the current head..tail span;
/// producing resets head to tail for the next token.
pub fn produce(&mut self, kind: TKind) -> Token {
self.advance_tail();
let span = Span(self.head, self.tail);
self.head = self.tail;
Token { lexeme: self.text[Range::from(span)].to_owned(), kind, span }
}
/// Like [Lexer::produce], but with a caller-supplied lexeme
/// (used for escapes, where the lexeme differs from the raw source).
pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: String) -> Token {
self.advance_tail();
let span = Span(self.head, self.tail);
self.head = self.tail;
Token { lexeme, kind, span }
}
/// Consumes 0 or more whitespace
pub fn skip_whitespace(&mut self) -> &mut Self {
while self.peek().is_some_and(char::is_whitespace) {
let _ = self.consume();
}
self
}
/// Marks the current position as the start of a new token
pub fn start_token(&mut self) -> &mut Self {
self.head = self.tail;
self
}
/// Scans forward until it finds the next Token in the input
///
/// End of input is reported as a LexError with message "EOF".
pub fn scan(&mut self) -> Result<Token, LexError> {
use TKind::*;
// !"#%&'()*+,-./:;<=>?@[\\]^`{|}~
// Classify by the first character; literals, comments, and identifiers
// dispatch to their own sub-lexers via `return`.
let tok = match self
.skip_whitespace()
.start_token()
.peek()
.ok_or_else(|| self.error("EOF"))?
{
'!' => Bang,
'"' => return self.string(),
'#' => Hash,
'%' => Rem,
'&' => Amp,
'\'' => return self.character(),
'(' => LParen,
')' => RParen,
'*' => Star,
'+' => Plus,
',' => Comma,
'-' => Minus,
'.' => Dot,
'/' => Slash,
'0' => Integer,
'1'..='9' => return self.digits::<10>(),
':' => Colon,
';' => Semi,
'<' => Lt,
'=' => Eq,
'>' => Gt,
'?' => Question,
'@' => At,
'[' => LBrack,
'\\' => Backslash,
']' => RBrack,
'^' => Xor,
'`' => Grave,
'{' => LCurly,
'|' => Bar,
'}' => RCurly,
'~' => Tilde,
'_' => return self.identifier(),
c if is_xid_start(c) => return self.identifier(),
_ => Err(self.error("Invalid"))?,
};
// Handle digraphs
// A leading '0' also selects the integer base prefix here (0b/0d/0o/0x/0z).
let tok = match (tok, self.consume().peek()) {
(Integer, Some('b')) => return self.consume().digits::<2>(),
(Integer, Some('d')) => return self.consume().digits::<10>(),
(Integer, Some('o')) => return self.consume().digits::<8>(),
(Integer, Some('x')) => return self.consume().digits::<16>(),
(Integer, Some('z')) => return self.consume().digits::<36>(),
(Integer, _) => return self.digits::<10>(),
(Amp, Some('&')) => AmpAmp,
(Amp, Some('=')) => AmpEq,
(Bang, Some('!')) => BangBang,
(Bang, Some('=')) => BangEq,
(Bar, Some('|')) => BarBar,
(Bar, Some('=')) => BarEq,
(Colon, Some(':')) => ColonColon,
(Dot, Some('.')) => DotDot,
(Eq, Some('=')) => EqEq,
(Eq, Some('>')) => FatArrow,
(Gt, Some('=')) => GtEq,
(Gt, Some('>')) => GtGt,
(Hash, Some('!')) => HashBang,
(Lt, Some('=')) => LtEq,
(Lt, Some('<')) => LtLt,
(Minus, Some('=')) => MinusEq,
(Minus, Some('>')) => Arrow,
(Plus, Some('=')) => PlusEq,
(Rem, Some('=')) => RemEq,
(Slash, Some('*')) => return Ok(self.block_comment()?.produce(Comment)),
(Slash, Some('=')) => SlashEq,
(Slash, Some('/')) => return self.line_comment(),
(Star, Some('=')) => StarEq,
(Xor, Some('=')) => XorEq,
(Xor, Some('^')) => XorXor,
_ => return Ok(self.produce(tok)),
};
// Handle trigraphs
let tok = match (tok, self.consume().peek()) {
(HashBang, Some('/')) => return self.line_comment(),
(DotDot, Some('=')) => DotDotEq,
(GtGt, Some('=')) => GtGtEq,
(LtLt, Some('=')) => LtLtEq,
_ => return Ok(self.produce(tok)),
};
Ok(self.consume().produce(tok))
}
/// Consumes the rest of the line, producing a Comment token
pub fn line_comment(&mut self) -> Result<Token, LexError> {
while self.consume().peek().is_some_and(|c| c != '\n') {}
Ok(self.produce(TKind::Comment))
}
/// Consumes a (possibly nested) `/* ... */` block comment
///
/// Recurses on inner `/*` so nested comments must each be closed.
pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
self.consume();
while let Some(c) = self.take() {
match (c, self.peek()) {
('/', Some('*')) => self.block_comment()?,
('*', Some('/')) => return Ok(self.consume()),
_ => continue,
};
}
Err(self.error("Unterminated block comment"))
}
/// Consumes an identifier, then reclassifies it if it is a keyword
pub fn identifier(&mut self) -> Result<Token, LexError> {
while self.consume().peek().is_some_and(is_xid_continue) {}
let token = self.produce(TKind::Identifier);
// Keyword table: remap the token kind, keeping lexeme and span.
Ok(Token {
kind: match token.lexeme.as_str() {
"break" => TKind::Break,
"const" => TKind::Const,
"do" => TKind::Do,
"else" => TKind::Else,
"false" => TKind::False,
"fn" => TKind::Fn,
"if" => TKind::If,
"let" => TKind::Let,
"loop" => TKind::Loop,
"macro" => TKind::Macro,
"match" => TKind::Match,
"return" => TKind::Return,
// "then" is an alias for "do"
"then" => TKind::Do,
"true" => TKind::True,
"while" => TKind::While,
_ => token.kind,
},
..token
})
}
/// Consumes a character literal: `'a'`, `'\n'`
pub fn character(&mut self) -> Result<Token, LexError> {
let c = match self.consume().take() {
Some('\\') => self.escape()?,
Some(c) => c,
// NOTE(review): EOF here yields '\0' and then falls through to the
// "Unterminated character" error below — confirm the placeholder is
// intentional rather than an early error return.
None => '\0',
};
if self.take().is_some_and(|c| c == '\'') {
Ok(self.produce_with_lexeme(TKind::Character, c.into()))
} else {
Err(self.error("Unterminated character"))
}
}
/// Consumes a string literal, resolving escape sequences into the lexeme
pub fn string(&mut self) -> Result<Token, LexError> {
let mut lexeme = String::new();
self.consume();
loop {
lexeme.push(match self.take() {
None => Err(self.error("Unterminated string"))?,
Some('\\') => self.escape()?,
Some('"') => break,
Some(c) => c,
})
}
lexeme.shrink_to_fit();
Ok(self.produce_with_lexeme(TKind::String, lexeme))
}
/// Resolves one escape sequence (the leading '\\' is already consumed)
///
/// Unrecognized escapes pass the character through unchanged.
pub fn escape(&mut self) -> Result<char, LexError> {
Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
// Escaped space maps to a non-breaking space
' ' => '\u{a0}',
'0' => '\0',
'a' => '\x07',
'b' => '\x08',
'e' => '\x1b',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'u' => self.unicode_escape()?,
'x' => self.hex_escape()?,
c => c,
})
}
/// Resolves a two-digit hex escape: `\x41`
pub fn hex_escape(&mut self) -> Result<char, LexError> {
// Two hex digits yield at most 0xFF, which is always a valid char.
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(self.error("Invalid digit"))
}
/// Resolves a `\u{...}` escape into the character it names.
///
/// # Errors
/// Fails when the `{` opener is missing, a digit is not valid hex, the
/// accumulated value overflows or is not a valid scalar value, or the
/// escape is unterminated.
pub fn unicode_escape(&mut self) -> Result<char, LexError> {
    self.next_if('{')
        .ok_or_else(|| self.error("No unicode escape opener"))?;
    let mut out: u32 = 0;
    while let Some(c) = self.take() {
        if c == '}' {
            return char::from_u32(out).ok_or_else(|| self.error("Bad unicode value"));
        }
        let digit = c.to_digit(16).ok_or_else(|| self.error("Invalid digit"))?;
        // Checked arithmetic: a long digit run previously wrapped in release
        // builds (and panicked in debug) via unchecked `out * 16 + digit`.
        out = out
            .checked_mul(16)
            .and_then(|out| out.checked_add(digit))
            .ok_or_else(|| self.error("Bad unicode value"))?;
    }
    Err(self.error("Unterminated unicode escape"))
}
/// Consumes a run of digits in the given BASE, producing an Integer token.
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
    while self.peek().map_or(false, |c| c.is_digit(BASE)) {
        self.consume();
    }
    Ok(self.produce(TKind::Integer))
}
/// Consumes exactly one digit in the given BASE, returning its value.
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
    self.take()
        .and_then(|c| c.to_digit(BASE))
        .ok_or_else(|| self.error("Invalid digit"))
}
}

78
src/lib.rs Normal file
View File

@@ -0,0 +1,78 @@
//! The Dough Programming Language
//!
//! A simpler programming language
pub mod fmt;
pub mod span;
pub mod token;
pub mod lexer;
pub mod ast;
pub mod parser;
pub mod typed_ast {
//! The Typed AST defines an interface between the type checker and code generator
use crate::span::Span;
use std::collections::HashMap;
/// Maps definition ids to their names and stack locations.
pub struct Table {
/// Fully qualified names, for debugging
pub names: Vec<String>,
/// The unprojected relative stack offset
pub local: HashMap<usize, isize>,
}
/// DefID annotation
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Defn {
pub span: Span,
/// The index of this name in the associated Table
pub defid: usize,
}
}
pub mod typeck {}
pub mod ir {
//! The IR defines an interface between the code generator and interpreter(?)
}
pub mod interpreter {
//! The Doughlang interpreter interprets an AST
use std::sync::{Arc, Mutex};
/// A runtime value.
#[derive(Clone, Debug)]
pub enum Value {
Bool(bool),
ISize(isize),
// Aggregates are shared and interior-mutable
Adt(Arc<Mutex<Adt>>),
}
impl Value {
/// Casts this value to the named type.
// NOTE(review): unrecognized casts currently pass the value through
// unchanged rather than erroring — confirm this is intentional.
pub fn cast(self, as_type: &str) -> Self {
match (self, as_type) {
(Self::ISize(v), "isize") => Self::ISize(v), // redundant cast
(v, _) => v, // invalid cast!
}
}
}
/// An aggregate (algebraic data type) value.
pub enum Adt {
Array(Vec<Value>),
Tuple(Vec<Value>),
}
impl std::fmt::Debug for Adt {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Array(elem) => f.debug_list().entries(elem).finish(),
Self::Tuple(elem) => f.debug_list().entries(elem).finish(),
}
}
}
}

121
src/main.rs Normal file
View File

@@ -0,0 +1,121 @@
//! Tests the lexer
#[allow(unused_imports)]
use doughlang::{
ast::{
Expr,
matcher::{Match, Subst},
},
lexer::{LexError, Lexer},
parser::{ParseError, Parser},
span::Span,
token::{TKind, Token},
};
use repline::prebaked::*;
use std::{
error::Error,
io::{IsTerminal, stdin},
};
/// Runs an interactive REPL on a terminal, or lexes and parses piped input.
fn main() -> Result<(), Box<dyn Error>> {
if stdin().is_terminal() {
// Interactive mode: a few built-in commands, everything else is parsed.
read_and("\x1b[32m", " >", "?>", |line| match line.trim_end() {
"" => Ok(Response::Continue),
"exit" => Ok(Response::Break),
"clear" => {
print!("\x1b[H\x1b[2J");
Ok(Response::Deny)
}
// Enter the pattern-substitution sub-REPL
"pat" => {
if let Err(e) = subst() {
println!("\x1b[31m{e}\x1b[0m");
}
Ok(Response::Deny)
}
_ => {
parse(line);
Ok(Response::Accept)
}
})?;
} else {
// Batch mode: read all of stdin, then dump tokens and parses.
let doc = std::io::read_to_string(stdin())?;
lex(&doc);
parse(&doc);
}
Ok(())
}
/// Prints every token in `document`, one per line.
///
/// Terminates when the lexer errors — end of input is itself reported
/// as an "EOF" LexError, so the loop always ends.
fn lex(document: &str) {
let mut lexer = Lexer::new(document);
loop {
match lexer.scan() {
Ok(Token { lexeme, kind, span: Span { head, tail } }) => {
println!(
"{kind:?}\x1b[11G {head:<4} {tail:<4} {}",
lexeme.escape_debug()
)
}
Err(e) => {
eprintln!("{e}");
break;
}
}
}
}
/// Interactive pattern-matching sub-REPL.
///
/// Reads one subject expression, then repeatedly reads patterns:
/// a bare pattern prints the captured bindings; `pat : template`
/// rewrites the subject via [Match::apply_rule].
fn subst() -> Result<(), Box<dyn Error>> {
let mut rl = repline::Repline::new("\x1b[35mexp", " >", "?>");
let exp = rl.read()?;
let mut exp: Expr = Parser::new(Lexer::new(&exp)).parse(0)?;
println!("\x1b[G\x1b[J{exp}");
rl.accept();
loop {
rl.set_color("\x1b[36mpat");
let pat = rl.read()?;
rl.accept();
print!("\x1b[G\x1b[J");
let mut p = Parser::new(Lexer::new(&pat));
// An unparseable pattern just re-prints the subject.
let Ok(pat) = p.parse::<Expr>(0) else {
println!("{exp}");
continue;
};
// No ':' after the pattern: match only, and show the bindings.
if p.next_if(TKind::Colon).is_err() {
let Some(Subst { exp, pat }) = exp.construct(&pat) else {
continue;
};
for (name, pat) in pat.iter() {
println!("{name}: {pat}")
}
for (name, expr) in exp.iter() {
println!("{name}: {expr}")
}
continue;
}
// With ':', parse the template and rewrite the subject in place.
let sub: Expr = p.parse(0)?;
if exp.apply_rule(&pat, &sub) {
println!("{exp}");
} else {
println!("No match: {pat} in {exp}\n")
}
}
}
/// Parses and prints expressions from `document` until the parser errors.
///
/// End of input surfaces as a lexer "EOF" error, so the loop always ends.
fn parse(document: &str) {
let mut parser = Parser::new(Lexer::new(document));
loop {
match parser.parse::<Expr>(0) {
// Err(ParseError::FromLexer(LexError { res: "EOF", .. })) => break,
Err(e) => {
println!("\x1b[31m{e}\x1b[0m");
break;
}
Ok(v) => {
println!("{v}");
}
}
}
}

516
src/parser.rs Normal file
View File

@@ -0,0 +1,516 @@
//! The parser takes a stream of [Token]s from the [Lexer], and turns them into [crate::ast] nodes.
use crate::{
ast::*,
lexer::{LexError, Lexer},
span::Span,
token::{TKind, Token},
};
use std::{error::Error, fmt::Display, vec};
pub mod numeric;
/// Errors produced while parsing. Each parser-side variant carries the
/// [Span] at which it occurred.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// The lexer failed to produce a token; wraps the underlying [LexError].
    FromLexer(LexError),
    /// A specific [TKind] was required, but another token was found.
    Expected(TKind, Span),
    /// The token cannot appear in a pattern.
    NotPattern(TKind, Span),
    /// The token cannot begin an expression.
    NotPrefix(TKind, Span),
    /// The token cannot join two expressions.
    NotInfix(TKind, Span),
    /// The token cannot follow a complete expression.
    NotPostfix(TKind, Span),
}
// ParseError has no underlying `source`; the default Error impl suffices.
impl Error for ParseError {}
impl Display for ParseError {
    /// Renders the error with its source location for terminal output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::FromLexer(e) => e.fmt(f),
            Self::Expected(tk, loc) => write!(f, "{loc}: Expected {tk:?}."),
            Self::NotPattern(tk, loc) => write!(f, "{loc}: {tk:?} is not valid in a pattern."),
            Self::NotPrefix(tk, loc) => write!(f, "{loc}: {tk:?} is not a prefix operator."),
            // Fixed user-facing grammar: "a infix" -> "an infix".
            Self::NotInfix(tk, loc) => write!(f, "{loc}: {tk:?} is not an infix operator."),
            Self::NotPostfix(tk, loc) => write!(f, "{loc}: {tk:?} is not a postfix operator."),
        }
    }
}
/// The result of a parse step: a `T` or a [ParseError].
pub type PResult<T> = Result<T, ParseError>;
/// A parser over a [Lexer]'s token stream, with one token of lookahead.
#[derive(Debug)]
pub struct Parser<'t> {
    /// The token source.
    pub lexer: Lexer<'t>,
    /// The peeked, not-yet-consumed token, if any.
    pub next_tok: Option<Token>,
    /// The [Span] of the most recently peeked token.
    pub last_loc: Span,
}
impl<'t> Parser<'t> {
    /// Constructs a new Parser
    pub fn new(lexer: Lexer<'t>) -> Self {
        Self { lexer, next_tok: None, last_loc: Span::default() }
    }
    /// The identity function. This exists to make production chaining easier.
    pub fn then<T>(&self, t: T) -> T {
        t
    }
    /// Returns the [Span] of the most recently peeked [Token].
    pub fn span(&self) -> Span {
        self.last_loc
    }
    /// Parses a value that implements the [Parse] trait.
    pub fn parse<T: Parse<'t>>(&mut self, level: usize) -> PResult<T> {
        Parse::parse(self, level)
    }
    /// Peeks the next [Token]. Returns [ParseError::FromLexer] on lexer error.
    ///
    /// Scans a fresh token only when none is buffered, and records the
    /// token's [Span] as the parser's latest location either way.
    pub fn peek(&mut self) -> PResult<&Token> {
        let next_tok = match self.next_tok.take() {
            Some(tok) => tok,
            None => match self.lexer.scan() {
                Ok(tok) => tok,
                Err(e) => Err(ParseError::FromLexer(e))?,
            },
        };
        self.last_loc = next_tok.span;
        self.next_tok = Some(next_tok);
        Ok(self.next_tok.as_ref().expect("should have token"))
    }
    /// Peeks the next token if it matches the `expected` [TKind]
    pub fn peek_if(&mut self, expected: TKind) -> Option<&Token> {
        self.peek().into_iter().find(|tok| tok.kind == expected)
    }
    /// Consumes and returns the currently-peeked [Token].
    pub fn take(&mut self) -> Option<Token> {
        self.next_tok.take()
    }
    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
    pub fn take_lexeme(&mut self) -> Option<String> {
        self.take().map(|tok| tok.lexeme)
    }
    /// Consumes and returns the next [Token], scanning one if none is buffered.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> PResult<Token> {
        self.peek()?;
        Ok(self.take().expect("should have token here"))
    }
    /// Consumes and returns the next [Token] if it matches the `expected` [TKind]
    ///
    /// On mismatch, the token stays buffered and [ParseError::Expected] is returned.
    pub fn next_if(&mut self, expected: TKind) -> PResult<Token> {
        let token = self.peek()?;
        if token.kind == expected {
            Ok(self.take().expect("should have token here"))
        } else {
            Err(ParseError::Expected(expected, token.span))
        }
    }
    /// Parses a list of P separated by `sep` tokens, ending in an `end` token.
    /// ```nobnf
    /// List<T> = (T `sep`)* T? `end` ;
    /// ```
    /// A trailing separator is optional; `end` is always consumed.
    /// Parsed elements are appended to `elems`.
    pub fn list<P: Parse<'t>>(
        &mut self,
        mut elems: Vec<P>,
        sep: TKind,
        end: TKind,
    ) -> PResult<Vec<P>> {
        while self.peek_if(end).is_none() {
            elems.push(self.parse(0)?);
            if self.next_if(sep).is_err() {
                break;
            }
        }
        self.next_if(end)?;
        Ok(elems)
    }
    /// Parses into an [`Option<P>`] if the next token is `next`
    pub fn opt_if<P: Parse<'t>>(&mut self, level: usize, next: TKind) -> PResult<Option<P>> {
        Ok(match self.next_if(next) {
            Ok(_) => Some(self.parse(level)?),
            Err(_) => None,
        })
    }
    /// Parses into an [`Option<P>`] unless the next token is `end`.
    /// The `end` token is required and consumed in either case.
    pub fn opt<P: Parse<'t>>(&mut self, level: usize, end: TKind) -> PResult<Option<P>> {
        let out = match self.peek_if(end) {
            None => Some(self.parse(level)?),
            Some(_) => None,
        };
        self.next_if(end)?;
        Ok(out)
    }
    /// Consumes the currently peeked token without returning it.
    /// A no-op when no token is buffered. Returns `self` for chaining.
    pub fn consume(&mut self) -> &mut Self {
        self.next_tok = None;
        self
    }
}
/// A type that can be parsed out of a [Parser]'s token stream.
pub trait Parse<'t> {
    /// Parses a `Self` at the given operator binding `level`.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized;
}
impl<'t> Parse<'t> for Literal {
    /// Parses a single literal token into a [Literal] value.
    fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
        let tok = p.peek()?;
        Ok(match tok.kind {
            TKind::True => p.consume().then(Literal::Bool(true)),
            TKind::False => p.consume().then(Literal::Bool(false)),
            TKind::Character => {
                // NOTE(review): takes the first char of the lexeme — assumes the
                // lexer already stripped quotes and resolved escapes; confirm.
                Literal::Char(p.take_lexeme().expect("should have Token").remove(0))
            }
            TKind::Integer => {
                let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
                // TODO: more complex int parsing
                // (i128::from_str handles plain decimal only; 0x/0o/0b lexemes
                // would fail here and report Expected(Integer).)
                let int = lexeme
                    .parse()
                    .map_err(|_| ParseError::Expected(TKind::Integer, span))?;
                Literal::Int(int)
            }
            TKind::String => Literal::Str(p.take_lexeme().expect("should have Token")),
            // Any non-literal token reuses the Expected(Integer) error.
            _ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
        })
    }
}
impl<'t> Parse<'t> for Pat {
    /// Parses a binding [Pat]tern.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        let tok = p.peek()?;
        match tok.kind {
            // Comments are skipped transparently.
            TKind::Comment => p.consume().parse(level),
            // Literal patterns: true | false | 'c' | 0 | "str"
            TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
                Ok(Pat::Lit(p.parse(0)?))
            }
            // `_` ignores a value; any other identifier binds one.
            TKind::Identifier => match tok.lexeme.as_str() {
                "_" => Ok(p.consume().then(Pat::Ignore)),
                _ => Ok(Pat::Name(p.take_lexeme().expect("should have Token"))),
            },
            // A grave followed by an identifier is a meta-identifier pattern.
            TKind::Grave => Ok(Pat::MetId(p.consume().next_if(TKind::Identifier)?.lexeme)),
            // `..` rest pattern, optionally binding the rest to a sub-pattern.
            TKind::DotDot => Ok(Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
                Some(_) => Some(p.parse(level)?),
                None => None,
            })),
            // `( Pat,* )` tuple pattern.
            TKind::LParen => Ok(Pat::Tuple(p.consume().list(
                vec![],
                TKind::Comma,
                TKind::RParen,
            )?)),
            // `[ Pat,* ]` slice pattern.
            TKind::LBrack => Ok(Pat::Slice(p.consume().list(
                vec![],
                TKind::Comma,
                TKind::RBrack,
            )?)),
            _ => Err(ParseError::NotPattern(tok.kind, tok.span)),
        }
    }
}
impl<'t> Parse<'t> for MatchArm {
    /// Parses one arm of a match expression: an optional leading `|`,
    /// a `|`-separated pattern list terminated by `=>`, then the arm body.
    fn parse(p: &mut Parser<'t>, _level: usize) -> PResult<Self> {
        // A leading bar is permitted but never required.
        let _ = p.next_if(TKind::Bar);
        let pats = p.list(vec![], TKind::Bar, TKind::FatArrow)?;
        let body = p.parse(0)?;
        Ok(MatchArm(pats, body))
    }
}
impl<'t> Parse<'t> for MakeArm {
    /// Parses one arm of a make expression: `Identifier (':' Expr)?`.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        let name = p.next_if(TKind::Identifier)?.lexeme;
        // The value is only parsed when a `:` follows the field name.
        let value = match p.next_if(TKind::Colon) {
            Ok(_) => Some(p.parse(level)?),
            Err(_) => None,
        };
        Ok(MakeArm(name, value))
    }
}
/// Operator precedence levels, loosest to tightest.
///
/// The declaration order drives parsing: each successive variant binds
/// more tightly than the one before it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Prec {
    Min,
    Do,
    Assign,
    Tuple,
    Make,
    Body,
    Logical,
    LogOr,
    LogAnd,
    Compare,
    Range,
    Binary,
    Shift,
    Factor,
    Term,
    Project,
    Unary,
    Extend,
    Max,
}
impl Prec {
    /// The loosest binding level.
    pub const MIN: usize = Prec::Min.value();
    /// The binding power of this level. Discriminants are doubled so each
    /// level has room for distinct left and right binding strengths.
    pub const fn value(self) -> usize {
        (self as usize) << 1
    }
    /// Binding power presented to the operator on the left.
    /// `Assign` binds one tighter here, making it right-associative.
    pub const fn prev(self) -> usize {
        if matches!(self, Self::Assign) { self.value() + 1 } else { self.value() }
    }
    /// Binding power handed to the sub-expression on the right.
    /// `Assign` binds one looser here, making it right-associative.
    pub const fn next(self) -> usize {
        if matches!(self, Self::Assign) { self.value() } else { self.value() + 1 }
    }
}
/// Maps a [Token] to its prefix operator and [Prec] binding level, or
/// [ParseError::NotPrefix] if it cannot begin an expression.
///
/// Closing delimiters map to [Op::End] so the expression parser can treat
/// them as block finishers.
fn from_prefix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Do => (Op::Do, Prec::Do),
        // Literal tokens all bind maximally tight.
        TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
            (Op::Lit, Prec::Max)
        }
        TKind::Identifier => (Op::Id, Prec::Max),
        TKind::Grave => (Op::Mid, Prec::Max),
        TKind::Fn => (Op::Fn, Prec::Body),
        TKind::Match => (Op::Match, Prec::Body),
        TKind::Macro => (Op::Macro, Prec::Assign),
        TKind::Let => (Op::Let, Prec::Body),
        TKind::Const => (Op::Const, Prec::Body),
        TKind::Loop => (Op::Loop, Prec::Body),
        TKind::If => (Op::If, Prec::Body),
        TKind::While => (Op::While, Prec::Body),
        TKind::Break => (Op::Break, Prec::Body),
        TKind::Return => (Op::Return, Prec::Body),
        // Delimiters: openers start a grouped construct, closers end one.
        TKind::LBrack => (Op::Array, Prec::Min),
        TKind::RBrack => (Op::End, Prec::Min),
        TKind::LCurly => (Op::Block, Prec::Min),
        TKind::RCurly => (Op::End, Prec::Min),
        TKind::LParen => (Op::Group, Prec::Min),
        TKind::RParen => (Op::End, Prec::Min),
        TKind::Amp => (Op::Refer, Prec::Max),
        // TKind::AmpAmp => todo!("addraddr"),
        TKind::Bang => (Op::Not, Prec::Unary),
        TKind::BangBang => (Op::Identity, Prec::Unary),
        // Two lambda forms, distinguished only by the returned Prec:
        // `|` (Min) takes parameters, `||` (Max) takes none.
        TKind::Bar => (Op::Lambda, Prec::Min),
        TKind::BarBar => (Op::Lambda, Prec::Max),
        TKind::DotDot => (Op::RangeEx, Prec::Range),
        TKind::DotDotEq => (Op::RangeIn, Prec::Range),
        TKind::Minus => (Op::Neg, Prec::Unary),
        TKind::Plus => (Op::Identity, Prec::Unary),
        TKind::Star => (Op::Deref, Prec::Unary),
        kind => Err(ParseError::NotPrefix(kind, token.span))?,
    })
}
/// Maps a [Token] to its infix operator and [Prec] binding level, or
/// [ParseError::NotInfix] if it cannot join two expressions.
fn from_infix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Semi => (Op::Do, Prec::Do), // the inspiration
        TKind::RParen => (Op::End, Prec::Do),
        TKind::Comma => (Op::Tuple, Prec::Tuple),
        TKind::Eq => (Op::Set, Prec::Assign),
        // Logical connectives, loosest first: ^^, then &&, then ||.
        TKind::XorXor => (Op::LogXor, Prec::Logical),
        TKind::AmpAmp => (Op::LogAnd, Prec::LogAnd),
        TKind::BarBar => (Op::LogOr, Prec::LogOr),
        TKind::Lt => (Op::Lt, Prec::Compare),
        TKind::LtEq => (Op::Leq, Prec::Compare),
        TKind::EqEq => (Op::Eq, Prec::Compare),
        TKind::BangEq => (Op::Neq, Prec::Compare),
        TKind::GtEq => (Op::Geq, Prec::Compare),
        TKind::Gt => (Op::Gt, Prec::Compare),
        TKind::DotDot => (Op::RangeEx, Prec::Range),
        TKind::DotDotEq => (Op::RangeIn, Prec::Range),
        // Bitwise operators all share one level.
        TKind::Amp => (Op::And, Prec::Binary),
        TKind::Xor => (Op::Xor, Prec::Binary),
        TKind::Bar => (Op::Or, Prec::Binary),
        TKind::LtLt => (Op::Shl, Prec::Shift),
        TKind::GtGt => (Op::Shr, Prec::Shift),
        TKind::Plus => (Op::Add, Prec::Factor),
        TKind::Minus => (Op::Sub, Prec::Factor),
        TKind::Star => (Op::Mul, Prec::Term),
        TKind::Slash => (Op::Div, Prec::Term),
        TKind::Rem => (Op::Rem, Prec::Term),
        TKind::Dot => (Op::Dot, Prec::Project),
        TKind::ColonColon => (Op::Path, Prec::Max),
        kind => Err(ParseError::NotInfix(kind, token.span))?,
    })
}
/// Maps a [Token] to its postfix operator and [Prec] binding level, or
/// [ParseError::NotPostfix] if it cannot follow a complete expression.
fn from_postfix(token: &Token) -> PResult<(Op, Prec)> {
    Ok(match token.kind {
        TKind::Question => (Op::Try, Prec::Unary),
        TKind::LParen => (Op::Call, Prec::Extend),
        TKind::LBrack => (Op::Index, Prec::Extend),
        TKind::LCurly => (Op::Make, Prec::Make),
        kind => Err(ParseError::NotPostfix(kind, token.span))?,
    })
}
/// Decides whether another `op` should fold into an existing [Expr::Op]
/// node of the same operator rather than nest a fresh node.
///
/// Only sequencing (`;`), tuples (`,`), and paths (`::`) coagulate.
/// Projection (`.`) and the comparison operators deliberately nest so
/// chains like `a < b < c` keep their pairwise structure.
fn should_coagulate(prev: Op, op: Op) -> bool {
    prev == op && matches!(prev, Op::Do | Op::Tuple | Op::Path)
}
impl<'t> Parse<'t> for Expr {
    /// Parses an [Expr]ession.
    ///
    /// The `level` parameter indicates the operator binding level of the expression:
    /// only postfix/infix operators that bind at least as tightly as `level`
    /// are consumed by this call.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self> {
        const MIN: usize = Prec::MIN;
        // Comments in expression position are skipped.
        while p.next_if(TKind::Comment).is_ok() {}
        // Prefix
        let tok = p.peek()?;
        let ((op, prec), span) = (from_prefix(tok)?, tok.span);
        let mut head = match op {
            // Empty is returned when a block finisher is an expr prefix.
            // It's the only expr that doesn't consume.
            Op::End if level == Prec::Do.next() => Expr::Op(Op::Tuple, vec![]),
            Op::End => Err(ParseError::NotPrefix(tok.kind, span))?,
            Op::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
            // A grave followed by an identifier is a meta-identifier.
            Op::Mid => Expr::MetId(p.consume().next_if(TKind::Identifier)?.lexeme),
            Op::Lit => Expr::Lit(p.parse(MIN)?),
            // `let Pat (= Expr)?` — the initializer is optional.
            Op::Let => Expr::Let(p.consume().parse(MIN)?, p.opt_if(prec.next(), TKind::Eq)?),
            // `const Pat = Expr` — the initializer is required.
            Op::Const => Expr::Const(p.consume().parse(prec.next())?, {
                p.next_if(TKind::Eq)?;
                p.parse(prec.next())?
            }),
            // `macro Expr => Expr`
            Op::Macro => Expr::Op(
                op,
                vec![p.consume().parse(prec.next())?, {
                    p.next_if(TKind::FatArrow)?;
                    p.parse(prec.next())?
                }],
            ),
            // `match Expr { MatchArm,* }` — the scrutinee is restricted to
            // Logical and above (presumably so `{` starts the arm list
            // rather than a Make; confirm).
            Op::Match => Expr::Match(p.consume().parse(Prec::Logical.value())?, {
                p.next_if(TKind::LCurly)?;
                p.list(vec![], TKind::Comma, TKind::RCurly)?
            }),
            // `{ Expr? }` — zero or one inner expression.
            Op::Block => Expr::Op(
                op,
                p.consume().opt(MIN, TKind::RCurly)?.into_iter().collect(),
            ),
            // `[ Expr,* ]`
            Op::Array => Expr::Op(op, p.consume().list(vec![], TKind::Comma, TKind::RBrack)?),
            // `( Expr? )` — an empty pair of parens becomes the unit tuple.
            Op::Group => match p.consume().opt(MIN, TKind::RParen)? {
                Some(value) => Expr::Op(Op::Group, vec![value]),
                None => Expr::Op(Op::Tuple, vec![]),
            },
            // `if`/`while` always carry three children: condition, body,
            // and an else branch (empty Op::End node when absent).
            Op::If | Op::While => {
                p.consume();
                let exprs = vec![
                    // conditional restricted to Logical operators or above
                    p.parse(Prec::Logical.value())?,
                    p.parse(prec.next())?,
                    match p.peek() {
                        Ok(Token { kind: TKind::Else, .. }) => p.consume().parse(prec.next())?,
                        _ => Expr::Op(Op::End, vec![]).anno(span.merge(p.span())),
                    },
                ];
                Expr::Op(op, exprs)
            }
            // `fn ( Pat,* ) Expr`
            Op::Fn => {
                p.consume().next_if(TKind::LParen)?;
                Expr::Fn(
                    p.list(vec![], TKind::Comma, TKind::RParen)?,
                    p.parse(prec.next())?,
                )
            }
            // dirty hack: There are two closure operators, signaled by returned prec.
            // `|` (Prec::Min) takes parameters; `||` (Prec::Max) takes none.
            Op::Lambda if prec == Prec::Min => Expr::Fn(
                p.consume().list(vec![], TKind::Comma, TKind::Bar)?,
                p.parse(Prec::Body.next())?,
            ),
            Op::Lambda => Expr::Fn(vec![], p.consume().parse(Prec::Body.next())?),
            // Everything else is a simple unary prefix operator.
            _ => Expr::Op(op, vec![p.consume().parse(prec.next())?]),
        };
        // Postfix
        while let Ok(tok) = p.peek()
            && let Ok((op, prec)) = from_postfix(tok)
            && level <= prec.prev()
            && op != Op::End
        {
            let span = span.merge(p.span());
            p.consume();
            head = match op {
                Op::Make => Expr::Make(
                    head.anno(span).into(),
                    // NOTE: this consume() is a no-op — the `{` was already
                    // consumed above, so next_tok is empty here.
                    p.consume().list(vec![], TKind::Comma, TKind::RCurly)?,
                ),
                Op::Index => Expr::Op(
                    op,
                    p.list(vec![head.anno(span)], TKind::Comma, TKind::RBrack)?,
                ),
                Op::Call => Expr::Op(
                    op,
                    p.list(vec![head.anno(span)], TKind::Comma, TKind::RParen)?,
                ),
                _ => Expr::Op(op, vec![head.anno(span)]),
            };
        }
        // Infix
        while let Ok(tok) = p.peek()
            && let Ok((op, prec)) = from_infix(tok)
            && level <= prec.prev()
            && op != Op::End
        {
            let span = span.merge(p.span());
            p.consume();
            head = match head {
                // controls expression chaining vs coagulating
                Expr::Op(prev, mut args) if should_coagulate(prev, op) => {
                    args.push(p.parse(prec.next())?);
                    Expr::Op(op, args)
                }
                head => Expr::Op(op, vec![head.anno(span), p.parse(prec.next())?]),
            }
        }
        Ok(head)
    }
}
impl<'t, P: Parse<'t> + Annotation> Parse<'t> for Anno<P> {
    /// Parses a `P` and annotates it with the [Span] it covered.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized {
        let start = p.span();
        let value = p.parse(level)?;
        Ok(Anno(value, start.merge(p.span())))
    }
}
impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
    /// Parses a `P` and moves it onto the heap.
    fn parse(p: &mut Parser<'t>, level: usize) -> PResult<Self>
    where Self: Sized {
        p.parse(level).map(Box::new)
    }
}

1
src/parser/numeric.rs Normal file
View File

@@ -0,0 +1 @@

42
src/span.rs Normal file
View File

@@ -0,0 +1,42 @@
use std::ops::Range;
/// Stores the start and end byte position
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    pub head: u32,
    pub tail: u32,
}

/// Stores the start and end byte position
#[allow(non_snake_case)]
pub fn Span(head: u32, tail: u32) -> Span {
    Span { head, tail }
}

impl Span {
    /// Updates `self` to include all but the last byte in `other`
    ///
    /// NOTE(review): the new tail is `max(self.tail, other.head)` — it
    /// extends only to the *start* of `other`, never its end. This matches
    /// how the parser merges a node's span with its current location, but
    /// confirm before reusing it as a general interval union.
    pub fn merge(self, other: Span) -> Span {
        let head = if self.head <= other.head { self.head } else { other.head };
        let tail = if self.tail >= other.head { self.tail } else { other.head };
        Span { head, tail }
    }
}

impl std::fmt::Debug for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Span { head, tail } = self;
        write!(f, "[{head}:{tail}]")
    }
}

impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Span { head, tail } = self;
        write!(f, "{head}:{tail}")
    }
}

impl From<Span> for Range<usize> {
    fn from(value: Span) -> Self {
        (value.head as usize)..(value.tail as usize)
    }
}

88
src/token.rs Normal file
View File

@@ -0,0 +1,88 @@
//! The Token defines an interface between lexer and parser
use crate::span::Span;
/// A single lexical token: its source text, classification, and location.
#[derive(Clone, Debug)]
pub struct Token {
    // The token's text. NOTE(review): literal parsing suggests quotes/escapes
    // are already processed by the lexer — confirm against the lexer.
    pub lexeme: String,
    // What kind of token this is.
    pub kind: TKind,
    // The byte range the token occupies in the source.
    pub span: Span,
}
/// The kind of a [Token]: trivia, keywords, identifiers, literals,
/// delimiters, and punctuation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TKind {
    // Trivia
    Comment,
    // Keywords
    Break,
    Const,
    Do,
    Else,
    False,
    Fn,
    If,
    Let,
    Loop,
    Macro,
    Match,
    Return,
    True,
    While,
    // Names and literals
    Identifier, // or Keyword
    Character,
    String,
    Integer, // 0(x[0-9A-Fa-f]* | d[0-9]* | o[0-7]* | b[0-1]*) | [1-9][0-9]*
    // Delimiters
    LCurly, // {
    RCurly, // }
    LBrack, // [
    RBrack, // ]
    LParen, // (
    RParen, // )
    // Punctuation
    Amp,        // &
    AmpAmp,     // &&
    AmpEq,      // &=
    Arrow,      // ->
    At,         // @
    Backslash,  // \
    Bang,       // !
    BangBang,   // !!
    BangEq,     // !=
    Bar,        // |
    BarBar,     // ||
    BarEq,      // |=
    Colon,      // :
    ColonColon, // ::
    Comma,      // ,
    Dot,        // .
    DotDot,     // ..
    DotDotEq,   // ..=
    Eq,         // =
    EqEq,       // ==
    FatArrow,   // =>
    Grave,      // `
    Gt,         // >
    GtEq,       // >=
    GtGt,       // >>
    GtGtEq,     // >>=
    Hash,       // #
    HashBang,   // #!
    Lt,         // <
    LtEq,       // <=
    LtLt,       // <<
    LtLtEq,     // <<=
    Minus,      // -
    MinusEq,    // -=
    Plus,       // +
    PlusEq,     // +=
    Question,   // ?
    Rem,        // %
    RemEq,      // %=
    Semi,       // ;
    Slash,      // /
    SlashEq,    // /=
    Star,       // *
    StarEq,     // *=
    Tilde,      // ~
    Xor,        // ^
    XorEq,      // ^=
    XorXor,     // ^^
}