commit d1d8c45bdbe6a372b04d82b0b46ae5ec7b9190c9
Author: John <j@soft.fish>
Date:   Sat Apr 6 05:22:21 2024 -0500

    Implement a dead simple pseudo-EBNF to Pest translator, which works on Conlang's EBNF

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..1c90227
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,16 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "grammatical"
+version = "0.1.0"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..90a72de
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "grammatical"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+unicode-ident = "1.0.12"
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..d31c42b
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,252 @@
+use std::{iter::Peekable, str::CharIndices};
+
+use unicode_ident::*;
+
+/// One parsed production: `Rule = ident '=' Either? ';' ;`
+#[derive(Debug, Default)]
+pub struct Rule<'a> {
+    pub comment: Option<&'a str>, // whatever `Parser::comment` captured ahead of the rule
+    pub name: &'a str, // identifier on the left of the '='
+    pub body: RuleKind<'a>, // right-hand side of the '='
+}
+
+#[derive(Debug, Default)]
+pub enum RuleKind<'a> {
+    /// Either      = Follow ('|' Follow)* ;
+    Either(Vec<RuleKind<'a>>),
+    /// Follow      = (Any | Many | Maybe | Not)+ ;
+    Follow(Vec<RuleKind<'a>>),
+    /// Any         = Not '*' ;
+    Any(Box<RuleKind<'a>>),
+    /// Many        = Not '+' ;
+    Many(Box<RuleKind<'a>>),
+    /// Maybe       = Not '?' ;
+    Maybe(Box<RuleKind<'a>>),
+    /// Not         = '!'? Prime ;
+    Not(Box<RuleKind<'a>>),
+    /// Group       = '(' Either ')' ;
+    Group(Box<RuleKind<'a>>),
+    /// ident       = XID_START XID_CONTINUE* ;
+    Ident(&'a str),
+    /// chr         = "'" (!"'" ANY)* "'" ;
+    Chr(&'a str),
+    /// str         = '"' (!'"' ANY)* '"' ;
+    Str(&'a str),
+    /// Nothing (the default); displayed as `(!ANY)?`
+    #[default]
+    Empty,
+}
+
+impl std::fmt::Display for RuleKind<'_> { // renders a rule body as text
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            RuleKind::Either(r) => r.iter().enumerate().try_for_each(|(i, r)| {
+                if i != 0 { // separator goes between alternatives, not before the first
+                    write!(f, " | ")?;
+                }
+                write!(f, "{r}")
+            }),
+            RuleKind::Follow(r) => r.iter().enumerate().try_for_each(|(i, r)| {
+                if i != 0 {
+                    write!(f, " ~ ")?; // sequence items are joined with an explicit `~`
+                }
+                write!(f, "{r}")
+            }),
+            RuleKind::Any(r) => write!(f, "{r}*"),
+            RuleKind::Many(r) => write!(f, "{r}+"),
+            RuleKind::Maybe(r) => write!(f, "{r}?"),
+            RuleKind::Not(r) => write!(f, "!{r}"),
+            RuleKind::Group(r) => write!(f, "({r})"),
+            RuleKind::Ident(r) => write!(f, "{r}"),
+            RuleKind::Chr(r) => write!(f, "{r:?}"), // `{:?}` on a str adds double quotes and escapes
+            RuleKind::Str(r) => write!(f, "{r:?}"), // so both literal kinds print as "..." strings
+            RuleKind::Empty => write!(f, "(!ANY)?"),
+        }
+    }
+}
+impl std::fmt::Display for Rule<'_> { // renders the rule as `name = { body }`
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self {
+            comment: _, // ignored here: the code that would emit it is disabled below
+            name,
+            body,
+        } = self;
+        // if let Some(comment) = comment {
+        //     writeln!(f, "/* {comment} */")?;
+        // }
+        write!(f, "{name} = {{ {body} }}") // `{{` and `}}` are literal braces
+    }
+}
+
+pub struct Parser<'a> { // cursor over a grammar source text
+    text: &'a str, // the whole input; fragments are slices of it
+    chars: Peekable<CharIndices<'a>>, // iterator over `text` with one-char lookahead
+    head: usize, // byte offset where the current fragment starts (set by `start`)
+    tail: usize, // byte offset updated by `take`; the current fragment ends here
+}
+
+impl<'a> Parser<'a> {
+    /// Creates a parser positioned at the start of `text`.
+    pub fn new(text: &'a str) -> Self {
+        Self {
+            text,
+            chars: text.char_indices().peekable(),
+            head: 0,
+            tail: 0,
+        }
+    }
+    /// Skips whitespace, then marks the position reached as the start of the next fragment.
+    pub fn start(&mut self) -> &mut Self {
+        self.head = self.space().tail;
+        self
+    }
+    /// Returns the input consumed since the fragment start was last marked.
+    pub fn fragment(&self) -> &'a str {
+        &self.text[self.head..self.tail]
+    }
+    /// Returns the next char without consuming it.
+    pub fn peek(&mut self) -> Option<char> {
+        self.chars.peek().map(|(_, c)| *c)
+    }
+    /// Consumes one char; `tail` advances by its UTF-8 width so it stays on a char boundary.
+    pub fn take(&mut self) -> Option<(usize, char)> {
+        let out = self.chars.next();
+        self.tail = out.map_or(self.text.len(), |(i, c)| i + c.len_utf8());
+        out
+    }
+    /// Consumes the next char if `f` accepts it; otherwise returns `None` and consumes nothing.
+    pub fn take_one(&mut self, f: fn(char) -> bool) -> Option<&mut Self> {
+        self.chars.peek().filter(|(_, c)| f(*c))?;
+        self.take();
+        Some(self)
+    }
+    /// Consumes chars for as long as `f` accepts them.
+    pub fn take_many(&mut self, f: fn(char) -> bool) -> &mut Self {
+        while self.take_one(f).is_some() {}
+        self
+    }
+    /// Consumes a run of whitespace.
+    pub fn space(&mut self) -> &mut Self {
+        self.take_many(char::is_whitespace)
+    }
+}
+
+impl<'a> Parser<'a> {
+    /// Parses one `ident '=' Either? ';'` rule; `None` without ident or `=`, panics without `;`.
+    pub fn rule(&mut self) -> Option<Rule<'a>> {
+        let comment = self.comment();
+        let name = self.ident()?;
+        self.space().take_one(|c| '=' == c)?;
+        let body = self.either().unwrap_or_default();
+        if self.space().take_one(|c| ';' == c).is_none() {
+            panic!("Rule should end in ';': {}..{}", self.head, self.tail)
+        }
+        Some(Rule {
+            comment,
+            name,
+            body,
+        })
+    }
+    /// Parses `Either = Follow ('|' Follow)*`; a single alternative is returned unwrapped.
+    pub fn either(&mut self) -> Option<RuleKind<'a>> {
+        let mut out = vec![self.follow()?];
+        while self.space().take_one(|c| '|' == c).is_some() {
+            out.push(self.follow()?)
+        }
+        match out.len() {
+            1 => out.pop(),
+            _ => Some(RuleKind::Either(out)),
+        }
+    }
+    /// Parses `Follow = (Any | Many | Maybe | Not)+`; a single item is returned unwrapped.
+    pub fn follow(&mut self) -> Option<RuleKind<'a>> {
+        let mut out: Vec<_> = std::iter::from_fn(|| self.repeat()).collect();
+        match out.len() {
+            0 => None, // `+` needs one item; failing lets `rule` fall back to `RuleKind::Empty`
+            1 => out.pop(),
+            _ => Some(RuleKind::Follow(out)),
+        }
+    }
+    /// Parses a `Not` followed by at most one postfix: `*` (Any), `+` (Many) or `?` (Maybe).
+    pub fn repeat(&mut self) -> Option<RuleKind<'a>> {
+        let out = self.not()?;
+        let out = match self.space().peek() {
+            Some('*') => RuleKind::Any(out.into()),
+            Some('+') => RuleKind::Many(out.into()),
+            Some('?') => RuleKind::Maybe(out.into()),
+            _ => return Some(out),
+        };
+        self.take(); // consume the operator that was only peeked above
+        Some(out)
+    }
+    /// Parses `Not = '!'? Prime`, wrapping the prime in `RuleKind::Not` only when `!` is present.
+    pub fn not(&mut self) -> Option<RuleKind<'a>> {
+        match self.space().take_one(|c| '!' == c) {
+            Some(_) => Some(RuleKind::Not(self.prime()?.into())),
+            _ => self.prime(),
+        }
+    }
+    /// Parses `Prime = Group | chr | str | ident`, dispatching on the next non-space char.
+    pub fn prime(&mut self) -> Option<RuleKind<'a>> {
+        Some(match self.space().peek()? {
+            '(' => return self.group(),
+            '"' => RuleKind::Str(self.str()?),
+            '\'' => RuleKind::Chr(self.chr()?),
+            _ => RuleKind::Ident(self.ident()?),
+        })
+    }
+    /// Parses `Group = '(' Either ')'`; panics when the closing `)` is missing.
+    pub fn group(&mut self) -> Option<RuleKind<'a>> {
+        self.take_one(|c| '(' == c)?;
+        let out = self.either()?;
+        if self.take_one(|c| ')' == c).is_none() {
+            panic!("Groups should have terminating ')': {}", self.tail)
+        }
+        Some(RuleKind::Group(out.into()))
+    }
+    /// Parses `ident = XID_START XID_CONTINUE*` after skipping leading whitespace.
+    pub fn ident(&mut self) -> Option<&'a str> {
+        self.start().take_one(is_xid_start)?;
+        self.take_many(is_xid_continue);
+        Some(self.fragment())
+    }
+    /// Parses a `'...'` literal; the text between the quotes is returned verbatim.
+    pub fn chr(&mut self) -> Option<&'a str> {
+        // Begin the fragment right after the quote: `start()` would drop leading spaces.
+        self.head = self.space().take_one(|c| '\'' == c)?.tail;
+        let out = self.take_many(|c| '\'' != c).fragment();
+        if self.take_one(|c| '\'' == c).is_none() {
+            panic!("chr should have terminating '\'': {}", self.tail)
+        }
+        Some(out)
+    }
+    /// Parses a `"..."` literal the same way as `chr`, keeping whitespace inside the quotes.
+    pub fn str(&mut self) -> Option<&'a str> {
+        self.head = self.space().take_one(|c| '\"' == c)?.tail;
+        let out = self.take_many(|c| '\"' != c).fragment();
+        if self.take_one(|c| '\"' == c).is_none() {
+            panic!("str should have terminating '\"': {}", self.tail)
+        }
+        Some(out)
+    }
+    /// Skips whitespace and `(* ... *)` comments (nesting allowed) ahead of a rule, returning the
+    /// skipped text trimmed of surrounding whitespace, or `None` when it is blank.
+    pub fn comment(&mut self) -> Option<&'a str> {
+        let start = self.tail;
+        while self.space().take_one(|c| '(' == c).is_some() {
+            self.take_one(|c| '*' == c)?;
+            while let Some(c) = self.peek() {
+                if c == '(' {
+                    self.comment();
+                    continue;
+                }
+                self.take();
+                if c == '*' && self.take_one(|c| ')' == c).is_some() {
+                    break;
+                }
+            }
+        }
+        let out = self.text[start..self.tail].trim(); // a blank gap ("\n\n") is not a comment
+        (!out.is_empty()).then_some(out)
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..3609a92
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,22 @@
+use std::error::Error;
+
+use grammatical::*;
+
+fn main() -> Result<(), Box<dyn Error>> { // translates each file argument, then stdin
+    for file in std::env::args().skip(1) { // skip(1) drops the first argument
+        let file = std::fs::read_to_string(file)?;
+        let mut p = Parser::new(&file);
+        while let Some(rule) = p.rule() { // ends at the first position `rule` cannot parse
+            println!("{rule}");
+        }
+    }
+
+    for line in std::io::stdin().lines() { // every stdin line gets a fresh parser
+        let line = line?;
+        let mut p = Parser::new(&line);
+        while let Some(rule) = p.rule() {
+            println!("{} = {{ {} }}", rule.name, rule.body); // same text as `Display for Rule`
+        }
+    }
+    Ok(())
+}
diff --git a/test.grammatical b/test.grammatical
new file mode 100644
index 0000000..206ac69
--- /dev/null
+++ b/test.grammatical
@@ -0,0 +1,18 @@
+(* Grammatical EBNF *)
+Ruleset     = (COMMENT? Rule)* EOI ;
+Rule        = ident '=' Either? ';' ;
+Either      = Follow ('|' Follow)* ;
+Follow      = (Any | Many | Maybe | Not)+ ;
+Any         = Not '*' ;
+Many        = Not '+' ;
+Maybe       = Not '?' ;
+Not         = '!'? Prime ;
+Prime       = Group | chr | str | ident ;
+Group       = '(' Either ')' ;
+
+ident       = XID_START XID_CONTINUE* ;
+str         = '"' (!'"' ANY)* '"' ;
+chr         = "'" (!"'" ANY)* "'" ;
+
+WHITESPACE  = WHITE_SPACE ;
+COMMENT     = "(*" (COMMENT | !"*)" ANY)* "*)" ;