Implement a dead simple pseudo-EBNF to Pest translator, which works on Conlang's EBNF

This commit is contained in:
John 2024-04-06 05:22:21 -05:00
commit d1d8c45bdb
6 changed files with 316 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

16
Cargo.lock generated Normal file
View File

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "grammatical"
version = "0.1.0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

7
Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "grammatical"
version = "0.1.0"
edition = "2021"
[dependencies]
unicode-ident = "1.0.12"

252
src/lib.rs Normal file
View File

@ -0,0 +1,252 @@
use std::{iter::Peekable, str::CharIndices};
use unicode_ident::*;
/// Rule = ident '=' Either? ';' ;
///
/// One named grammar rule. Its `Display` implementation renders it as a Pest
/// rule definition of the form `name = { body }`.
#[derive(Debug, Default)]
pub struct Rule<'a> {
/// Text consumed by `Parser::comment` immediately before the rule name, if any.
/// It is stored but not emitted when the rule is displayed.
pub comment: Option<&'a str>,
/// The identifier on the left-hand side of `=`.
pub name: &'a str,
/// The right-hand side of the rule; defaults to `RuleKind::Empty`.
pub body: RuleKind<'a>,
}
/// The expression tree forming the right-hand side of a rule.
///
/// Every variant borrows its text from the parsed source. The `Display`
/// implementation renders the tree using Pest syntax.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum RuleKind<'a> {
    /// `Either = Follow ('|' Follow)* ;`
    ///
    /// A choice between alternatives, rendered as `a | b`.
    Either(Vec<RuleKind<'a>>),
    /// `Follow = (Any | Many | Maybe | Not)+ ;`
    ///
    /// A sequence of expressions, rendered as `a ~ b`.
    Follow(Vec<RuleKind<'a>>),
    /// `Any = Not '*' ;`
    ///
    /// Rendered as `a*`.
    Any(Box<RuleKind<'a>>),
    /// `Many = Not '+' ;`
    ///
    /// Rendered as `a+`.
    Many(Box<RuleKind<'a>>),
    /// `Maybe = Not '?' ;`
    ///
    /// Rendered as `a?`.
    Maybe(Box<RuleKind<'a>>),
    /// `Not = '!'? Prime ;`
    ///
    /// Holds the negated expression; rendered as `!a`.
    Not(Box<RuleKind<'a>>),
    /// `Group = '(' Either ')' ;`
    ///
    /// Rendered as `(a)`.
    Group(Box<RuleKind<'a>>),
    /// `ident = XID_START XID_CONTINUE* ;`
    ///
    /// Rendered verbatim.
    Ident(&'a str),
    /// `chr = "'" (!"'" ANY)* "'" ;`
    ///
    /// The contents of a single-quoted literal, without the quotes.
    /// Rendered as a double-quoted, escaped string.
    Chr(&'a str),
    /// `str = '"' (!'"' ANY)* '"' ;`
    ///
    /// The contents of a double-quoted literal, without the quotes.
    /// Rendered as a double-quoted, escaped string.
    Str(&'a str),
    /// Nothing; rendered as `(!ANY)?`.
    #[default]
    Empty,
}

impl std::fmt::Display for RuleKind<'_> {
    /// Writes the expression in Pest syntax.
    ///
    /// An `Either` or `Follow` with no elements is written exactly like
    /// [`RuleKind::Empty`], so the output never contains an empty expression.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Rendering shared by `Empty` and by empty sequences/choices.
        const EMPTY: &str = "(!ANY)?";

        /// Writes `items` separated by `sep`, or `EMPTY` when there are none.
        fn join(
            f: &mut std::fmt::Formatter<'_>,
            items: &[RuleKind<'_>],
            sep: &str,
        ) -> std::fmt::Result {
            match items.split_first() {
                None => f.write_str(EMPTY),
                Some((first, rest)) => {
                    write!(f, "{first}")?;
                    rest.iter().try_for_each(|item| write!(f, "{sep}{item}"))
                }
            }
        }

        match self {
            RuleKind::Either(alternatives) => join(f, alternatives, " | "),
            RuleKind::Follow(sequence) => join(f, sequence, " ~ "),
            RuleKind::Any(inner) => write!(f, "{inner}*"),
            RuleKind::Many(inner) => write!(f, "{inner}+"),
            RuleKind::Maybe(inner) => write!(f, "{inner}?"),
            RuleKind::Not(inner) => write!(f, "!{inner}"),
            RuleKind::Group(inner) => write!(f, "({inner})"),
            RuleKind::Ident(name) => f.write_str(name),
            // Both literal kinds use `Debug` formatting, which yields a
            // double-quoted string with escapes.
            RuleKind::Chr(text) | RuleKind::Str(text) => write!(f, "{text:?}"),
            RuleKind::Empty => f.write_str(EMPTY),
        }
    }
}
impl std::fmt::Display for Rule<'_> {
    /// Writes the rule as `name = { body }`.
    ///
    /// The stored comment is intentionally left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (name, body) = (self.name, &self.body);
        write!(f, "{name} = {{ {body} }}")
    }
}
/// A cursor over a borrowed source text.
///
/// `head..tail` is the byte range of the fragment currently being scanned:
/// [`Parser::start`] moves `head` up to `tail`, and every consumed character
/// advances `tail`.
#[derive(Debug, Clone)]
pub struct Parser<'a> {
    /// The complete source text.
    text: &'a str,
    /// The characters of `text` that have not been consumed yet.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset where the current fragment begins.
    head: usize,
    /// Byte offset just past the most recently consumed character.
    tail: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the beginning of `text`.
    pub fn new(text: &'a str) -> Self {
        Self {
            text,
            chars: text.char_indices().peekable(),
            head: 0,
            tail: 0,
        }
    }

    /// Skips whitespace, then begins a new fragment at the current position.
    pub fn start(&mut self) -> &mut Self {
        self.space();
        self.head = self.tail;
        self
    }

    /// Returns the text consumed since the fragment was last started.
    pub fn fragment(&self) -> &'a str {
        &self.text[self.head..self.tail]
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.chars.peek().map(|&(_, c)| c)
    }

    /// Consumes the next character and returns it with its byte offset.
    ///
    /// `tail` ends up just past the consumed character, or at the end of the
    /// text once the input is exhausted.
    pub fn take(&mut self) -> Option<(usize, char)> {
        let out = self.chars.next();
        self.tail = match out {
            // Advance by the encoded width: a fixed `+ 1` would leave `tail`
            // inside a multi-byte character and make `fragment` panic.
            Some((i, c)) => i + c.len_utf8(),
            None => self.text.len(),
        };
        out
    }

    /// Consumes the next character if `f` accepts it.
    ///
    /// Returns `None`, consuming nothing, at end of input or when `f` rejects
    /// the character.
    pub fn take_one(&mut self, f: fn(char) -> bool) -> Option<&mut Self> {
        if !f(self.peek()?) {
            return None;
        }
        self.take();
        Some(self)
    }

    /// Consumes characters for as long as `f` accepts them.
    pub fn take_many(&mut self, f: fn(char) -> bool) -> &mut Self {
        while self.take_one(f).is_some() {}
        self
    }

    /// Consumes any run of whitespace.
    pub fn space(&mut self) -> &mut Self {
        self.take_many(char::is_whitespace)
    }
}
impl<'a> Parser<'a> {
    /// Parses `Rule = ident '=' Either? ';'`, preceded by optional comments.
    ///
    /// Returns `None` when no identifier follows (for example at end of
    /// input) or when the identifier is not followed by `=`. A rule with
    /// nothing between `=` and `;` gets the body [`RuleKind::Empty`].
    ///
    /// # Panics
    ///
    /// Panics if the rule is not terminated by `;`.
    pub fn rule(&mut self) -> Option<Rule<'a>> {
        let comment = self.comment();
        let name = self.ident()?;
        self.space().take_one(|c| '=' == c)?;
        let body = self.either().unwrap_or_default();
        if self.space().take_one(|c| ';' == c).is_none() {
            panic!("Rule should end in ';': {}..{}", self.head, self.tail)
        }
        Some(Rule {
            comment,
            name,
            body,
        })
    }

    /// Parses `Either = Follow ('|' Follow)*`.
    ///
    /// A lone alternative is returned as-is, without an `Either` wrapper, and
    /// `None` is returned when there is nothing to parse. When at least one
    /// `|` is present, alternatives with no content become
    /// [`RuleKind::Empty`].
    pub fn either(&mut self) -> Option<RuleKind<'a>> {
        let first = self.follow();
        if self.space().peek() != Some('|') {
            return first;
        }
        let mut out = vec![first.unwrap_or_default()];
        while self.space().take_one(|c| '|' == c).is_some() {
            out.push(self.follow().unwrap_or_default())
        }
        Some(RuleKind::Either(out))
    }

    /// Parses `Follow = (Any | Many | Maybe | Not)+`.
    ///
    /// A lone element is returned as-is, without a `Follow` wrapper. Returns
    /// `None` when no element could be parsed, so callers can substitute
    /// [`RuleKind::Empty`] instead of receiving an empty sequence.
    pub fn follow(&mut self) -> Option<RuleKind<'a>> {
        let mut out = vec![];
        while let Some(rule) = self.repeat() {
            out.push(rule)
        }
        match out.len() {
            0 => None,
            1 => out.pop(),
            _ => Some(RuleKind::Follow(out)),
        }
    }

    /// Parses a `Not` followed by at most one of the postfix operators
    /// `*`, `+` or `?`.
    pub fn repeat(&mut self) -> Option<RuleKind<'a>> {
        let out = self.not()?;
        let out = match self.space().peek() {
            Some('*') => RuleKind::Any(out.into()),
            Some('+') => RuleKind::Many(out.into()),
            Some('?') => RuleKind::Maybe(out.into()),
            _ => return Some(out),
        };
        // Consume the postfix operator that was only peeked at above.
        self.take();
        Some(out)
    }

    /// Parses `Not = '!'? Prime`.
    pub fn not(&mut self) -> Option<RuleKind<'a>> {
        if self.space().take_one(|c| '!' == c).is_some() {
            Some(RuleKind::Not(self.prime()?.into()))
        } else {
            self.prime()
        }
    }

    /// Parses a group, a string literal, a character literal or an
    /// identifier, chosen by the next non-whitespace character.
    pub fn prime(&mut self) -> Option<RuleKind<'a>> {
        Some(match self.space().peek()? {
            '(' => return self.group(),
            '"' => RuleKind::Str(self.str()?),
            '\'' => RuleKind::Chr(self.chr()?),
            _ => RuleKind::Ident(self.ident()?),
        })
    }

    /// Parses `Group = '(' Either ')'`.
    ///
    /// Returns `None` if the next character is not `(`. A group with no
    /// content wraps [`RuleKind::Empty`].
    ///
    /// # Panics
    ///
    /// Panics if the closing `)` is missing.
    pub fn group(&mut self) -> Option<RuleKind<'a>> {
        self.take_one(|c| '(' == c)?;
        let out = self.either().unwrap_or_default();
        if self.take_one(|c| ')' == c).is_none() {
            panic!("Groups should have terminating ')': {}", self.tail)
        }
        Some(RuleKind::Group(out.into()))
    }

    /// Parses `ident = XID_START XID_CONTINUE*`, skipping leading whitespace.
    pub fn ident(&mut self) -> Option<&'a str> {
        self.start().take_one(is_xid_start)?;
        self.take_many(is_xid_continue);
        Some(self.fragment())
    }

    /// Parses a single-quoted literal and returns the text between the quotes.
    ///
    /// Returns `None` if the next non-whitespace character is not `'`.
    ///
    /// # Panics
    ///
    /// Panics if the closing `'` is missing.
    pub fn chr(&mut self) -> Option<&'a str> {
        self.space().take_one(|c| '\'' == c)?;
        // Begin the fragment right after the opening quote. `start()` is not
        // used here because it skips whitespace, and whitespace inside a
        // literal is part of its contents.
        self.head = self.tail;
        self.take_many(|c| '\'' != c);
        let out = self.fragment();
        if self.take_one(|c| '\'' == c).is_none() {
            panic!("chr should have terminating '\'': {}", self.tail)
        }
        Some(out)
    }

    /// Parses a double-quoted literal and returns the text between the quotes.
    ///
    /// Returns `None` if the next non-whitespace character is not `"`.
    ///
    /// # Panics
    ///
    /// Panics if the closing `"` is missing.
    pub fn str(&mut self) -> Option<&'a str> {
        self.space().take_one(|c| '\"' == c)?;
        // See `chr`: whitespace inside the literal must be preserved.
        self.head = self.tail;
        self.take_many(|c| '\"' != c);
        let out = self.fragment();
        if self.take_one(|c| '\"' == c).is_none() {
            panic!("str should have terminating '\"': {}", self.tail)
        }
        Some(out)
    }

    /// Consumes any run of `(* ... *)` comments, which may nest, together
    /// with the whitespace around them.
    ///
    /// Returns the consumed text with surrounding whitespace trimmed, or
    /// `None` when nothing but whitespace was consumed. Also returns `None`,
    /// after consuming the `(`, when a `(` is not followed by `*`.
    pub fn comment(&mut self) -> Option<&'a str> {
        let start = self.tail;
        while self.space().take_one(|c| '(' == c).is_some() {
            self.take_one(|c| '*' == c)?;
            while let Some(c) = self.peek() {
                match c {
                    '*' => {
                        self.take();
                        // `*)` closes the comment; a lone `*` is ordinary text.
                        if self.take_one(|c| ')' == c).is_some() {
                            break;
                        }
                    }
                    '(' => {
                        // Possibly a nested comment; the result is irrelevant
                        // because the text is sliced out as a whole below.
                        self.comment();
                    }
                    _ => {
                        self.take();
                    }
                }
            }
        }
        let out = self.text[start..self.tail].trim();
        (!out.is_empty()).then_some(out)
    }
}

22
src/main.rs Normal file
View File

@ -0,0 +1,22 @@
use std::error::Error;
use grammatical::*;
fn main() -> Result<(), Box<dyn Error>> {
for file in std::env::args().skip(1) {
let file = std::fs::read_to_string(file)?;
let mut p = Parser::new(&file);
while let Some(rule) = p.rule() {
println!("{rule}");
}
}
for line in std::io::stdin().lines() {
let line = line?;
let mut p = Parser::new(&line);
while let Some(rule) = p.rule() {
println!("{} = {{ {} }}", rule.name, rule.body);
}
}
Ok(())
}

18
test.grammatical Normal file
View File

@ -0,0 +1,18 @@
(* Grammatical EBNF *)
Ruleset = (COMMENT? Rule)* EOI ;
Rule = ident '=' Either? ';' ;
Either = Follow ('|' Follow)* ;
Follow = (Any | Many | Maybe | Not)+ ;
Any = Not '*' ;
Many = Not '+' ;
Maybe = Not '?' ;
Not = '!'? Prime ;
Prime = Group | chr | str | ident ;
Group = '(' Either ')' ;
ident = XID_START XID_CONTINUE* ;
str = '"' (!'"' ANY)* '"' ;
chr = "'" (!"'" ANY)* "'" ;
WHITESPACE = WHITE_SPACE ;
COMMENT = "(*" (COMMENT | !"*)" ANY)* "*)" ;