Implement a dead simple pseudo-EBNF to Pest translator, which works on Conlang's EBNF
This commit is contained in:
commit
d1d8c45bdb
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
16
Cargo.lock
generated
Normal file
16
Cargo.lock
generated
Normal file
@ -0,0 +1,16 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "grammatical"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "grammatical"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
unicode-ident = "1.0.12"
|
252
src/lib.rs
Normal file
252
src/lib.rs
Normal file
@ -0,0 +1,252 @@
|
||||
use std::{iter::Peekable, str::CharIndices};
|
||||
|
||||
use unicode_ident::*;
|
||||
|
||||
/// A single grammar production.
///
/// ```text
/// Rule = ident '=' Either? ';' ;
/// ```
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Rule<'a> {
    /// Comment text attached to the rule, if any.
    pub comment: Option<&'a str>,
    /// The identifier on the left-hand side of the `=`.
    pub name: &'a str,
    /// The right-hand side of the rule; defaults to [`RuleKind::Empty`].
    pub body: RuleKind<'a>,
}

/// A node in the right-hand side of a [`Rule`].
///
/// Each variant is annotated with the production it corresponds to. All
/// string payloads borrow from the text the rule was parsed from.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum RuleKind<'a> {
    /// Alternation.
    ///
    /// `Either = Follow ('|' Follow)* ;`
    Either(Vec<RuleKind<'a>>),
    /// Sequence.
    ///
    /// `Follow = (Any | Many | Maybe | Not)+ ;`
    Follow(Vec<RuleKind<'a>>),
    /// Zero or more repetitions.
    ///
    /// `Any = Not '*' ;`
    Any(Box<RuleKind<'a>>),
    /// One or more repetitions.
    ///
    /// `Many = Not '+' ;`
    Many(Box<RuleKind<'a>>),
    /// Zero or one occurrence.
    ///
    /// `Maybe = Not '?' ;`
    Maybe(Box<RuleKind<'a>>),
    /// Negation of the wrapped node.
    ///
    /// `Not = '!'? Prime ;`
    Not(Box<RuleKind<'a>>),
    /// Parenthesised sub-expression.
    ///
    /// `Group = '(' Either ')' ;`
    Group(Box<RuleKind<'a>>),
    /// Reference to another rule by name.
    ///
    /// `ident = XID_START XID_CONTINUE* ;`
    Ident(&'a str),
    /// Contents of a single-quoted literal (quotes excluded).
    ///
    /// `chr = "'" (!"'" ANY)* "'" ;`
    Chr(&'a str),
    /// Contents of a double-quoted literal (quotes excluded).
    ///
    /// `str = '"' (!'"' ANY)* '"' ;`
    Str(&'a str),
    /// Nothing
    #[default]
    Empty,
}
|
||||
|
||||
impl std::fmt::Display for RuleKind<'_> {
    /// Renders this node in the translator's output syntax: alternatives are
    /// joined with `" | "`, sequence items with `" ~ "`, repetition and
    /// negation operators are attached directly to their operand, and both
    /// literal kinds are emitted as double-quoted, escaped strings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Writes every item in order with `sep` between consecutive items.
        fn separated(
            f: &mut std::fmt::Formatter<'_>,
            items: &[RuleKind<'_>],
            sep: &str,
        ) -> std::fmt::Result {
            let mut items = items.iter();
            if let Some(first) = items.next() {
                write!(f, "{first}")?;
                for item in items {
                    f.write_str(sep)?;
                    write!(f, "{item}")?;
                }
            }
            Ok(())
        }

        match self {
            Self::Either(alternatives) => separated(f, alternatives, " | "),
            Self::Follow(sequence) => separated(f, sequence, " ~ "),
            Self::Any(inner) => write!(f, "{inner}*"),
            Self::Many(inner) => write!(f, "{inner}+"),
            Self::Maybe(inner) => write!(f, "{inner}?"),
            Self::Not(inner) => write!(f, "!{inner}"),
            Self::Group(inner) => write!(f, "({inner})"),
            Self::Ident(name) => f.write_str(name),
            // Both literal kinds come out as a double-quoted, escaped string.
            Self::Chr(text) | Self::Str(text) => write!(f, "{text:?}"),
            Self::Empty => f.write_str("(!ANY)?"),
        }
    }
}
|
||||
impl std::fmt::Display for Rule<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self {
|
||||
comment: _,
|
||||
name,
|
||||
body,
|
||||
} = self;
|
||||
// if let Some(comment) = comment {
|
||||
// writeln!(f, "/* {comment} */")?;
|
||||
// }
|
||||
write!(f, "{name} = {{ {body} }}")
|
||||
}
|
||||
}
|
||||
|
||||
/// A cursor over grammar source text.
///
/// The parser tracks a *fragment*: the byte range `head..tail` of `text`.
/// `tail` advances as characters are consumed; `head` is moved up to `tail`
/// by [`Parser::start`].
pub struct Parser<'a> {
    /// The complete input.
    text: &'a str,
    /// Remaining characters of `text`, paired with their byte offsets.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset where the current fragment begins.
    head: usize,
    /// Byte offset just past the most recently consumed character.
    tail: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the beginning of `text`.
    pub fn new(text: &'a str) -> Self {
        Self {
            text,
            chars: text.char_indices().peekable(),
            head: 0,
            tail: 0,
        }
    }

    /// Skips whitespace, then begins a new (empty) fragment at the cursor.
    pub fn start(&mut self) -> &mut Self {
        self.space();
        self.head = self.tail;
        self
    }

    /// Returns the text consumed since the fragment was last started.
    pub fn fragment(&self) -> &'a str {
        let text: &'a str = self.text;
        &text[self.head..self.tail]
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.chars.peek().map(|&(_, c)| c)
    }

    /// Consumes the next character, returning it with its byte offset.
    ///
    /// Returns `None` at the end of input.
    pub fn take(&mut self) -> Option<(usize, char)> {
        let out = self.chars.next();
        self.tail = match out {
            // Advance by the character's encoded width, not by one byte, so
            // `tail` always lands on a char boundary and `fragment` can slice
            // safely after consuming multi-byte characters.
            Some((i, c)) => i + c.len_utf8(),
            None => self.text.len(),
        };
        out
    }

    /// Consumes the next character if `f` accepts it.
    ///
    /// Returns `None`, consuming nothing, when the input is exhausted or the
    /// next character is rejected.
    pub fn take_one(&mut self, f: fn(char) -> bool) -> Option<&mut Self> {
        let &(_, next) = self.chars.peek()?;
        if !f(next) {
            return None;
        }
        self.take();
        Some(self)
    }

    /// Consumes characters for as long as `f` accepts them.
    pub fn take_many(&mut self, f: fn(char) -> bool) -> &mut Self {
        while self.take_one(f).is_some() {}
        self
    }

    /// Consumes any run of whitespace at the cursor.
    pub fn space(&mut self) -> &mut Self {
        self.take_many(char::is_whitespace)
    }
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn rule(&mut self) -> Option<Rule<'a>> {
|
||||
let out = Rule {
|
||||
comment: self.comment(),
|
||||
name: self.ident()?,
|
||||
body: {
|
||||
self.space()
|
||||
.take_one(|c| '=' == c)?
|
||||
.either()
|
||||
.unwrap_or_default()
|
||||
},
|
||||
};
|
||||
if self.space().take_one(|c| ';' == c).is_none() {
|
||||
panic!("Rule should end in ';': {}..{}", self.head, self.tail)
|
||||
}
|
||||
Some(out)
|
||||
}
|
||||
pub fn either(&mut self) -> Option<RuleKind<'a>> {
|
||||
let mut out = vec![self.follow()?];
|
||||
while self.space().take_one(|c| '|' == c).is_some() {
|
||||
out.push(self.follow()?)
|
||||
}
|
||||
match out.len() {
|
||||
1 => out.pop(),
|
||||
_ => Some(RuleKind::Either(out)),
|
||||
}
|
||||
}
|
||||
pub fn follow(&mut self) -> Option<RuleKind<'a>> {
|
||||
let mut out = vec![];
|
||||
while let Some(rule) = self.repeat() {
|
||||
out.push(rule)
|
||||
}
|
||||
match out.len() {
|
||||
1 => out.pop(),
|
||||
_ => Some(RuleKind::Follow(out)),
|
||||
}
|
||||
}
|
||||
pub fn repeat(&mut self) -> Option<RuleKind<'a>> {
|
||||
let out = self.not()?;
|
||||
let out = match self.space().peek() {
|
||||
Some('*') => RuleKind::Any(out.into()),
|
||||
Some('+') => RuleKind::Many(out.into()),
|
||||
Some('?') => RuleKind::Maybe(out.into()),
|
||||
_ => return Some(out),
|
||||
};
|
||||
self.take();
|
||||
Some(out)
|
||||
}
|
||||
pub fn not(&mut self) -> Option<RuleKind<'a>> {
|
||||
match self.space().take_one(|c| '!' == c) {
|
||||
Some(_) => Some(RuleKind::Not(self.prime()?.into())),
|
||||
_ => self.prime(),
|
||||
}
|
||||
}
|
||||
pub fn prime(&mut self) -> Option<RuleKind<'a>> {
|
||||
Some(match self.space().peek()? {
|
||||
'(' => return self.group(),
|
||||
'"' => RuleKind::Str(self.str()?),
|
||||
'\'' => RuleKind::Chr(self.chr()?),
|
||||
_ => RuleKind::Ident(self.ident()?),
|
||||
})
|
||||
}
|
||||
pub fn group(&mut self) -> Option<RuleKind<'a>> {
|
||||
self.take_one(|c| '(' == c)?;
|
||||
let out = self.either()?;
|
||||
if self.take_one(|c| ')' == c).is_none() {
|
||||
panic!("Groups should have terminating ')': {}", self.tail)
|
||||
}
|
||||
Some(RuleKind::Group(out.into()))
|
||||
}
|
||||
pub fn ident(&mut self) -> Option<&'a str> {
|
||||
self.start().take_one(is_xid_start)?;
|
||||
self.take_many(is_xid_continue);
|
||||
Some(self.fragment())
|
||||
}
|
||||
pub fn chr(&mut self) -> Option<&'a str> {
|
||||
self.space().take_one(|c| '\'' == c)?;
|
||||
self.start().take_many(|c| '\'' != c);
|
||||
let out = self.fragment();
|
||||
if self.take_one(|c| '\'' == c).is_none() {
|
||||
panic!("chr should have terminating '\'': {}", self.tail)
|
||||
}
|
||||
Some(out)
|
||||
}
|
||||
pub fn str(&mut self) -> Option<&'a str> {
|
||||
self.space().take_one(|c| '\"' == c)?;
|
||||
self.start().take_many(|c| '\"' != c);
|
||||
let out = self.fragment();
|
||||
if self.take_one(|c| '\"' == c).is_none() {
|
||||
panic!("str should have terminating '\"': {}", self.tail)
|
||||
}
|
||||
Some(out)
|
||||
}
|
||||
|
||||
pub fn comment(&mut self) -> Option<&'a str> {
|
||||
let start = self.tail;
|
||||
while self.space().take_one(|c| '(' == c).is_some() {
|
||||
self.take_one(|c| '*' == c)?;
|
||||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'*' => {
|
||||
self.take_one(|c| '*' == c)?;
|
||||
if self.take_one(|c| ')' == c).is_some() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
'(' => {
|
||||
self.comment();
|
||||
}
|
||||
_ => {
|
||||
self.take();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let out = &self.text[start..self.tail];
|
||||
(out.len() > 1).then_some(out)
|
||||
}
|
||||
}
|
22
src/main.rs
Normal file
22
src/main.rs
Normal file
@ -0,0 +1,22 @@
|
||||
use std::error::Error;
|
||||
|
||||
use grammatical::*;
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
for file in std::env::args().skip(1) {
|
||||
let file = std::fs::read_to_string(file)?;
|
||||
let mut p = Parser::new(&file);
|
||||
while let Some(rule) = p.rule() {
|
||||
println!("{rule}");
|
||||
}
|
||||
}
|
||||
|
||||
for line in std::io::stdin().lines() {
|
||||
let line = line?;
|
||||
let mut p = Parser::new(&line);
|
||||
while let Some(rule) = p.rule() {
|
||||
println!("{} = {{ {} }}", rule.name, rule.body);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
18
test.grammatical
Normal file
18
test.grammatical
Normal file
@ -0,0 +1,18 @@
|
||||
(* Grammatical EBNF *)
|
||||
Ruleset = (COMMENT? Rule)* EOI ;
|
||||
Rule = ident '=' Either? ';' ;
|
||||
Either = Follow ('|' Follow)* ;
|
||||
Follow = (Any | Many | Maybe | Not)+ ;
|
||||
Any = Not '*' ;
|
||||
Many = Not '+' ;
|
||||
Maybe = Not '?' ;
|
||||
Not = '!'? Prime ;
|
||||
Prime = Group | chr | str | ident ;
|
||||
Group = '(' Either ')' ;
|
||||
|
||||
ident = XID_START XID_CONTINUE* ;
|
||||
str = '"' (!'"' ANY)* '"' ;
|
||||
chr = "'" (!"'" ANY)* "'" ;
|
||||
|
||||
WHITESPACE = WHITE_SPACE ;
|
||||
COMMENT = "(*" (COMMENT | !"*)" ANY)* "*)" ;
|
Loading…
Reference in New Issue
Block a user