Implement a dead simple pseudo-EBNF to Pest translator, which works on Conlang's EBNF
This commit is contained in:
		
							
								
								
									
										252
									
								
								src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										252
									
								
								src/lib.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,252 @@ | ||||
| use std::{iter::Peekable, str::CharIndices}; | ||||
|  | ||||
| use unicode_ident::*; | ||||
|  | ||||
| /// Rule        = ident '=' Either? ';' ; | ||||
| #[derive(Debug, Default)] | ||||
| pub struct Rule<'a> { | ||||
|     pub comment: Option<&'a str>, | ||||
|     pub name: &'a str, | ||||
|     pub body: RuleKind<'a>, | ||||
| } | ||||
|  | ||||
/// The body of a [`Rule`], or any sub-expression of one.
///
/// Each variant is annotated with the production of the source grammar that it
/// represents. All string payloads borrow from the text handed to the parser.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub enum RuleKind<'a> {
    /// Either      = Follow ('|' Follow)* ;
    ///
    /// Ordered alternatives, printed joined by ` | `.
    Either(Vec<RuleKind<'a>>),
    /// Follow      = (Any | Many | Maybe | Not)+ ;
    ///
    /// A sequence of expressions, printed joined by ` ~ `.
    Follow(Vec<RuleKind<'a>>),
    /// Any         = Not '*' ;
    Any(Box<RuleKind<'a>>),
    /// Many        = Not '+' ;
    Many(Box<RuleKind<'a>>),
    /// Maybe       = Not '?' ;
    Maybe(Box<RuleKind<'a>>),
    /// Not         = '!'? Prime ;
    ///
    /// Only constructed when the leading `!` is actually present.
    Not(Box<RuleKind<'a>>),
    /// Group       = '(' Either ')' ;
    Group(Box<RuleKind<'a>>),
    /// ident       = XID_START XID_CONTINUE* ;
    Ident(&'a str),
    /// chr         = "'" (!"'" ANY)* "'" ;
    ///
    /// The text between a pair of single quotes (quotes excluded).
    Chr(&'a str),
    /// str         = '"' (!'"' ANY)* '"' ;
    ///
    /// The text between a pair of double quotes (quotes excluded).
    Str(&'a str),
    /// Nothing
    #[default]
    Empty,
}
|  | ||||
| impl std::fmt::Display for RuleKind<'_> { | ||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|         match self { | ||||
|             RuleKind::Either(r) => r.iter().enumerate().try_for_each(|(i, r)| { | ||||
|                 if i != 0 { | ||||
|                     write!(f, " | ")?; | ||||
|                 } | ||||
|                 write!(f, "{r}") | ||||
|             }), | ||||
|             RuleKind::Follow(r) => r.iter().enumerate().try_for_each(|(i, r)| { | ||||
|                 if i != 0 { | ||||
|                     write!(f, " ~ ")?; | ||||
|                 } | ||||
|                 write!(f, "{r}") | ||||
|             }), | ||||
|             RuleKind::Any(r) => write!(f, "{r}*"), | ||||
|             RuleKind::Many(r) => write!(f, "{r}+"), | ||||
|             RuleKind::Maybe(r) => write!(f, "{r}?"), | ||||
|             RuleKind::Not(r) => write!(f, "!{r}"), | ||||
|             RuleKind::Group(r) => write!(f, "({r})"), | ||||
|             RuleKind::Ident(r) => write!(f, "{r}"), | ||||
|             RuleKind::Chr(r) => write!(f, "{r:?}"), | ||||
|             RuleKind::Str(r) => write!(f, "{r:?}"), | ||||
|             RuleKind::Empty => write!(f, "(!ANY)?"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| impl std::fmt::Display for Rule<'_> { | ||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|         let Self { | ||||
|             comment: _, | ||||
|             name, | ||||
|             body, | ||||
|         } = self; | ||||
|         // if let Some(comment) = comment { | ||||
|         //     writeln!(f, "/* {comment} */")?; | ||||
|         // } | ||||
|         write!(f, "{name} = {{ {body} }}") | ||||
|     } | ||||
| } | ||||
|  | ||||
/// A cursor over source text that remembers the span of the current token.
///
/// `head..tail` is a byte range into `text`. [`Parser::take`] moves `tail`
/// forward, [`Parser::start`] moves `head` up to `tail`, and
/// [`Parser::fragment`] returns the text in between.
pub struct Parser<'a> {
    /// The complete text being parsed.
    text: &'a str,
    /// Lookahead iterator over the `(byte offset, char)` pairs of `text`.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset at which the current fragment begins.
    head: usize,
    /// Byte offset just past the most recently consumed character.
    tail: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the beginning of `text`.
    pub fn new(text: &'a str) -> Self {
        Self {
            text,
            chars: text.char_indices().peekable(),
            head: 0,
            tail: 0,
        }
    }

    /// Skips whitespace, then begins a new fragment at the current position.
    pub fn start(&mut self) -> &mut Self {
        self.space();
        self.head = self.tail;
        self
    }

    /// Returns the text consumed since the last call to [`Parser::start`].
    pub fn fragment(&self) -> &'a str {
        &self.text[self.head..self.tail]
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.chars.peek().map(|&(_, c)| c)
    }

    /// Consumes the next character, returning it together with its byte offset.
    ///
    /// At the end of input this returns `None` and pins `tail` to the end of
    /// the text.
    pub fn take(&mut self) -> Option<(usize, char)> {
        let out = self.chars.next();
        self.tail = match out {
            // Advance by the character's encoded width, not by one byte, so
            // `tail` always rests on a char boundary and slicing cannot panic.
            Some((i, c)) => i + c.len_utf8(),
            None => self.text.len(),
        };
        out
    }

    /// Consumes the next character only if `f` accepts it.
    ///
    /// Returns `Some(self)` when a character was consumed, so calls can be
    /// chained with `?`; returns `None` (consuming nothing) otherwise.
    pub fn take_one(&mut self, f: fn(char) -> bool) -> Option<&mut Self> {
        let next = self.peek()?;
        if !f(next) {
            return None;
        }
        self.take();
        Some(self)
    }

    /// Consumes characters for as long as `f` accepts them.
    pub fn take_many(&mut self, f: fn(char) -> bool) -> &mut Self {
        while self.take_one(f).is_some() {}
        self
    }

    /// Consumes any run of whitespace.
    pub fn space(&mut self) -> &mut Self {
        self.take_many(char::is_whitespace)
    }
}
|  | ||||
| impl<'a> Parser<'a> { | ||||
|     pub fn rule(&mut self) -> Option<Rule<'a>> { | ||||
|         let out = Rule { | ||||
|             comment: self.comment(), | ||||
|             name: self.ident()?, | ||||
|             body: { | ||||
|                 self.space() | ||||
|                     .take_one(|c| '=' == c)? | ||||
|                     .either() | ||||
|                     .unwrap_or_default() | ||||
|             }, | ||||
|         }; | ||||
|         if self.space().take_one(|c| ';' == c).is_none() { | ||||
|             panic!("Rule should end in ';': {}..{}", self.head, self.tail) | ||||
|         } | ||||
|         Some(out) | ||||
|     } | ||||
|     pub fn either(&mut self) -> Option<RuleKind<'a>> { | ||||
|         let mut out = vec![self.follow()?]; | ||||
|         while self.space().take_one(|c| '|' == c).is_some() { | ||||
|             out.push(self.follow()?) | ||||
|         } | ||||
|         match out.len() { | ||||
|             1 => out.pop(), | ||||
|             _ => Some(RuleKind::Either(out)), | ||||
|         } | ||||
|     } | ||||
|     pub fn follow(&mut self) -> Option<RuleKind<'a>> { | ||||
|         let mut out = vec![]; | ||||
|         while let Some(rule) = self.repeat() { | ||||
|             out.push(rule) | ||||
|         } | ||||
|         match out.len() { | ||||
|             1 => out.pop(), | ||||
|             _ => Some(RuleKind::Follow(out)), | ||||
|         } | ||||
|     } | ||||
|     pub fn repeat(&mut self) -> Option<RuleKind<'a>> { | ||||
|         let out = self.not()?; | ||||
|         let out = match self.space().peek() { | ||||
|             Some('*') => RuleKind::Any(out.into()), | ||||
|             Some('+') => RuleKind::Many(out.into()), | ||||
|             Some('?') => RuleKind::Maybe(out.into()), | ||||
|             _ => return Some(out), | ||||
|         }; | ||||
|         self.take(); | ||||
|         Some(out) | ||||
|     } | ||||
|     pub fn not(&mut self) -> Option<RuleKind<'a>> { | ||||
|         match self.space().take_one(|c| '!' == c) { | ||||
|             Some(_) => Some(RuleKind::Not(self.prime()?.into())), | ||||
|             _ => self.prime(), | ||||
|         } | ||||
|     } | ||||
|     pub fn prime(&mut self) -> Option<RuleKind<'a>> { | ||||
|         Some(match self.space().peek()? { | ||||
|             '(' => return self.group(), | ||||
|             '"' => RuleKind::Str(self.str()?), | ||||
|             '\'' => RuleKind::Chr(self.chr()?), | ||||
|             _ => RuleKind::Ident(self.ident()?), | ||||
|         }) | ||||
|     } | ||||
|     pub fn group(&mut self) -> Option<RuleKind<'a>> { | ||||
|         self.take_one(|c| '(' == c)?; | ||||
|         let out = self.either()?; | ||||
|         if self.take_one(|c| ')' == c).is_none() { | ||||
|             panic!("Groups should have terminating ')': {}", self.tail) | ||||
|         } | ||||
|         Some(RuleKind::Group(out.into())) | ||||
|     } | ||||
|     pub fn ident(&mut self) -> Option<&'a str> { | ||||
|         self.start().take_one(is_xid_start)?; | ||||
|         self.take_many(is_xid_continue); | ||||
|         Some(self.fragment()) | ||||
|     } | ||||
|     pub fn chr(&mut self) -> Option<&'a str> { | ||||
|         self.space().take_one(|c| '\'' == c)?; | ||||
|         self.start().take_many(|c| '\'' != c); | ||||
|         let out = self.fragment(); | ||||
|         if self.take_one(|c| '\'' == c).is_none() { | ||||
|             panic!("chr should have terminating '\'': {}", self.tail) | ||||
|         } | ||||
|         Some(out) | ||||
|     } | ||||
|     pub fn str(&mut self) -> Option<&'a str> { | ||||
|         self.space().take_one(|c| '\"' == c)?; | ||||
|         self.start().take_many(|c| '\"' != c); | ||||
|         let out = self.fragment(); | ||||
|         if self.take_one(|c| '\"' == c).is_none() { | ||||
|             panic!("str should have terminating '\"': {}", self.tail) | ||||
|         } | ||||
|         Some(out) | ||||
|     } | ||||
|  | ||||
|     pub fn comment(&mut self) -> Option<&'a str> { | ||||
|         let start = self.tail; | ||||
|         while self.space().take_one(|c| '(' == c).is_some() { | ||||
|             self.take_one(|c| '*' == c)?; | ||||
|             while let Some(c) = self.peek() { | ||||
|                 match c { | ||||
|                     '*' => { | ||||
|                         self.take_one(|c| '*' == c)?; | ||||
|                         if self.take_one(|c| ')' == c).is_some() { | ||||
|                             break; | ||||
|                         } | ||||
|                     } | ||||
|                     '(' => { | ||||
|                         self.comment(); | ||||
|                     } | ||||
|                     _ => { | ||||
|                         self.take(); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         let out = &self.text[start..self.tail]; | ||||
|         (out.len() > 1).then_some(out) | ||||
|     } | ||||
| } | ||||
							
								
								
									
										22
									
								
								src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/main.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| use std::error::Error; | ||||
|  | ||||
| use grammatical::*; | ||||
|  | ||||
| fn main() -> Result<(), Box<dyn Error>> { | ||||
|     for file in std::env::args().skip(1) { | ||||
|         let file = std::fs::read_to_string(file)?; | ||||
|         let mut p = Parser::new(&file); | ||||
|         while let Some(rule) = p.rule() { | ||||
|             println!("{rule}"); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     for line in std::io::stdin().lines() { | ||||
|         let line = line?; | ||||
|         let mut p = Parser::new(&line); | ||||
|         while let Some(rule) = p.rule() { | ||||
|             println!("{} = {{ {} }}", rule.name, rule.body); | ||||
|         } | ||||
|     } | ||||
|     Ok(()) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user