diff --git a/src/lib.rs b/src/lib.rs
index d31c42b..db9050c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,4 @@
 use std::{iter::Peekable, str::CharIndices};
-
 use unicode_ident::*;
 
 /// Rule = ident '=' Either? ';' ;
@@ -94,6 +93,9 @@ impl<'a> Parser<'a> {
             tail: 0,
         }
     }
+    pub fn error(&mut self, kind: ErrorKind) -> error::Error {
+        error::Error::new(self.head, self.tail, kind)
+    }
     pub fn start(&mut self) -> &mut Self {
         self.space();
         self.head = self.tail;
@@ -105,8 +107,11 @@ impl<'a> Parser<'a> {
         } = self;
         &text[head..tail]
     }
-    pub fn peek(&mut self) -> Option<char> {
-        self.chars.peek().map(|(_, c)| *c)
+    pub fn peek(&mut self) -> Result<char> {
+        self.chars
+            .peek()
+            .map(|(_, c)| *c)
+            .ok_or_else(|| self.error(ErrorKind::EndOfInput))
     }
     pub fn take(&mut self) -> Option<(usize, char)> {
         let out = self.chars.next();
@@ -116,14 +121,19 @@ impl<'a> Parser<'a> {
         };
         out
     }
-    pub fn take_one(&mut self, f: fn(char) -> bool) -> Option<&mut Self> {
-        self.chars.peek().filter(|(_, c)| f(*c)).is_some().then(|| {
+    pub fn take_one(&mut self, f: fn(char) -> bool) -> Result<&mut Self> {
+        let Some(&(_, c)) = self.chars.peek() else {
+            Err(self.error(ErrorKind::EndOfInput))?
+        };
+        if f(c) {
             self.take();
-            self
-        })
+            Ok(self)
+        } else {
+            Err(self.error(ErrorKind::Unexpected(c)))
+        }
     }
     pub fn take_many(&mut self, f: fn(char) -> bool) -> &mut Self {
-        while self.take_one(f).is_some() {}
+        while self.take_one(f).is_ok() {}
         self
     }
     pub fn space(&mut self) -> &mut Self {
@@ -132,9 +142,9 @@ impl<'a> Parser<'a> {
 }
 
 impl<'a> Parser<'a> {
-    pub fn rule(&mut self) -> Option<Rule<'a>> {
+    pub fn rule(&mut self) -> Result<Rule<'a>> {
         let out = Rule {
-            comment: self.comment(),
+            comment: self.comment().ok(),
             name: self.ident()?,
             body: {
                 self.space()
@@ -143,102 +153,91 @@ impl<'a> Parser<'a> {
                     .unwrap_or_default()
             },
         };
-        if self.space().take_one(|c| ';' == c).is_none() {
-            panic!("Rule should end in ';': {}..{}", self.head, self.tail)
-        }
-        Some(out)
+        self.space().take_one(|c| ';' == c).map(|_| out)
     }
-    pub fn either(&mut self) -> Option<RuleKind<'a>> {
+    pub fn either(&mut self) -> Result<RuleKind<'a>> {
         let mut out = vec![self.follow()?];
-        while self.space().take_one(|c| '|' == c).is_some() {
+        while self.space().take_one(|c| '|' == c).is_ok() {
             out.push(self.follow()?)
         }
-        match out.len() {
-            1 => out.pop(),
-            _ => Some(RuleKind::Either(out)),
-        }
+        Ok(match out.len() {
+            1 => out.pop().expect("pop should succeed when length is 1"),
+            _ => RuleKind::Either(out),
+        })
     }
-    pub fn follow(&mut self) -> Option<RuleKind<'a>> {
+    pub fn follow(&mut self) -> Result<RuleKind<'a>> {
         let mut out = vec![];
-        while let Some(rule) = self.repeat() {
+        while let Ok(rule) = self.repeat() {
             out.push(rule)
         }
-        match out.len() {
-            1 => out.pop(),
-            _ => Some(RuleKind::Follow(out)),
-        }
+        Ok(match out.len() {
+            1 => out.pop().expect("pop should succeed when length is 1"),
+            _ => RuleKind::Follow(out),
+        })
     }
-    pub fn repeat(&mut self) -> Option<RuleKind<'a>> {
+    pub fn repeat(&mut self) -> Result<RuleKind<'a>> {
         let out = self.not()?;
         let out = match self.space().peek() {
-            Some('*') => RuleKind::Any(out.into()),
-            Some('+') => RuleKind::Many(out.into()),
-            Some('?') => RuleKind::Maybe(out.into()),
-            _ => return Some(out),
+            Ok('*') => RuleKind::Any(out.into()),
+            Ok('+') => RuleKind::Many(out.into()),
+            Ok('?') => RuleKind::Maybe(out.into()),
+            _ => return Ok(out),
         };
         self.take();
-        Some(out)
+        Ok(out)
     }
-    pub fn not(&mut self) -> Option<RuleKind<'a>> {
+    pub fn not(&mut self) -> Result<RuleKind<'a>> {
         match self.space().take_one(|c| '!' == c) {
-            Some(_) => Some(RuleKind::Not(self.prime()?.into())),
+            Ok(_) => Ok(RuleKind::Not(self.prime()?.into())),
             _ => self.prime(),
         }
     }
-    pub fn prime(&mut self) -> Option<RuleKind<'a>> {
-        Some(match self.space().peek()? {
+    pub fn prime(&mut self) -> Result<RuleKind<'a>> {
+        Ok(match self.space().peek()? {
             '(' => return self.group(),
             '"' => RuleKind::Str(self.str()?),
             '\'' => RuleKind::Chr(self.chr()?),
             _ => RuleKind::Ident(self.ident()?),
         })
     }
-    pub fn group(&mut self) -> Option<RuleKind<'a>> {
+    pub fn group(&mut self) -> Result<RuleKind<'a>> {
         self.take_one(|c| '(' == c)?;
         let out = self.either()?;
-        if self.take_one(|c| ')' == c).is_none() {
-            panic!("Groups should have terminating ')': {}", self.tail)
-        }
-        Some(RuleKind::Group(out.into()))
+        self.take_one(|c| ')' == c)
+            .map(|_| RuleKind::Group(out.into()))
     }
 
-    pub fn ident(&mut self) -> Option<&'a str> {
+    pub fn ident(&mut self) -> Result<&'a str> {
         self.start().take_one(is_xid_start)?;
         self.take_many(is_xid_continue);
-        Some(self.fragment())
+        Ok(self.fragment())
     }
-    pub fn chr(&mut self) -> Option<&'a str> {
+    pub fn chr(&mut self) -> Result<&'a str> {
         self.space().take_one(|c| '\'' == c)?;
         self.start().take_many(|c| '\'' != c);
         let out = self.fragment();
-        if self.take_one(|c| '\'' == c).is_none() {
-            panic!("chr should have terminating '\'': {}", self.tail)
-        }
-        Some(out)
+        self.take_one(|c| '\'' == c).map(|_| out)
     }
-    pub fn str(&mut self) -> Option<&'a str> {
+    pub fn str(&mut self) -> Result<&'a str> {
         self.space().take_one(|c| '\"' == c)?;
         self.start().take_many(|c| '\"' != c);
         let out = self.fragment();
-        if self.take_one(|c| '\"' == c).is_none() {
-            panic!("str should have terminating '\"': {}", self.tail)
-        }
-        Some(out)
+        self.take_one(|c| '"' == c).map(|_| out)
     }
-    pub fn comment(&mut self) -> Option<&'a str> {
+    pub fn comment(&mut self) -> Result<&'a str> {
         let start = self.tail;
-        while self.space().take_one(|c| '(' == c).is_some() {
+        while self.space().take_one(|c| '(' == c).is_ok() {
             self.take_one(|c| '*' == c)?;
-            while let Some(c) = self.peek() {
-                match c {
+            loop {
+                match self.peek()? {
                     '*' => {
                         self.take_one(|c| '*' == c)?;
-                        if self.take_one(|c| ')' == c).is_some() {
+                        if self.take_one(|c| ')' == c).is_ok() {
                             break;
                         }
                     }
                     '(' => {
-                        self.comment();
+                        let _ = self.comment();
                     }
                     _ => {
                         self.take();
@@ -247,6 +246,69 @@ impl<'a> Parser<'a> {
             }
         }
         let out = &self.text[start..self.tail];
-        (out.len() > 1).then_some(out)
+        if out.len() > 1 {
+            Ok(out)
+        } else {
+            Err(self.error(ErrorKind::NoComment))
+        }
+    }
+}
+
+use error::{ErrorKind, Result};
+pub mod error {
+
+    pub type Result<T> = std::result::Result<T, Error>;
+
+    #[derive(Debug)]
+    pub struct Error {
+        place: (usize, usize),
+        kind: ErrorKind,
+    }
+    impl Error {
+        pub fn new(start: usize, end: usize, kind: ErrorKind) -> Self {
+            Self {
+                place: (start, end),
+                kind,
+            }
+        }
+        pub fn is_end(&self) -> bool {
+            matches!(self.kind, ErrorKind::EndOfInput)
+        }
+    }
+    impl std::error::Error for Error {}
+    impl std::fmt::Display for Error {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            let Self {
+                place: (start, end),
+                kind,
+            } = self;
+            write!(f, "[{start}..{end}]: {kind} at {end}")
+        }
+    }
+
+    #[derive(Debug)]
+    pub enum ErrorKind {
+        Unterminated(Terminable),
+        Unexpected(char),
+        EndOfInput,
+        NoComment,
+    }
+    impl std::fmt::Display for ErrorKind {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            match self {
+                ErrorKind::Unterminated(t) => write!(f, "unterminated {t:?}"),
+                ErrorKind::Unexpected(c) => write!(f, "unexpected character {c}"),
+                ErrorKind::EndOfInput => write!(f, "end of input"),
+                ErrorKind::NoComment => write!(f, "no comment"),
+            }
+        }
+    }
+
+    #[derive(Debug)]
+    pub enum Terminable {
+        Comment,
+        Group,
+        Str,
+        Chr,
     }
 }
diff --git a/src/main.rs b/src/main.rs
index 3609a92..66bae46 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,17 +6,27 @@ fn main() -> Result<(), Box<dyn Error>> {
     for file in std::env::args().skip(1) {
         let file = std::fs::read_to_string(file)?;
         let mut p = Parser::new(&file);
-        while let Some(rule) = p.rule() {
-            println!("{rule}");
-        }
+        parse(&mut p);
     }
 
     for line in std::io::stdin().lines() {
         let line = line?;
         let mut p = Parser::new(&line);
-        while let Some(rule) = p.rule() {
-            println!("{} = {{ {} }}", rule.name, rule.body);
-        }
+        parse(&mut p);
     }
     Ok(())
 }
+
+fn parse(p: &mut Parser) {
+    loop {
+        match p.rule() {
+            Ok(rule) => {
+                println!("{rule}");
+                continue;
+            }
+            Err(e) if e.is_end() => {}
+            Err(e) => eprintln!("Error {e}"),
+        }
+        break;
+    }
+}