From 247bb1f8f85133caac674b64d2f9f3f685ed91aa Mon Sep 17 00:00:00 2001
From: John
Date: Mon, 25 Sep 2023 16:43:50 -0500
Subject: [PATCH] integer: Match based on base, if present

---
 lerox/src/lib.rs      | 11 +++++++++++
 libconlang/src/lib.rs | 30 ++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/lerox/src/lib.rs b/lerox/src/lib.rs
index 3ceb6fc..d66a238 100644
--- a/lerox/src/lib.rs
+++ b/lerox/src/lib.rs
@@ -59,4 +59,15 @@ pub trait Combinator: Sized {
     where Self: Clone {
         self.clone().and(f).or(|_| g(self))
     }
+
+    /// Returns the result of the first f that succeeds, or self
+    fn and_one_of(mut self, f: &[&dyn Fn(Self) -> Self]) -> Self {
+        for &f in f {
+            self = self.into_alright().and(f);
+            if self.is_alright() {
+                break;
+            }
+        }
+        self
+    }
 }
diff --git a/libconlang/src/lib.rs b/libconlang/src/lib.rs
index b436fe6..c2bb274 100644
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@@ -58,6 +58,11 @@ pub mod lexer {
         pub fn new(text: &'t str) -> Self {
             Self { text, cursor: 0 }
         }
+        fn skip_whitespace(&mut self) {
+            if let Some(len) = Rule::new(self.text()).and_any(Rule::whitespace).end() {
+                self.cursor += len
+            }
+        }
         fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> {
             let start = self.cursor;
             self.cursor += len;
@@ -66,9 +71,6 @@
         fn text(&self) -> &str {
             &self.text[self.cursor..]
         }
-        fn skip_whitespace(&mut self) {
-            self.cursor += Rule::new(self.text).whitespace().end().unwrap_or_default()
-        }
         // functions for lexing individual tokens
         pub fn line_comment(&mut self) -> Option<Token> {
             // line_comment := "//" ~ (^newline)*
@@ -120,8 +122,13 @@
             self.produce_token(
                 Type::Integer,
                 Rule::new(self.text())
-                    .and_maybe(|rule| rule.char('0').char_fn(|c| "xdob".contains(c)))
-                    .and_many(|this| this.char_fn(|c| c.is_ascii_hexdigit()))
+                    .and_one_of(&[
+                        &|rule| rule.str("0x").and_any(Rule::hex_digit),
+                        &|rule| rule.str("0d").and_any(Rule::dec_digit),
+                        &|rule| rule.str("0o").and_any(Rule::oct_digit),
+                        &|rule| rule.str("0b").and_any(Rule::bin_digit),
+                        &|rule| rule.and_many(Rule::dec_digit),
+                    ])
                     .end()?,
             )
         }
@@ -192,6 +199,18 @@
             use unicode_xid::UnicodeXID;
             self.char_fn(UnicodeXID::is_xid_continue)
         }
+        pub fn hex_digit(self) -> Self {
+            self.char_fn(|c| c.is_ascii_hexdigit())
+        }
+        pub fn dec_digit(self) -> Self {
+            self.char_fn(|c| c.is_ascii_digit())
+        }
+        pub fn oct_digit(self) -> Self {
+            self.char_between('0', '7')
+        }
+        pub fn bin_digit(self) -> Self {
+            self.char_between('0', '1')
+        }
         fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
             let len = next_utf8(self.text, len);
             self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
@@ -335,7 +354,6 @@ mod tests {
         assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
         assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
         assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
-        assert_whole_input_is_token("123456789ABCDEF0", Lexer::integer, Type::Integer);
     }
     #[test]
     fn base16() {