integer: Match based on base, if present

2023-09-25 16:43:50 -05:00 · 2023-09-25 16:43:50 -05:00 · 247bb1f8f8
commit 247bb1f8f8
parent 03660fd641
2 changed files with 35 additions and 6 deletions
--- a/lerox/src/lib.rs
+++ b/lerox/src/lib.rs
@ -59,4 +59,15 @@ pub trait Combinator: Sized {
    where Self: Clone {
        self.clone().and(f).or(|_| g(self))
    }
    /// Tries each function in `f` in order, stopping at the first attempt that leaves
    /// the combinator alright.
    ///
    /// Before every attempt the combinator is passed through `into_alright`, and the
    /// attempt itself is applied with `and`. If `f` is empty, `self` is returned
    /// untouched; if no attempt ends up alright, whatever the final attempt produced
    /// is returned.
    fn and_one_of(mut self, f: &[&dyn Fn(Self) -> Self]) -> Self {
        for alternative in f.iter().copied() {
            self = self.into_alright().and(alternative);
            if self.is_alright() {
                // First successful alternative wins; later ones are never tried.
                return self;
            }
        }
        self
    }
 }
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@ -58,6 +58,11 @@ pub mod lexer {
        /// Creates a new instance over `text` with the cursor at offset 0.
        pub fn new(text: &'t str) -> Self {
            let cursor = 0;
            Self { text, cursor }
        }
        /// Advances the cursor past whatever `Rule::whitespace`, applied through
        /// `and_any`, matches at the start of the remaining text.
        ///
        /// When the rule's `end()` yields `None`, the cursor stays where it is.
        fn skip_whitespace(&mut self) {
            let skipped = Rule::new(self.text()).and_any(Rule::whitespace).end();
            // Adding zero in the `None` case leaves the cursor unchanged.
            self.cursor += skipped.unwrap_or(0);
        }
        fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> {
            let start = self.cursor;
            self.cursor += len;
@ -66,9 +71,6 @@ pub mod lexer {
        /// Returns the part of the input that starts at the current cursor offset.
        fn text(&self) -> &str {
            let start = self.cursor;
            &self.text[start..]
        }
        /// Advances the cursor by the length the `whitespace` rule reports for the
        /// remaining (cursor-relative) text.
        ///
        /// When the rule's `end()` yields `None`, the cursor is left unchanged.
        fn skip_whitespace(&mut self) {
            // Match against `self.text()` (the input from the cursor onward), not the
            // `self.text` field: the field is the whole input, so the reported length
            // would describe the start of the input rather than the current position.
            let len = Rule::new(self.text()).whitespace().end().unwrap_or_default();
            self.cursor += len;
        }
        // functions for lexing individual tokens
        pub fn line_comment(&mut self) -> Option<Token> {
            // line_comment := "//" ~ (^newline)*
@ -120,8 +122,13 @@ pub mod lexer {
            self.produce_token(
                Type::Integer,
                Rule::new(self.text())
-                    .and_maybe(|rule| rule.char('0').char_fn(|c| "xdob".contains(c)))
+                    .and_one_of(&[
-                    .and_many(|this| this.char_fn(|c| c.is_ascii_hexdigit()))
+                        &|rule| rule.str("0x").and_any(Rule::hex_digit),
                        &|rule| rule.str("0d").and_any(Rule::dec_digit),
                        &|rule| rule.str("0o").and_any(Rule::oct_digit),
                        &|rule| rule.str("0b").and_any(Rule::bin_digit),
                        &|rule| rule.and_many(Rule::dec_digit),
                    ])
                    .end()?,
            )
        }
@ -192,6 +199,18 @@ pub mod lexer {
            use unicode_xid::UnicodeXID;
            self.char_fn(UnicodeXID::is_xid_continue)
        }
        /// Applies `char_fn` with a predicate that accepts ASCII hexadecimal digits
        /// (`0-9`, `a-f`, `A-F`).
        pub fn hex_digit(self) -> Self {
            let is_hex = |c: char| c.is_ascii_hexdigit();
            self.char_fn(is_hex)
        }
        /// Applies `char_fn` with a predicate that accepts ASCII decimal digits (`0-9`).
        pub fn dec_digit(self) -> Self {
            let is_dec = |c: char| c.is_ascii_digit();
            self.char_fn(is_dec)
        }
        /// Octal-digit rule: delegates to `char_between` with the bounds `'0'` and `'7'`.
        ///
        /// NOTE(review): presumably both bounds are inclusive, so `0` through `7` are
        /// accepted — confirm against `char_between`.
        pub fn oct_digit(self) -> Self {
            self.char_between('0', '7')
        }
        /// Binary-digit rule: delegates to `char_between` with the bounds `'0'` and `'1'`.
        ///
        /// NOTE(review): presumably both bounds are inclusive, so `0` and `1` are
        /// accepted — confirm against `char_between`.
        pub fn bin_digit(self) -> Self {
            self.char_between('0', '1')
        }
        fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
            let len = next_utf8(self.text, len);
            self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
@ -335,7 +354,6 @@ mod tests {
                assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
                assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
                assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
                assert_whole_input_is_token("123456789ABCDEF0", Lexer::integer, Type::Integer);
            }
            #[test]
            fn base16() {