From 6f6d8ec9166dc857c9e745ffce8f62fdd6cf8165 Mon Sep 17 00:00:00 2001
From: John
Date: Mon, 25 Sep 2023 14:25:00 -0500
Subject: [PATCH] lexer: Tokenize integer literals (resolves #3)

---
Review notes (free-form area after the separator; not part of the commit message):
* Adds the `Type::Integer` token kind and `Lexer::integer`, which skips leading
  whitespace and then matches an optional `0x`/`0d`/`0o`/`0b` prefix followed by
  ASCII hex digits.
* The digit predicate is `is_ascii_hexdigit` for every prefix, so digits are not
  restricted to the radix the prefix names (e.g. `0b1234` would presumably still
  lex as one Integer token) -- confirm that radix validation is meant to happen later.
* `Rule::and_maybe` / `Rule::and_many` are defined outside this patch; whether
  `and_many` requires at least one digit cannot be determined from here.

 libconlang/src/lib.rs | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/libconlang/src/lib.rs b/libconlang/src/lib.rs
index 642a305..7702213 100644
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@@ -9,6 +9,7 @@ pub mod token {
     pub enum Type {
         Comment,
         Identifier,
+        Integer,
     }
     #[derive(Clone, Copy, Debug, PartialEq, Eq)]
     pub struct Token {
@@ -113,6 +114,16 @@ pub mod lexer {
                     .end()?,
             )
         }
+        pub fn integer(&mut self) -> Option<Token> {
+            self.skip_whitespace();
+            self.produce_token(
+                Type::Integer,
+                Rule::new(self.text())
+                    .and_maybe(|rule| rule.char('0').char_fn(|c| "xdob".contains(c)))
+                    .and_many(|this| this.char_fn(|c| c.is_ascii_hexdigit()))
+                    .end()?,
+            )
+        }
     }
 
     #[derive(Clone, Debug, PartialEq, Eq)]
@@ -305,6 +316,33 @@ mod tests {
                 assert_whole_input_is_token("123456789", Lexer::identifier, Type::Identifier);
             }
         }
+        mod integer {
+            use super::*;
+            #[test]
+            fn bare() {
+                assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("123456789ABCDEF0", Lexer::integer, Type::Integer);
+            }
+            #[test]
+            fn base16() {
+                assert_has_type_and_len("0x1234", Lexer::integer, Type::Integer, 6);
+                assert_has_type_and_len("0x1234 \"hello\"", Lexer::integer, Type::Integer, 6);
+            }
+            #[test]
+            fn base10() {
+                assert_whole_input_is_token("0d1234", Lexer::integer, Type::Integer);
+            }
+            #[test]
+            fn base8() {
+                assert_whole_input_is_token("0o1234", Lexer::integer, Type::Integer);
+            }
+            #[test]
+            fn base2() {
+                assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
+            }
+        }
     }
     mod parser {
         // TODO