From e49a3e9fecfc62c4aa8906f0a0fd5392a9c3ee9f Mon Sep 17 00:00:00 2001
From: John
Date: Mon, 25 Sep 2023 14:26:56 -0500
Subject: [PATCH] lexer: Tokenize string literals (resolves #5)

---
 libconlang/src/lib.rs | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/libconlang/src/lib.rs b/libconlang/src/lib.rs
index 7702213..8e89659 100644
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@@ -10,6 +10,7 @@ pub mod token {
         Comment,
         Identifier,
         Integer,
+        String,
     }
     #[derive(Clone, Copy, Debug, PartialEq, Eq)]
     pub struct Token {
@@ -124,6 +125,17 @@
                 .end()?,
             )
         }
+        pub fn string(&mut self) -> Option<Token> {
+            self.skip_whitespace();
+            self.produce_token(
+                Type::String,
+                Rule::new(self.text())
+                    .char('"')
+                    .and_any(|rule| rule.str(r#"\""#).or(|rule| rule.not_char('"')))
+                    .char('"')
+                    .end()?,
+            )
+        }
     }

     #[derive(Clone, Debug, PartialEq, Eq)]
@@ -343,6 +355,25 @@ mod tests {
             assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
         }
     }
+    mod string {
+        use super::*;
+        #[test]
+        fn empty_string() {
+            assert_whole_input_is_token("\"\"", Lexer::string, Type::String);
+        }
+        #[test]
+        fn unicode_string() {
+            assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::string, Type::String);
+        }
+        #[test]
+        fn escape_string() {
+            assert_whole_input_is_token(
+                r#"" \"This is a quote\" ""#,
+                Lexer::string,
+                Type::String,
+            );
+        }
+    }
 }
 mod parser {
     // TODO