lexer: Tokenize string literals (resolves #5)

This commit is contained in:
John 2023-09-25 14:26:56 -05:00
parent 6f6d8ec916
commit e49a3e9fec

View File

@@ -10,6 +10,7 @@ pub mod token {
Comment,
Identifier,
Integer,
String,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Token {
@@ -124,6 +125,17 @@ pub mod lexer {
.end()?,
)
}
/// Lexes a double-quoted string literal and produces a [`Type::String`] token.
///
/// Leading whitespace is skipped first. The literal must open with `"` and
/// close with the next *unescaped* `"`. Inside the literal, the two-character
/// sequences `\\` and `\"` are each consumed as a unit; any other character
/// that is not `"` is consumed as-is.
///
/// Returns `None` if the rule's `.end()` yields `None` (propagated with `?`);
/// otherwise returns whatever `produce_token` returns.
pub fn string(&mut self) -> Option<Token> {
    self.skip_whitespace();
    self.produce_token(
        Type::String,
        Rule::new(self.text())
            .char('"')
            .and_any(|rule| {
                // `\\` has to be tried before `\"`: otherwise the second
                // backslash of an escaped backslash would pair up with the
                // closing quote (as in `"\\"`) and the literal would never
                // terminate where it should.
                rule.str(r#"\\"#)
                    .or(|rule| rule.str(r#"\""#))
                    .or(|rule| rule.not_char('"'))
            })
            .char('"')
            .end()?,
    )
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -343,6 +355,25 @@ mod tests {
assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
}
}
mod string {
    use super::*;

    /// A pair of quotes with nothing between them is a complete string token.
    #[test]
    fn empty_string() {
        let input = "\"\"";
        assert_whole_input_is_token(input, Lexer::string, Type::String);
    }

    /// Multi-byte characters inside the quotes are part of the string token.
    #[test]
    fn unicode_string() {
        let input = "\"I 💙 🦈!\"";
        assert_whole_input_is_token(input, Lexer::string, Type::String);
    }

    /// Backslash-escaped quotes do not terminate the string token.
    #[test]
    fn escape_string() {
        let input = r#"" \"This is a quote\" ""#;
        assert_whole_input_is_token(input, Lexer::string, Type::String);
    }
}
}
mod parser {
// TODO