token: Rename literal tokens & assoc. functions

John 2023-10-16 22:14:08 -05:00
parent c15490847a
commit 9c993b31a1


@@ -23,9 +23,10 @@ pub mod token {
         KwTrue,
         KwWhile,
         // Literals
-        LitInteger,
-        LitFloat,
-        LitString,
+        Integer,
+        Float,
+        String,
+        Character,
         // Delimiters
         LCurly,
         RCurly,
@@ -200,9 +201,10 @@ pub mod lexer {
         }
         /// Attempts to produce a [Type::LitString], [Type::LitFloat], or [Type::LitInteger]
         pub fn literal(&mut self) -> Option<Token> {
-            None.or_else(|| self.lit_string())
-                .or_else(|| self.lit_float())
-                .or_else(|| self.lit_integer())
+            None.or_else(|| self.string())
+                .or_else(|| self.character())
+                .or_else(|| self.float())
+                .or_else(|| self.integer())
         }
         /// Evaluates delimiter rules
         pub fn delimiter(&mut self) -> Option<Token> {
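The order of that `or_else` chain matters: `float` has to be attempted before `integer`, otherwise an input like `1.0` would yield an Integer token for the leading `1` and leave the `.0` behind. A minimal sketch of a test pinning that down, reusing the `assert_whole_input_is_token` helper from the test hunk further below (the test name here is hypothetical, not part of this commit):

    #[test]
    fn literal_prefers_float_over_integer() {
        // "1.0" must come back as a single Float token, not an Integer that stops at "1".
        assert_whole_input_is_token("1.0", Lexer::literal, Type::Float);
    }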
@@ -304,14 +306,17 @@ pub mod lexer {
             self.map_rule(|r| r.identifier(), Type::Identifier)
         }
         // literals
-        pub fn lit_integer(&mut self) -> Option<Token> {
-            self.map_rule(|r| r.integer(), Type::LitInteger)
+        pub fn integer(&mut self) -> Option<Token> {
+            self.map_rule(|r| r.integer(), Type::Integer)
         }
-        pub fn lit_float(&mut self) -> Option<Token> {
-            self.map_rule(|r| r.float(), Type::LitFloat)
+        pub fn float(&mut self) -> Option<Token> {
+            self.map_rule(|r| r.float(), Type::Float)
         }
-        pub fn lit_string(&mut self) -> Option<Token> {
-            self.map_rule(|r| r.string(), Type::LitString)
+        pub fn string(&mut self) -> Option<Token> {
+            self.map_rule(|r| r.string(), Type::String)
+        }
+        pub fn character(&mut self) -> Option<Token> {
+            self.map_rule(|r| r.character(), Type::Character)
         }
         // delimiters
         pub fn l_brack(&mut self) -> Option<Token> {
@@ -509,7 +514,7 @@ pub mod lexer {
                 .and_any(Rule::xid_continue)
         }
         /// Matches a Rust-style base-prefixed int literal
-        fn int_literal_kind(self, prefix: &str, digit: impl Fn(Self) -> Self) -> Self {
+        fn integer_kind(self, prefix: &str, digit: impl Fn(Self) -> Self) -> Self {
             // int_kind<Prefix, Digit> := Prefix '_'* Digit (Digit | '_')*
             self.str(prefix)
                 .and_any(|r| r.char('_'))
@@ -521,10 +526,10 @@
             // integer = (int_kind<0d, dec_digit> | int_kind<0x, hex_digit>
             //            | int_kind<0o, oct_digit> | int_kind<0b, bin_digit> | dec_digit (dec_digit | '_')*)
             self.and_one_of(&[
-                &|rule| rule.int_literal_kind("0d", Rule::dec_digit),
-                &|rule| rule.int_literal_kind("0x", Rule::hex_digit),
-                &|rule| rule.int_literal_kind("0o", Rule::oct_digit),
-                &|rule| rule.int_literal_kind("0b", Rule::bin_digit),
+                &|rule| rule.integer_kind("0d", Rule::dec_digit),
+                &|rule| rule.integer_kind("0x", Rule::hex_digit),
+                &|rule| rule.integer_kind("0o", Rule::oct_digit),
+                &|rule| rule.integer_kind("0b", Rule::bin_digit),
                 &|rule| {
                     rule.dec_digit()
                         .and_any(|r| r.dec_digit().or(|r| r.char('_')))
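Per the `int_kind<Prefix, Digit> := Prefix '_'* Digit (Digit | '_')*` grammar comment above, underscores are permitted immediately after the base prefix and between digits. A hypothetical extra test case illustrating that (not part of this commit; it assumes `hex_digit` accepts the letters A-F):

    #[test]
    fn underscores_in_prefixed_integers() {
        // prefix "0x", then '_'*, then a digit followed by any mix of digits and '_'
        assert_whole_input_is_token("0x__DEAD_BEEF", Lexer::integer, Type::Integer);
    }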
@@ -538,6 +543,13 @@ pub mod lexer {
                 .char('.')
                 .and_many(Rule::dec_digit)
         }
+        /// Matches one apostrophe-delimited char literal
+        pub fn character(self) -> Self {
+            self.char('\'').character_continue().char('\'')
+        }
+        pub fn character_continue(self) -> Self {
+            self.and(|rule| rule.string_escape().or(|rule| rule.not_char('\'')))
+        }
         /// Matches one quote-delimited string literal
         pub fn string(self) -> Self {
             self.char('"').and_any(Rule::string_continue).char('"')
@@ -821,81 +833,104 @@ mod tests {
         use super::*;
         #[test]
         fn literal_class() {
-            assert_whole_input_is_token("1_00000", Lexer::literal, Type::LitInteger);
-            assert_whole_input_is_token("1.00000", Lexer::literal, Type::LitFloat);
-            assert_whole_input_is_token("\"1.0\"", Lexer::literal, Type::LitString);
+            assert_whole_input_is_token("1_00000", Lexer::literal, Type::Integer);
+            assert_whole_input_is_token("1.00000", Lexer::literal, Type::Float);
+            assert_whole_input_is_token("\"1.0\"", Lexer::literal, Type::String);
+            assert_whole_input_is_token("'\"'", Lexer::literal, Type::Character);
         }
         mod integer {
             use super::*;
             #[test]
             fn bare() {
-                assert_whole_input_is_token("10010110", Lexer::lit_integer, Type::LitInteger);
-                assert_whole_input_is_token("12345670", Lexer::lit_integer, Type::LitInteger);
-                assert_whole_input_is_token("1234567890", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
             }
             #[test]
             fn base16() {
-                assert_has_type_and_range("0x1234", Lexer::lit_integer, Type::LitInteger, 0..6);
+                assert_has_type_and_range("0x1234", Lexer::integer, Type::Integer, 0..6);
                 assert_has_type_and_range(
                     "0x1234 \"hello\"",
-                    Lexer::lit_integer,
-                    Type::LitInteger,
+                    Lexer::integer,
+                    Type::Integer,
                     0..6,
                 );
             }
             #[test]
             fn base10() {
-                assert_whole_input_is_token("0d1234", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("0d1234", Lexer::integer, Type::Integer);
             }
             #[test]
             fn base8() {
-                assert_whole_input_is_token("0o1234", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("0o1234", Lexer::integer, Type::Integer);
             }
             #[test]
             fn base2() {
-                assert_whole_input_is_token("0b1010", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
             }
         }
         mod float {
             use super::*;
             #[test]
             fn number_dot_number_is_float() {
-                assert_whole_input_is_token("1.0", Lexer::lit_float, Type::LitFloat);
+                assert_whole_input_is_token("1.0", Lexer::float, Type::Float);
            }
             #[test]
             fn nothing_dot_number_is_float() {
-                assert_whole_input_is_token(".0", Lexer::lit_float, Type::LitFloat);
+                assert_whole_input_is_token(".0", Lexer::float, Type::Float);
             }
             #[test]
             #[should_panic]
             fn number_dot_nothing_is_not_float() {
-                assert_whole_input_is_token("1.", Lexer::lit_float, Type::LitFloat);
+                assert_whole_input_is_token("1.", Lexer::float, Type::Float);
             }
             #[test]
             #[should_panic]
             fn nothing_dot_nothing_is_not_float() {
-                assert_whole_input_is_token(".", Lexer::lit_float, Type::LitFloat);
+                assert_whole_input_is_token(".", Lexer::float, Type::Float);
             }
         }
         mod string {
             use super::*;
             #[test]
             fn empty_string() {
-                assert_whole_input_is_token("\"\"", Lexer::lit_string, Type::LitString);
+                assert_whole_input_is_token("\"\"", Lexer::string, Type::String);
             }
             #[test]
             fn unicode_string() {
-                assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::lit_string, Type::LitString);
+                assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::string, Type::String);
             }
             #[test]
             fn escape_string() {
                 assert_whole_input_is_token(
                     "\" \\\"This is a quote\\\" \"",
-                    Lexer::lit_string,
-                    Type::LitString,
+                    Lexer::string,
+                    Type::String,
                 );
             }
         }
+        mod char {
+            use super::*;
+            #[test]
+            fn plain_char() {
+                assert_whole_input_is_token("'A'", Lexer::character, Type::Character);
+                assert_whole_input_is_token("'a'", Lexer::character, Type::Character);
+                assert_whole_input_is_token("'#'", Lexer::character, Type::Character);
+            }
+            #[test]
+            fn unicode_char() {
+                assert_whole_input_is_token("'ε'", Lexer::character, Type::Character);
+            }
+            #[test]
+            fn escaped_char() {
+                assert_whole_input_is_token("'\\n'", Lexer::character, Type::Character);
+            }
+            #[test]
+            #[should_panic]
+            fn no_char() {
+                assert_whole_input_is_token("''", Lexer::character, Type::Character);
+            }
+        }
     }
     mod delimiter {
         use super::*;