conlang: Rename literals; split, compose, and document Rules

- Renamed literal Types to reflect their literal nature
  - This allows for consistent naming across future non-literal Types
- Complicated lexer Rules have been split into composable sub-rules and moved into the Rule struct (see the sketch below)
  - This improves modularity and allows sharing of sub-rules across rules
- Documented each lexer rule with (at least) a one-line blurb describing its function
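
As an illustration of the new composable style (a minimal sketch, assuming Rule and its constructor are public as conlang::lexer::Rule, and that end() returns Some(length of the matched prefix), as skip_whitespace and the reworked tests imply):

    use conlang::lexer::Rule;

    // Sub-rules are plain methods on Rule, so complicated rules chain smaller
    // ones: comment() is a line/shebang comment or a block comment, and
    // identifier() is ('_' | XID_START) XID_CONTINUE*.
    assert_eq!(Rule::new("// hello").comment().end(), Some(8));
    assert_eq!(Rule::new("_ident rest").identifier().end(), Some(6));
    assert_eq!(Rule::new("\"str\"").string().end(), Some(5));

    // A chain that fails anywhere yields None instead of a length.
    assert_eq!(Rule::new("/* unterminated").comment().end(), None);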
parent 46e72e4889
commit 097e2c4f11
.gitignore (vendored):
@@ -1,2 +1,3 @@
 **/Cargo.lock
 target
+*.pest
dummy.cl:
@@ -2,3 +2,8 @@
 // This is an example Conlang file.
 
 /* Conlang supports block comments! */
+ident  // Identifier
+.1     // literal float
+0.1    // literal float
+0x1234 // literal integer
+"str"  // literal string
Lexer example:

@@ -1,18 +1,16 @@
 //! This example grabs input from stdin, lexes it, and prints which lexer rules matched
 #![allow(unused_imports)]
 use conlang::lexer::Lexer;
-use std::{io::stdin, error::Error};
+use std::{error::Error, io::stdin};
 
-fn main() -> Result<(), Box<dyn Error>>{
+fn main() -> Result<(), Box<dyn Error>> {
     // get input from stdin
     for line in stdin().lines() {
         let line = line?;
-        // lex the line
-        for func in [Lexer::line_comment, Lexer::block_comment, Lexer::shebang_comment, Lexer::identifier, Lexer::integer, Lexer::float, Lexer::string] {
-            if let Some(token) = func(&mut Lexer::new(&line)) {
-                println!("{:?}: {}", token, &line[token.range()])
-            }
+        let mut lexer = Lexer::new(&line);
+        while let Some(token) = lexer.any() {
+            println!("{:?}: {}", token, &line[token.range()])
         }
     }
     Ok(())
 }
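
Worth noting: the example no longer re-lexes the whole line once per rule function. A single Lexer now drives classification through any(), which (per the lexer diff below) tries comment, then identifier, then literal, where literal itself tries string, then float, then integer. A sketch of the resulting behavior on one of the dummy.cl lines above (a minimal illustration, not output from this commit):

    use conlang::lexer::Lexer;

    let mut lexer = Lexer::new("0x1234 // literal integer");
    let first = lexer.any().unwrap();  // comment and identifier fail at '0'; LitInteger matches
    let second = lexer.any().unwrap(); // whitespace is skipped, then the line comment matches
    assert!(lexer.any().is_none());    // input exhausted: every class fails, so any() is None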
Library:

@@ -9,10 +9,12 @@ pub mod token {
     pub enum Type {
         Comment,
         Identifier,
-        Integer,
-        Float,
-        String,
+        // Literals
+        LitInteger,
+        LitFloat,
+        LitString,
     }
 
     #[derive(Clone, Copy, Debug, PartialEq, Eq)]
     pub struct Token {
         ty: Type,
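
One consequence of the rename: the literal variants now share a Lit prefix, so downstream code can group them cleanly once non-literal Integer/Float/String types exist. A hypothetical consumer, not part of this commit:

    use conlang::token::Type;

    // Hypothetical downstream use: the Lit prefix keeps literal variants grouped.
    fn class_of(ty: Type) -> &'static str {
        match ty {
            Type::Comment => "comment",
            Type::Identifier => "identifier",
            Type::LitInteger | Type::LitFloat | Type::LitString => "literal",
        }
    }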
@@ -59,104 +61,60 @@ pub mod lexer {
         pub fn new(text: &'t str) -> Self {
             Self { text, cursor: 0 }
         }
+        /// Skips whitespace in the text
         fn skip_whitespace(&mut self) {
             if let Some(len) = Rule::new(self.text()).and_any(Rule::whitespace).end() {
                 self.cursor += len
             }
         }
+        /// Advances the cursor and produces a token
         fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> {
             let start = self.cursor;
             self.cursor += len;
             Some(Token::new(ty, start, self.cursor))
         }
+        /// Gets a slice of text beginning at the cursor
         fn text(&self) -> &str {
             &self.text[self.cursor..]
         }
+        // classifies a single arbitrary token
+        pub fn any(&mut self) -> Option<Token> {
+            None.or_else(|| self.comment())
+                .or_else(|| self.identifier())
+                .or_else(|| self.literal())
+        }
+        pub fn literal(&mut self) -> Option<Token> {
+            None.or_else(|| self.lit_string())
+                .or_else(|| self.lit_float())
+                .or_else(|| self.lit_integer())
+        }
         // functions for lexing individual tokens
-        pub fn line_comment(&mut self) -> Option<Token> {
-            // line_comment := "//" ~ (^newline)*
+        // comments
+        pub fn comment(&mut self) -> Option<Token> {
             self.skip_whitespace();
-            self.produce_token(
-                Type::Comment,
-                Rule::new(self.text())
-                    .str("//")
-                    .and_any(|rule| rule.not_char('\n'))
-                    .end()?,
-            )
-        }
-        pub fn block_comment(&mut self) -> Option<Token> {
-            // block_comment := "/*" ~ (block_comment | all_but("*/"))* ~ "*/"
-            self.skip_whitespace();
-            self.produce_token(
-                Type::Comment,
-                Rule::new(self.text())
-                    .str("/*")
-                    .and_any(|rule| rule.not_str("*/"))
-                    .str("*/")
-                    .end()?,
-            )
-        }
-        pub fn shebang_comment(&mut self) -> Option<Token> {
-            // shebang_comment := "#!/" ~ (^newline)*
-            self.skip_whitespace();
-            self.produce_token(
-                Type::Comment,
-                Rule::new(self.text())
-                    .str("#!/")
-                    .and_any(|rule| rule.not_char('\n'))
-                    .end()?,
-            )
+            self.produce_token(Type::Comment, Rule::new(self.text()).comment().end()?)
         }
+        // identifiers
         pub fn identifier(&mut self) -> Option<Token> {
             self.skip_whitespace();
-            self.produce_token(
-                Type::Identifier,
-                Rule::new(self.text())
-                    .char('_')
-                    .or(Rule::xid_start)
-                    .and_any(Rule::xid_continue)
-                    .end()?,
-            )
+            self.produce_token(Type::Identifier, Rule::new(self.text()).identifier().end()?)
         }
-        pub fn integer(&mut self) -> Option<Token> {
+        // literals
+        pub fn lit_integer(&mut self) -> Option<Token> {
             self.skip_whitespace();
-            self.produce_token(
-                Type::Integer,
-                Rule::new(self.text())
-                    .and_one_of(&[
-                        &|rule| rule.str("0x").and_any(Rule::hex_digit),
-                        &|rule| rule.str("0d").and_any(Rule::dec_digit),
-                        &|rule| rule.str("0o").and_any(Rule::oct_digit),
-                        &|rule| rule.str("0b").and_any(Rule::bin_digit),
-                        &|rule| rule.and_many(Rule::dec_digit),
-                    ])
-                    .end()?,
-            )
+            self.produce_token(Type::LitInteger, Rule::new(self.text()).integer().end()?)
         }
-        pub fn float(&mut self) -> Option<Token> {
+        pub fn lit_float(&mut self) -> Option<Token> {
             self.skip_whitespace();
-            self.produce_token(
-                Type::Float,
-                Rule::new(self.text())
-                    .and_any(Rule::dec_digit)
-                    .char('.')
-                    .and_many(Rule::dec_digit)
-                    .end()?,
-            )
+            self.produce_token(Type::LitFloat, Rule::new(self.text()).float().end()?)
         }
-        pub fn string(&mut self) -> Option<Token> {
+        pub fn lit_string(&mut self) -> Option<Token> {
             self.skip_whitespace();
-            self.produce_token(
-                Type::String,
-                Rule::new(self.text())
-                    .char('"')
-                    .and_any(|rule| rule.and(Rule::string_escape).or(|rule| rule.not_char('"')))
-                    .char('"')
-                    .end()?,
-            )
+            self.produce_token(Type::LitString, Rule::new(self.text()).string().end()?)
         }
     }
 
+    /// A lexer [Rule] matches patterns in text in a declarative manner
     #[derive(Clone, Debug, PartialEq, Eq)]
     pub struct Rule<'t> {
         text: &'t str,
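
Every token function now follows the same shape: skip whitespace, run one named Rule, and let produce_token turn the matched length into a Token while advancing the cursor. Because end() reports only the length of the matched prefix, the remainder of the input is left for the next call. A sketch of that bookkeeping (same public-API assumption as above; the 0..6 expectation mirrors the base16 test near the end of this commit):

    use conlang::lexer::{Lexer, Rule};

    // The rule stops at the end of the integer; trailing input is untouched.
    assert_eq!(Rule::new("0x1234 \"hello\"").integer().end(), Some(6));

    // So a Lexer yields the integer first, then the string on the next call.
    let mut lexer = Lexer::new("0x1234 \"hello\"");
    let int_tok = lexer.any().unwrap(); // LitInteger, range 0..6
    let str_tok = lexer.any().unwrap(); // LitString, range 7..14
    assert_eq!(&"0x1234 \"hello\""[int_tok.range()], "0x1234");
    assert_eq!(&"0x1234 \"hello\""[str_tok.range()], "\"hello\"");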
@@ -176,53 +134,135 @@ pub mod lexer {
     }
 
     impl<'t> Rule<'t> {
+        /// Matches a block, line, or shebang comment
+        pub fn comment(self) -> Self {
+            self.and_either(Self::line_comment, Self::block_comment)
+        }
+        /// Matches a line or shebang comment
+        fn line_comment(self) -> Self {
+            // line_comment := ("//" | "#!/") (!newline)*
+            self.str("//")
+                .or(|r| r.str("#!/"))
+                .and_any(|r| r.not_char('\n'))
+        }
+        /// Matches a block comment
+        fn block_comment(self) -> Self {
+            // block_comment := "/*" (block_comment | all_but("*/"))* "*/"
+            self.str("/*")
+                .and_any(|r| r.and_either(|f| f.block_comment(), |g| g.not_str("*/")))
+                .str("*/")
+        }
+        /// Matches a Rust-style identifier
+        pub fn identifier(self) -> Self {
+            // identifier := ('_' | XID_START) ~ XID_CONTINUE*
+            self.char('_')
+                .or(Rule::xid_start)
+                .and_any(Rule::xid_continue)
+        }
+        /// Matches a Rust-style base-prefixed int literal
+        fn int_literal_kind(self, prefix: &str, digit: impl Fn(Self) -> Self) -> Self {
+            // int_kind<Prefix, Digit> := Prefix '_'* Digit (Digit | '_')*
+            self.str(prefix)
+                .and_any(|r| r.char('_'))
+                .and(&digit)
+                .and_any(|r| r.and(&digit).or(|r| r.char('_')))
+        }
+        /// Matches a Rust-style integer literal
+        pub fn integer(self) -> Self {
+            // integer := (int_kind<0d, dec_digit> | int_kind<0x, hex_digit>
+            //     | int_kind<0o, oct_digit> | int_kind<0b, bin_digit> | dec_digit (dec_digit | '_')*)
+            self.and_one_of(&[
+                &|rule| rule.int_literal_kind("0d", Rule::dec_digit),
+                &|rule| rule.int_literal_kind("0x", Rule::hex_digit),
+                &|rule| rule.int_literal_kind("0o", Rule::oct_digit),
+                &|rule| rule.int_literal_kind("0b", Rule::bin_digit),
+                &|rule| {
+                    rule.dec_digit()
+                        .and_any(|r| r.dec_digit().or(|r| r.char('_')))
+                },
+            ])
+        }
+        /// Matches a float literal
+        // TODO: exponent form
+        pub fn float(self) -> Self {
+            self.and_any(Rule::dec_digit)
+                .char('.')
+                .and_many(Rule::dec_digit)
+        }
+        /// Matches one quote-delimited string literal
+        pub fn string(self) -> Self {
+            self.char('"').and_any(Rule::string_continue).char('"')
+        }
+        /// Matches one string escape sequence or non-`"` character
+        pub fn string_continue(self) -> Self {
+            self.and(Rule::string_escape).or(|rule| rule.not_char('"'))
+        }
+    }
+
+    impl<'t> Rule<'t> {
+        /// Matches a char lexicographically between start and end
         pub fn char_between(self, start: char, end: char) -> Self {
             self.char_fn(|c| start <= c && c <= end)
         }
+        /// Matches a single char
         pub fn char(self, c: char) -> Self {
             self.has(|rule| rule.text.starts_with(c), 1)
         }
+        /// Matches the entirety of a string slice
         pub fn str(self, s: &str) -> Self {
             self.has(|rule| rule.text.starts_with(s), s.len())
         }
+        /// Matches a char based on the output of a function
         pub fn char_fn(self, f: impl Fn(char) -> bool) -> Self {
             self.and(|rule| match rule.text.strip_prefix(&f) {
                 Some(text) => Self { text, taken: rule.taken + next_utf8(rule.text, 1), ..rule },
                 None => Self { is_alright: false, ..rule },
             })
         }
+        /// Matches a single char except c
         pub fn not_char(self, c: char) -> Self {
             self.has(|rule| !rule.text.starts_with(c), 1)
         }
+        /// Matches a single char unless the text starts with s
         pub fn not_str(self, s: &str) -> Self {
             self.has(|rule| !rule.text.starts_with(s), 1)
         }
+        // commonly used character classes
+        /// Matches one of any character
         pub fn any(self) -> Self {
             self.has(|_| true, 1)
         }
+        /// Matches one whitespace
         pub fn whitespace(self) -> Self {
             self.char_fn(|c| c.is_whitespace())
         }
+        /// Matches one XID_START
         pub fn xid_start(self) -> Self {
             use unicode_xid::UnicodeXID;
             self.char_fn(UnicodeXID::is_xid_start)
         }
+        /// Matches one XID_CONTINUE
         pub fn xid_continue(self) -> Self {
             use unicode_xid::UnicodeXID;
             self.char_fn(UnicodeXID::is_xid_continue)
         }
+        /// Matches one hexadecimal digit
         pub fn hex_digit(self) -> Self {
             self.char_fn(|c| c.is_ascii_hexdigit())
         }
+        /// Matches one decimal digit
         pub fn dec_digit(self) -> Self {
             self.char_fn(|c| c.is_ascii_digit())
         }
+        /// Matches one octal digit
         pub fn oct_digit(self) -> Self {
             self.char_between('0', '7')
         }
+        /// Matches one binary digit
         pub fn bin_digit(self) -> Self {
             self.char_between('0', '1')
         }
+        /// Matches any string escape "\."
         pub fn string_escape(self) -> Self {
             self.char('\\').and(Rule::any)
         }
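
The payoff of the generic int_literal_kind sub-rule is that all four bases share one grammar, including underscore separators after the prefix and between digits. A sketch under the same end()-length assumption as above (the underscore inputs follow the int_kind grammar comment; they are not exercised by this commit's tests):

    // Each base goes through the same shape: Prefix '_'* Digit (Digit | '_')*
    for (input, should_match) in [
        ("0x_DEAD_BEEF", true),
        ("0d1_000_000", true),
        ("0o777", true),
        ("0b1010", true),
        ("abc", false), // no base prefix and no leading decimal digit
    ] {
        assert_eq!(Rule::new(input).integer().end().is_some(), should_match, "{input}");
    }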
@@ -281,6 +321,8 @@ mod tests {
         // TODO
     }
     mod lexer {
+        use std::ops::Range;
+
         use crate::{
             lexer::*,
             token::{Token, Type},
@@ -288,11 +330,13 @@ mod tests {
 
         fn assert_whole_input_is_token<'t, F>(input: &'t str, f: F, ty: Type)
         where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
-            assert_has_type_and_len(input, f, ty, input.len())
+            assert_has_type_and_range(input, f, ty, 0..input.len())
         }
-        fn assert_has_type_and_len<'t, F>(input: &'t str, f: F, ty: Type, len: usize)
+        fn assert_has_type_and_range<'t, F>(input: &'t str, f: F, ty: Type, range: Range<usize>)
         where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
-            assert_eq!(Some(Token::new(ty, 0, len)), f(&mut Lexer::new(input)),)
+            let tok = f(&mut Lexer::new(input)).unwrap();
+            assert_eq!(ty, tok.ty());
+            assert_eq!(range, tok.range());
         }
 
         mod comment {
@@ -300,42 +344,47 @@ mod tests {
 
             #[test]
             fn line_comment() {
+                assert_whole_input_is_token("// comment!", Lexer::comment, Type::Comment);
+            }
+            #[test]
+            #[should_panic]
+            fn not_line_comment() {
+                assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
+            }
+            #[test]
+            fn block_comment() {
+                assert_whole_input_is_token("/* comment! */", Lexer::comment, Type::Comment);
+            }
+            #[test]
+            fn nested_block_comment() {
                 assert_whole_input_is_token(
-                    "// this is a comment",
-                    Lexer::line_comment,
+                    "/* a /* nested */ comment */",
+                    Lexer::comment,
                     Type::Comment,
                 );
             }
             #[test]
             #[should_panic]
-            fn not_line_comment() {
-                assert_whole_input_is_token("fn main() {}", Lexer::line_comment, Type::Comment);
-            }
-            #[test]
-            fn block_comment() {
+            fn unclosed_nested_comment() {
                 assert_whole_input_is_token(
-                    "/* this is a comment */",
-                    Lexer::block_comment,
+                    "/* improperly /* nested */ comment",
+                    Lexer::comment,
                     Type::Comment,
                 );
             }
             #[test]
             #[should_panic]
             fn not_block_comment() {
-                assert_whole_input_is_token("fn main() {}", Lexer::block_comment, Type::Comment);
+                assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
             }
             #[test]
             fn shebang_comment() {
-                assert_whole_input_is_token(
-                    "#!/ this is a comment",
-                    Lexer::shebang_comment,
-                    Type::Comment,
-                );
+                assert_whole_input_is_token("#!/ comment!", Lexer::comment, Type::Comment);
             }
             #[test]
             #[should_panic]
             fn not_shebang_comment() {
-                assert_whole_input_is_token("fn main() {}", Lexer::shebang_comment, Type::Comment);
+                assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
             }
         }
         mod identifier {
@@ -366,65 +415,70 @@ mod tests {
             use super::*;
             #[test]
             fn bare() {
-                assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
-                assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
-                assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("10010110", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("12345670", Lexer::lit_integer, Type::LitInteger);
+                assert_whole_input_is_token("1234567890", Lexer::lit_integer, Type::LitInteger);
             }
             #[test]
             fn base16() {
-                assert_has_type_and_len("0x1234", Lexer::integer, Type::Integer, 6);
-                assert_has_type_and_len("0x1234 \"hello\"", Lexer::integer, Type::Integer, 6);
+                assert_has_type_and_range("0x1234", Lexer::lit_integer, Type::LitInteger, 0..6);
+                assert_has_type_and_range(
+                    "0x1234 \"hello\"",
+                    Lexer::lit_integer,
+                    Type::LitInteger,
+                    0..6,
+                );
             }
             #[test]
             fn base10() {
-                assert_whole_input_is_token("0d1234", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("0d1234", Lexer::lit_integer, Type::LitInteger);
             }
             #[test]
             fn base8() {
-                assert_whole_input_is_token("0o1234", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("0o1234", Lexer::lit_integer, Type::LitInteger);
             }
             #[test]
             fn base2() {
-                assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
+                assert_whole_input_is_token("0b1010", Lexer::lit_integer, Type::LitInteger);
             }
         }
         mod float {
             use super::*;
             #[test]
             fn number_dot_number_is_float() {
-                assert_whole_input_is_token("1.0", Lexer::float, Type::Float);
+                assert_whole_input_is_token("1.0", Lexer::lit_float, Type::LitFloat);
             }
             #[test]
             fn nothing_dot_number_is_float() {
-                assert_whole_input_is_token(".0", Lexer::float, Type::Float);
+                assert_whole_input_is_token(".0", Lexer::lit_float, Type::LitFloat);
             }
             #[test]
             #[should_panic]
             fn number_dot_nothing_is_not_float() {
-                assert_whole_input_is_token("1.", Lexer::float, Type::Float);
+                assert_whole_input_is_token("1.", Lexer::lit_float, Type::LitFloat);
             }
             #[test]
             #[should_panic]
             fn nothing_dot_nothing_is_not_float() {
-                assert_whole_input_is_token(".", Lexer::float, Type::Float);
+                assert_whole_input_is_token(".", Lexer::lit_float, Type::LitFloat);
             }
         }
         mod string {
             use super::*;
             #[test]
             fn empty_string() {
-                assert_whole_input_is_token("\"\"", Lexer::string, Type::String);
+                assert_whole_input_is_token("\"\"", Lexer::lit_string, Type::LitString);
             }
             #[test]
             fn unicode_string() {
-                assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::string, Type::String);
+                assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::lit_string, Type::LitString);
             }
             #[test]
             fn escape_string() {
                 assert_whole_input_is_token(
                     r#"" \"This is a quote\" ""#,
-                    Lexer::string,
-                    Type::String,
+                    Lexer::lit_string,
+                    Type::LitString,
                 );
             }
         }