cl-lexer: Move lexer into its own crate
This commit is contained in:
@@ -1,546 +0,0 @@
|
||||
//! Converts a text file into tokens
|
||||
use cl_token::*;
|
||||
use cl_structures::span::Loc;
|
||||
use std::{
|
||||
iter::Peekable,
|
||||
str::{Chars, FromStr},
|
||||
};
|
||||
use unicode_xid::UnicodeXID;
|
||||
|
||||
pub mod lexer_iter {
|
||||
//! Iterator over a [`Lexer`], returning [`LResult<Token>`]s
|
||||
use super::{
|
||||
error::{LResult, Reason},
|
||||
Lexer, Token,
|
||||
};
|
||||
|
||||
/// Iterator over a [`Lexer`], returning [`LResult<Token>`]s
|
||||
pub struct LexerIter<'t> {
|
||||
lexer: Lexer<'t>,
|
||||
}
|
||||
impl<'t> Iterator for LexerIter<'t> {
|
||||
type Item = LResult<Token>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.lexer.scan() {
|
||||
Ok(v) => Some(Ok(v)),
|
||||
Err(e) => {
|
||||
if e.reason == Reason::EndOfFile {
|
||||
None
|
||||
} else {
|
||||
Some(Err(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> IntoIterator for Lexer<'t> {
|
||||
type Item = LResult<Token>;
|
||||
type IntoIter = LexerIter<'t>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
LexerIter { lexer: self }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The Lexer iterates over the characters in a body of text, searching for [Tokens](Token).
///
/// # Examples
/// ```rust
/// # use conlang::lexer::Lexer;
/// // Read in your code from somewhere
/// let some_code = "
///     fn main () {
///         // TODO: code goes here!
///     }
/// ";
/// // Create a lexer over your code
/// let mut lexer = Lexer::new(some_code);
/// // Scan for a single token
/// let first_token = lexer.scan().unwrap();
/// println!("{first_token:?}");
/// // Loop over all the rest of the tokens
/// for token in lexer {
///     # let token: Result<_,()> = Ok(token.unwrap());
///     match token {
///         Ok(token) => println!("{token:?}"),
///         Err(e) => eprintln!("{e:?}"),
///     }
/// }
/// ```
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
    // Peekable stream of source characters (one-char lookahead).
    iter: Peekable<Chars<'t>>,
    // Character offset of the start of the token currently being scanned
    // (`consume` advances one char at a time, so this counts chars, not bytes).
    start: usize,
    // 1-indexed (line, column) of the token start.
    start_loc: (u32, u32),
    // Character offset of the next unconsumed character.
    current: usize,
    // 1-indexed (line, column) of the next unconsumed character.
    current_loc: (u32, u32),
}
|
||||
|
||||
impl<'t> Lexer<'t> {
    /// Creates a new [Lexer] over a [str]
    pub fn new(text: &'t str) -> Self {
        Self {
            iter: text.chars().peekable(),
            start: 0,
            start_loc: (1, 1),
            current: 0,
            current_loc: (1, 1),
        }
    }
    /// Scans through the text, searching for the next [Token].
    ///
    /// Single-character tokens are produced directly; characters that may begin
    /// a longer token dispatch to a dedicated helper (`amp`, `dot`, `string`, …).
    pub fn scan(&mut self) -> LResult<Token> {
        match self.skip_whitespace().peek()? {
            '{' => self.consume()?.produce(Type::LCurly, ()),
            '}' => self.consume()?.produce(Type::RCurly, ()),
            '[' => self.consume()?.produce(Type::LBrack, ()),
            ']' => self.consume()?.produce(Type::RBrack, ()),
            '(' => self.consume()?.produce(Type::LParen, ()),
            ')' => self.consume()?.produce(Type::RParen, ()),
            '&' => self.consume()?.amp(),
            '@' => self.consume()?.produce(Type::At, ()),
            '\\' => self.consume()?.produce(Type::Backslash, ()),
            '!' => self.consume()?.bang(),
            '|' => self.consume()?.bar(),
            ':' => self.consume()?.colon(),
            ',' => self.consume()?.produce(Type::Comma, ()),
            '.' => self.consume()?.dot(),
            '=' => self.consume()?.equal(),
            '`' => self.consume()?.produce(Type::Grave, ()),
            '>' => self.consume()?.greater(),
            '#' => self.consume()?.hash(),
            '<' => self.consume()?.less(),
            '-' => self.consume()?.minus(),
            '+' => self.consume()?.plus(),
            '?' => self.consume()?.produce(Type::Question, ()),
            '%' => self.consume()?.rem(),
            ';' => self.consume()?.produce(Type::Semi, ()),
            '/' => self.consume()?.slash(),
            '*' => self.consume()?.star(),
            '~' => self.consume()?.produce(Type::Tilde, ()),
            '^' => self.consume()?.xor(),
            '0' => self.consume()?.int_with_base(),
            '1'..='9' => self.digits::<10>(),
            '"' => self.consume()?.string(),
            '\'' => self.consume()?.character(),
            '_' => self.identifier(),
            i if i.is_xid_start() => self.identifier(),
            e => {
                // Report the offending character, but still consume it so a
                // subsequent scan() can make progress past the error.
                let err = Err(Error::unexpected_char(e, self.line(), self.col()));
                let _ = self.consume();
                err
            }
        }
    }
    /// Returns the current line
    pub fn line(&self) -> u32 {
        self.start_loc.0
    }
    /// Returns the current column
    pub fn col(&self) -> u32 {
        self.start_loc.1
    }
    // Returns the next character and advances past it.
    fn next(&mut self) -> LResult<char> {
        let out = self.peek();
        self.consume()?;
        out
    }
    // Returns the next character without advancing; EOF becomes an error.
    fn peek(&mut self) -> LResult<char> {
        self.iter
            .peek()
            .copied()
            .ok_or(Error::end_of_file(self.line(), self.col()))
    }
    // Emits a token of type `ty` carrying `data`, anchored at the recorded
    // token-start location, then marks the current position as the start of
    // the next token.
    fn produce(&mut self, ty: Type, data: impl Into<Data>) -> LResult<Token> {
        let loc = self.start_loc;
        self.start_loc = self.current_loc;
        self.start = self.current;
        Ok(Token::new(ty, data, loc.0, loc.1))
    }
    // Skips whitespace and resets the token-start markers so the next token
    // begins after the skipped run.
    fn skip_whitespace(&mut self) -> &mut Self {
        while let Ok(c) = self.peek() {
            if !c.is_whitespace() {
                break;
            }
            let _ = self.consume();
        }
        self.start = self.current;
        self.start_loc = self.current_loc;
        self
    }
    // Advances one character, maintaining the char offset and the
    // (line, column) bookkeeping; `\n` starts a new line at column 1.
    fn consume(&mut self) -> LResult<&mut Self> {
        self.current += 1;
        match self.iter.next() {
            Some('\n') => {
                let (line, col) = &mut self.current_loc;
                *line += 1;
                *col = 1;
            }
            Some(_) => self.current_loc.1 += 1,
            None => Err(Error::end_of_file(self.line(), self.col()))?,
        }
        Ok(self)
    }
}
|
||||
/// Digraphs and trigraphs
///
/// Each helper is entered with its first character already consumed, and uses
/// one character of lookahead (`peek`) to decide between the single-character
/// token and its longer forms.
impl<'t> Lexer<'t> {
    /// `&` | `&&` | `&=`
    fn amp(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('&') => self.consume()?.produce(Type::AmpAmp, ()),
            Ok('=') => self.consume()?.produce(Type::AmpEq, ()),
            _ => self.produce(Type::Amp, ()),
        }
    }
    /// `!` | `!!` | `!=`
    fn bang(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('!') => self.consume()?.produce(Type::BangBang, ()),
            Ok('=') => self.consume()?.produce(Type::BangEq, ()),
            _ => self.produce(Type::Bang, ()),
        }
    }
    /// `|` | `||` | `|=`
    fn bar(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('|') => self.consume()?.produce(Type::BarBar, ()),
            Ok('=') => self.consume()?.produce(Type::BarEq, ()),
            _ => self.produce(Type::Bar, ()),
        }
    }
    /// `:` | `::`
    fn colon(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok(':') => self.consume()?.produce(Type::ColonColon, ()),
            _ => self.produce(Type::Colon, ()),
        }
    }
    /// `.` | `..` | `..=`
    fn dot(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('.') => {
                if let Ok('=') = self.consume()?.peek() {
                    self.consume()?.produce(Type::DotDotEq, ())
                } else {
                    self.produce(Type::DotDot, ())
                }
            }
            _ => self.produce(Type::Dot, ()),
        }
    }
    /// `=` | `==` | `=>`
    fn equal(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::EqEq, ()),
            Ok('>') => self.consume()?.produce(Type::FatArrow, ()),
            _ => self.produce(Type::Eq, ()),
        }
    }
    /// `>` | `>=` | `>>` | `>>=`
    fn greater(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::GtEq, ()),
            Ok('>') => {
                if let Ok('=') = self.consume()?.peek() {
                    self.consume()?.produce(Type::GtGtEq, ())
                } else {
                    self.produce(Type::GtGt, ())
                }
            }
            _ => self.produce(Type::Gt, ()),
        }
    }
    /// `#` | `#!`
    fn hash(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('!') => self.consume()?.produce(Type::HashBang, ()),
            _ => self.produce(Type::Hash, ()),
        }
    }
    /// `<` | `<=` | `<<` | `<<=`
    fn less(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::LtEq, ()),
            Ok('<') => {
                if let Ok('=') = self.consume()?.peek() {
                    self.consume()?.produce(Type::LtLtEq, ())
                } else {
                    self.produce(Type::LtLt, ())
                }
            }
            _ => self.produce(Type::Lt, ()),
        }
    }
    /// `-` | `-=` | `->`
    fn minus(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::MinusEq, ()),
            Ok('>') => self.consume()?.produce(Type::Arrow, ()),
            _ => self.produce(Type::Minus, ()),
        }
    }
    /// `+` | `+=`
    fn plus(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::PlusEq, ()),
            _ => self.produce(Type::Plus, ()),
        }
    }
    /// `%` | `%=`
    fn rem(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::RemEq, ()),
            _ => self.produce(Type::Rem, ()),
        }
    }
    /// `/` | `/=`, or a `//` / `/*` comment
    fn slash(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::SlashEq, ()),
            Ok('/') => self.consume()?.line_comment(),
            Ok('*') => self.consume()?.block_comment(),
            _ => self.produce(Type::Slash, ()),
        }
    }
    /// `*` | `*=`
    fn star(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::StarEq, ()),
            _ => self.produce(Type::Star, ()),
        }
    }
    /// `^` | `^=` | `^^`
    fn xor(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => self.consume()?.produce(Type::XorEq, ()),
            Ok('^') => self.consume()?.produce(Type::XorXor, ()),
            _ => self.produce(Type::Xor, ()),
        }
    }
}
|
||||
/// Comments
|
||||
impl<'t> Lexer<'t> {
|
||||
fn line_comment(&mut self) -> LResult<Token> {
|
||||
while Ok('\n') != self.peek() {
|
||||
self.consume()?;
|
||||
}
|
||||
self.produce(Type::Comment, ())
|
||||
}
|
||||
fn block_comment(&mut self) -> LResult<Token> {
|
||||
while let Ok(c) = self.next() {
|
||||
if '*' == c && Ok('/') == self.next() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.produce(Type::Comment, ())
|
||||
}
|
||||
}
|
||||
/// Identifiers
|
||||
impl<'t> Lexer<'t> {
|
||||
fn identifier(&mut self) -> LResult<Token> {
|
||||
let mut out = String::from(self.xid_start()?);
|
||||
while let Ok(c) = self.xid_continue() {
|
||||
out.push(c)
|
||||
}
|
||||
if let Ok(keyword) = Keyword::from_str(&out) {
|
||||
self.produce(Type::Keyword(keyword), ())
|
||||
} else {
|
||||
self.produce(Type::Identifier, Data::Identifier(out.into()))
|
||||
}
|
||||
}
|
||||
fn xid_start(&mut self) -> LResult<char> {
|
||||
match self.peek()? {
|
||||
xid if xid == '_' || xid.is_xid_start() => {
|
||||
self.consume()?;
|
||||
Ok(xid)
|
||||
}
|
||||
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
|
||||
}
|
||||
}
|
||||
fn xid_continue(&mut self) -> LResult<char> {
|
||||
match self.peek()? {
|
||||
xid if xid.is_xid_continue() => {
|
||||
self.consume()?;
|
||||
Ok(xid)
|
||||
}
|
||||
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Integers
impl<'t> Lexer<'t> {
    /// Scans an integer literal after a leading `0`: dispatches on a base
    /// prefix (`0x`, `0d`, `0o`, `0b`), reads `0`-prefixed decimal digits
    /// directly, and produces the integer 0 for a bare `0`.
    fn int_with_base(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('x') => self.consume()?.digits::<16>(),
            Ok('d') => self.consume()?.digits::<10>(),
            Ok('o') => self.consume()?.digits::<8>(),
            Ok('b') => self.consume()?.digits::<2>(),
            Ok('0'..='9') => self.digits::<10>(),
            _ => self.produce(Type::Integer, 0),
        }
    }
    /// Accumulates base-`B` digits into a `u128` and produces an Integer token.
    // NOTE(review): the loop continues over *any* ascii alphanumeric, so a
    // character outside the base (e.g. `2` in `0b102`) is reported by
    // `digit` as an invalid digit rather than silently ending the literal —
    // presumably intentional; confirm.
    fn digits<const B: u32>(&mut self) -> LResult<Token> {
        let mut value = self.digit::<B>()? as u128;
        while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
            // NOTE(review): unchecked arithmetic — a literal wider than u128
            // overflows (panics in debug builds); confirm intended.
            value = value * B as u128 + self.digit::<B>()? as u128;
        }
        self.produce(Type::Integer, value)
    }
    /// Consumes one character and converts it to a digit in base `B`,
    /// or errors with InvalidDigit.
    fn digit<const B: u32>(&mut self) -> LResult<u32> {
        let digit = self.peek()?;
        self.consume()?;
        digit
            .to_digit(B)
            .ok_or(Error::invalid_digit(digit, self.line(), self.col()))
    }
}
|
||||
/// Strings and characters
impl<'t> Lexer<'t> {
    /// Scans the body of a string literal (the opening `"` is already
    /// consumed), unescaping each character up to the closing `"`.
    fn string(&mut self) -> LResult<Token> {
        let mut value = String::new();
        while '"'
            != self
                .peek()
                // End-of-input inside a string is reported as an unmatched `"`.
                .map_err(|e| e.mask_reason(Reason::UnmatchedDelimiters('"')))?
        {
            value.push(self.unescape()?)
        }
        self.consume()?.produce(Type::String, value)
    }
    /// Scans a character literal (the opening `'` is already consumed):
    /// exactly one (possibly escaped) character followed by the closing `'`.
    fn character(&mut self) -> LResult<Token> {
        let out = self.unescape()?;
        match self.peek()? {
            '\'' => self.consume()?.produce(Type::Character, out),
            _ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
        }
    }
    /// Unescape a single character
    fn unescape(&mut self) -> LResult<char> {
        // A character that is not a backslash stands for itself.
        match self.next() {
            Ok('\\') => (),
            other => return other,
        }
        Ok(match self.next()? {
            'a' => '\x07', // bell
            'b' => '\x08', // backspace
            'f' => '\x0c', // form feed
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'x' => self.hex_escape()?,
            'u' => self.unicode_escape()?,
            '0' => '\0',
            // NOTE(review): any other escaped character is passed through
            // verbatim rather than reported via Reason::UnknownEscape — confirm.
            chr => chr,
        })
    }
    /// unescape a single 2-digit hex escape
    fn hex_escape(&mut self) -> LResult<char> {
        let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
        char::from_u32(out).ok_or(Error::bad_unicode(out, self.line(), self.col()))
    }
    /// unescape a single \u{} unicode escape
    fn unicode_escape(&mut self) -> LResult<char> {
        let mut out = 0;
        // The escape must open with `{`.
        let Ok('{') = self.peek() else {
            return Err(Error::invalid_escape('u', self.line(), self.col()));
        };
        self.consume()?;
        // Accumulate hex digits until the closing `}`.
        while let Ok(c) = self.peek() {
            match c {
                '}' => {
                    self.consume()?;
                    return char::from_u32(out).ok_or(Error::bad_unicode(
                        out,
                        self.line(),
                        self.col(),
                    ));
                }
                _ => out = (out << 4) + self.digit::<16>()?,
            }
        }
        // Input ended before the closing `}`.
        Err(Error::invalid_escape('u', self.line(), self.col()))
    }
}
|
||||
|
||||
impl<'t> From<&Lexer<'t>> for Loc {
|
||||
fn from(value: &Lexer<'t>) -> Self {
|
||||
Loc(value.line(), value.col())
|
||||
}
|
||||
}
|
||||
|
||||
use error::{Error, LResult, Reason};
|
||||
pub mod error {
    //! [Error] type for the [Lexer](super::Lexer)
    use std::fmt::Display;

    /// Result type with [Err] = [Error]
    pub type LResult<T> = Result<T, Error>;
    /// A lexing error, tagged with the (line, col) at which it occurred.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct Error {
        pub reason: Reason,
        pub line: u32,
        pub col: u32,
    }
    /// The reason for the [Error]
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub enum Reason {
        /// Found an opening delimiter of type [char], but not the expected closing delimiter
        UnmatchedDelimiters(char),
        /// Found a character that doesn't belong to any [Type](crate::token::token_type::Type)
        UnexpectedChar(char),
        /// Found a character that's not valid in identifiers while looking for an identifier
        NotIdentifier(char),
        /// Found a character that's not valid in an escape sequence while looking for an escape
        /// sequence
        UnknownEscape(char),
        /// Escape sequence contains invalid hexadecimal digit or unmatched braces
        InvalidEscape(char),
        /// Character is not a valid digit in the requested base
        InvalidDigit(char),
        /// Base conversion requested, but the base character was not in the set of known
        /// characters
        UnknownBase(char),
        /// Unicode escape does not map to a valid unicode code-point
        BadUnicode(u32),
        /// Reached end of input
        EndOfFile,
    }
    // Generates one positional constructor on Error per Reason variant
    // (see the `error_impl` macro definition below).
    error_impl! {
        unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c),
        unexpected_char(c: char) => Reason::UnexpectedChar(c),
        not_identifier(c: char) => Reason::NotIdentifier(c),
        unknown_escape(e: char) => Reason::UnknownEscape(e),
        invalid_escape(e: char) => Reason::InvalidEscape(e),
        invalid_digit(digit: char) => Reason::InvalidDigit(digit),
        unknown_base(base: char) => Reason::UnknownBase(base),
        bad_unicode(value: u32) => Reason::BadUnicode(value),
        end_of_file => Reason::EndOfFile,
    }
    impl Error {
        /// Changes the [Reason] of this error
        pub(super) fn mask_reason(self, reason: Reason) -> Self {
            Self { reason, ..self }
        }
        /// Returns the [Reason] for this error
        pub fn reason(&self) -> &Reason {
            &self.reason
        }
        /// Returns the (line, col) where the error happened
        pub fn location(&self) -> (u32, u32) {
            (self.line, self.col)
        }
    }
    /// Declarative macro (`decl_macro`): each entry `name(args) => reason`
    /// expands to a `pub(super) fn name(args…, line, col) -> Self`
    /// constructor on [Error] carrying that [Reason].
    macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) {
        #[allow(dead_code)]
        impl Error {
            $(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self {
                Self { reason: $reason, line, col }
            })*
        }
    }
    impl std::error::Error for Error {}
    impl Display for Error {
        // Renders as `line:col: reason`.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}:{}: {}", self.line, self.col, self.reason)
        }
    }
    impl Display for Reason {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c}` in input"},
                Reason::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"),
                Reason::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"),
                Reason::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"),
                Reason::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"),
                Reason::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"),
                Reason::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"),
                Reason::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"),
                Reason::EndOfFile => write!(f, "Reached end of input"),
            }
        }
    }
}
|
||||
@@ -2,8 +2,6 @@
|
||||
#![warn(clippy::all)]
|
||||
#![feature(decl_macro)]
|
||||
|
||||
pub mod lexer;
|
||||
|
||||
pub mod resolver;
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -5,173 +5,6 @@ mod ast {
|
||||
// TODO
|
||||
}
|
||||
mod lexer {
    use crate::lexer::Lexer;
    use cl_token::*;

    /// For each named test: lexes `$test` and asserts the sequence of token
    /// *types* equals `$expect`.
    macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
        #[test]
        fn $f() {$(
            assert_eq!(
                Lexer::new($test)
                    .into_iter()
                    .map(|t| t.unwrap().ty())
                    .collect::<Vec<_>>(),
                dbg!($expect)
            );
        )*}
    )*}

    /// For each named test: lexes `$test` and asserts the sequence of token
    /// *data* payloads equals `$expect`.
    macro test_lexer_data_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
        #[test]
        fn $f() {$(
            assert_eq!(
                Lexer::new($test)
                    .into_iter()
                    .map(|t| t.unwrap().into_data())
                    .collect::<Vec<_>>(),
                dbg!($expect)
            );
        )*}
    )*}

    /// Convert an `[ expr, ... ]` into a `[ *, ... ]`
    macro td ($($id:expr),*) {
        [$($id.into()),*]
    }

    mod ident {
        use super::*;
        // Builds the expected `[Data::Identifier(..), ...]` array.
        macro ident ($($id:literal),*) {
            [$(Data::Identifier($id.into())),*]
        }
        test_lexer_data_type! {
            underscore { "_ _" => ident!["_", "_"] }
            unicode { "_ε ε_" => ident!["_ε", "ε_"] }
            many_underscore { "____________________________________" =>
                ident!["____________________________________"] }
        }
    }
    mod keyword {
        use super::*;
        // Builds the expected `[Type::Keyword(..), ...]` array.
        macro kw($($k:ident),*) {
            [ $(Type::Keyword(Keyword::$k),)* ]
        }
        test_lexer_output_type! {
            kw_break { "break break" => kw![Break, Break] }
            kw_continue { "continue continue" => kw![Continue, Continue] }
            kw_else { "else else" => kw![Else, Else] }
            kw_false { "false false" => kw![False, False] }
            kw_for { "for for" => kw![For, For] }
            kw_fn { "fn fn" => kw![Fn, Fn] }
            kw_if { "if if" => kw![If, If] }
            kw_in { "in in" => kw![In, In] }
            kw_let { "let let" => kw![Let, Let] }
            kw_return { "return return" => kw![Return, Return] }
            kw_true { "true true" => kw![True, True] }
            kw_while { "while while" => kw![While, While] }
            keywords { "break continue else false for fn if in let return true while" =>
                kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
        }
    }
    mod integer {
        use super::*;
        // Each base spells the same values: 0, 1, 0x15, 0x2100, 0x8000.
        test_lexer_data_type! {
            hex {
                "0x0 0x1 0x15 0x2100 0x8000" =>
                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
            }
            dec {
                "0d0 0d1 0d21 0d8448 0d32768" =>
                td![0, 0x1, 0x15, 0x2100, 0x8000]
            }
            oct {
                "0o0 0o1 0o25 0o20400 0o100000" =>
                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
            }
            bin {
                "0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
            }
            baseless {
                "0 1 21 8448 32768" =>
                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
            }
        }
    }
    mod string {
        use super::*;
        test_lexer_data_type! {
            empty_string {
                "\"\"" =>
                td![String::from("")]
            }
            unicode_string {
                "\"I 💙 🦈!\"" =>
                td![String::from("I 💙 🦈!")]
            }
            escape_string {
                " \"This is a shark: \\u{1f988}\" " =>
                td![String::from("This is a shark: 🦈")]
            }
        }
    }
    mod punct {
        use super::*;
        test_lexer_output_type! {
            l_curly { "{ {" => [ Type::LCurly, Type::LCurly ] }
            r_curly { "} }" => [ Type::RCurly, Type::RCurly ] }
            l_brack { "[ [" => [ Type::LBrack, Type::LBrack ] }
            r_brack { "] ]" => [ Type::RBrack, Type::RBrack ] }
            l_paren { "( (" => [ Type::LParen, Type::LParen ] }
            r_paren { ") )" => [ Type::RParen, Type::RParen ] }
            amp { "& &" => [ Type::Amp, Type::Amp ] }
            amp_amp { "&& &&" => [ Type::AmpAmp, Type::AmpAmp ] }
            amp_eq { "&= &=" => [ Type::AmpEq, Type::AmpEq ] }
            arrow { "-> ->" => [ Type::Arrow, Type::Arrow] }
            at { "@ @" => [ Type::At, Type::At] }
            backslash { "\\ \\" => [ Type::Backslash, Type::Backslash] }
            bang { "! !" => [ Type::Bang, Type::Bang] }
            bangbang { "!! !!" => [ Type::BangBang, Type::BangBang] }
            bangeq { "!= !=" => [ Type::BangEq, Type::BangEq] }
            bar { "| |" => [ Type::Bar, Type::Bar] }
            barbar { "|| ||" => [ Type::BarBar, Type::BarBar] }
            bareq { "|= |=" => [ Type::BarEq, Type::BarEq] }
            colon { ": :" => [ Type::Colon, Type::Colon] }
            comma { ", ," => [ Type::Comma, Type::Comma] }
            dot { ". ." => [ Type::Dot, Type::Dot] }
            dotdot { ".. .." => [ Type::DotDot, Type::DotDot] }
            dotdoteq { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
            eq { "= =" => [ Type::Eq, Type::Eq] }
            eqeq { "== ==" => [ Type::EqEq, Type::EqEq] }
            fatarrow { "=> =>" => [ Type::FatArrow, Type::FatArrow] }
            grave { "` `" => [ Type::Grave, Type::Grave] }
            gt { "> >" => [ Type::Gt, Type::Gt] }
            gteq { ">= >=" => [ Type::GtEq, Type::GtEq] }
            gtgt { ">> >>" => [ Type::GtGt, Type::GtGt] }
            gtgteq { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
            hash { "# #" => [ Type::Hash, Type::Hash] }
            lt { "< <" => [ Type::Lt, Type::Lt] }
            lteq { "<= <=" => [ Type::LtEq, Type::LtEq] }
            ltlt { "<< <<" => [ Type::LtLt, Type::LtLt] }
            ltlteq { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
            minus { "- -" => [ Type::Minus, Type::Minus] }
            minuseq { "-= -=" => [ Type::MinusEq, Type::MinusEq] }
            plus { "+ +" => [ Type::Plus, Type::Plus] }
            pluseq { "+= +=" => [ Type::PlusEq, Type::PlusEq] }
            question { "? ?" => [ Type::Question, Type::Question] }
            rem { "% %" => [ Type::Rem, Type::Rem] }
            remeq { "%= %=" => [ Type::RemEq, Type::RemEq] }
            semi { "; ;" => [ Type::Semi, Type::Semi] }
            slash { "/ /" => [ Type::Slash, Type::Slash] }
            slasheq { "/= /=" => [ Type::SlashEq, Type::SlashEq] }
            star { "* *" => [ Type::Star, Type::Star] }
            stareq { "*= *=" => [ Type::StarEq, Type::StarEq] }
            tilde { "~ ~" => [ Type::Tilde, Type::Tilde] }
            xor { "^ ^" => [ Type::Xor, Type::Xor] }
            xoreq { "^= ^=" => [ Type::XorEq, Type::XorEq] }
            xorxor { "^^ ^^" => [ Type::XorXor, Type::XorXor] }
        }
    }
}
|
||||
mod parser {
|
||||
// TODO
|
||||
|
||||
Reference in New Issue
Block a user