conlang: Move all cl-libs into the compiler directory
This commit is contained in:
		
							
								
								
									
										556
									
								
								compiler/cl-lexer/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										556
									
								
								compiler/cl-lexer/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,556 @@ | ||||
| //! Converts a text file into tokens | ||||
| #![warn(clippy::all)] | ||||
| #![feature(decl_macro)] | ||||
| use cl_structures::span::Loc; | ||||
| use cl_token::{TokenKind as Kind, *}; | ||||
| use std::{ | ||||
|     iter::Peekable, | ||||
|     str::{Chars, FromStr}, | ||||
| }; | ||||
| use unicode_ident::*; | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests; | ||||
|  | ||||
| pub mod lexer_iter { | ||||
|     //! Adapts a [`Lexer`] into an [`Iterator`] of [`LResult<Token>`]s | ||||
|     use super::{ | ||||
|         error::{LResult, Reason}, | ||||
|         Lexer, Token, | ||||
|     }; | ||||
|  | ||||
|     /// Owning iterator over the [`Token`]s scanned by a [`Lexer`] | ||||
|     /// | ||||
|     /// Reaching the end of input ends the iteration; every other lexer error | ||||
|     /// is yielded as an [`Err`] item. | ||||
|     pub struct LexerIter<'t> { | ||||
|         lexer: Lexer<'t>, | ||||
|     } | ||||
|     impl<'t> Iterator for LexerIter<'t> { | ||||
|         type Item = LResult<Token>; | ||||
|         fn next(&mut self) -> Option<Self::Item> { | ||||
|             match self.lexer.scan() { | ||||
|                 // Running out of input is how iteration normally stops | ||||
|                 Err(e) if e.reason == Reason::EndOfFile => None, | ||||
|                 result => Some(result), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     impl<'t> IntoIterator for Lexer<'t> { | ||||
|         type Item = LResult<Token>; | ||||
|         type IntoIter = LexerIter<'t>; | ||||
|         fn into_iter(self) -> Self::IntoIter { | ||||
|             let lexer = self; | ||||
|             LexerIter { lexer } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// The Lexer iterates over the characters in a body of text, searching for [Tokens](Token). | ||||
| /// | ||||
| /// # Examples | ||||
| /// ```rust | ||||
| /// # use cl_lexer::Lexer; | ||||
| /// # fn main() -> Result<(), Box<dyn std::error::Error>> { | ||||
| /// // Read in your code from somewhere | ||||
| /// let some_code = " | ||||
| /// fn main () { | ||||
| ///     // TODO: code goes here! | ||||
| /// } | ||||
| /// "; | ||||
| /// // Create a lexer over your code | ||||
| /// let mut lexer = Lexer::new(some_code); | ||||
| /// // Scan for a single token | ||||
| /// let first_token = lexer.scan()?; | ||||
| /// println!("{first_token:?}"); | ||||
| /// // Loop over all the rest of the tokens | ||||
| /// for token in lexer { | ||||
| /// #   let token: Result<_,()> = Ok(token?); | ||||
| ///     match token { | ||||
| ///         Ok(token) => println!("{token:?}"), | ||||
| ///         Err(e) => eprintln!("{e:?}"), | ||||
| ///     } | ||||
| /// } | ||||
| /// # Ok(()) } | ||||
| /// ``` | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct Lexer<'t> { | ||||
|     /// Remaining characters of the input, with one character of lookahead | ||||
|     iter: Peekable<Chars<'t>>, | ||||
|     /// Value of `current` when the token being scanned began | ||||
|     start: usize, | ||||
|     /// `(line, column)` at which the token being scanned began; both start at 1 | ||||
|     start_loc: (u32, u32), | ||||
|     /// Running position counter, advanced by `consume` | ||||
|     current: usize, | ||||
|     /// `(line, column)` of the next unconsumed character; both start at 1 | ||||
|     current_loc: (u32, u32), | ||||
| } | ||||
|  | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// Creates a new [Lexer] over a [str] | ||||
|     /// | ||||
|     /// Scanning begins at the first character, reported as line 1, column 1. | ||||
|     pub fn new(text: &'t str) -> Self { | ||||
|         Self { | ||||
|             iter: text.chars().peekable(), | ||||
|             start: 0, | ||||
|             start_loc: (1, 1), | ||||
|             current: 0, | ||||
|             current_loc: (1, 1), | ||||
|         } | ||||
|     } | ||||
|     /// Scans through the text, searching for the next [Token] | ||||
|     /// | ||||
|     /// Leading whitespace is skipped, then the next character selects which | ||||
|     /// kind of token to scan. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// Returns an [Error] whose reason is [Reason::EndOfFile] once the input | ||||
|     /// is exhausted. A character that cannot start any token is consumed and | ||||
|     /// reported as an unexpected character, so scanning can resume after it. | ||||
|     pub fn scan(&mut self) -> LResult<Token> { | ||||
|         match self.skip_whitespace().peek()? { | ||||
|             '{' => self.consume()?.produce_op(Punct::LCurly), | ||||
|             '}' => self.consume()?.produce_op(Punct::RCurly), | ||||
|             '[' => self.consume()?.produce_op(Punct::LBrack), | ||||
|             ']' => self.consume()?.produce_op(Punct::RBrack), | ||||
|             '(' => self.consume()?.produce_op(Punct::LParen), | ||||
|             ')' => self.consume()?.produce_op(Punct::RParen), | ||||
|             '&' => self.consume()?.amp(), | ||||
|             '@' => self.consume()?.produce_op(Punct::At), | ||||
|             '\\' => self.consume()?.produce_op(Punct::Backslash), | ||||
|             '!' => self.consume()?.bang(), | ||||
|             '|' => self.consume()?.bar(), | ||||
|             ':' => self.consume()?.colon(), | ||||
|             ',' => self.consume()?.produce_op(Punct::Comma), | ||||
|             '.' => self.consume()?.dot(), | ||||
|             '=' => self.consume()?.equal(), | ||||
|             '`' => self.consume()?.produce_op(Punct::Grave), | ||||
|             '>' => self.consume()?.greater(), | ||||
|             '#' => self.consume()?.hash(), | ||||
|             '<' => self.consume()?.less(), | ||||
|             '-' => self.consume()?.minus(), | ||||
|             '+' => self.consume()?.plus(), | ||||
|             '?' => self.consume()?.produce_op(Punct::Question), | ||||
|             '%' => self.consume()?.rem(), | ||||
|             ';' => self.consume()?.produce_op(Punct::Semi), | ||||
|             '/' => self.consume()?.slash(), | ||||
|             '*' => self.consume()?.star(), | ||||
|             '~' => self.consume()?.produce_op(Punct::Tilde), | ||||
|             '^' => self.consume()?.xor(), | ||||
|             '0' => self.consume()?.int_with_base(), | ||||
|             '1'..='9' => self.digits::<10>(), | ||||
|             '"' => self.consume()?.string(), | ||||
|             '\'' => self.consume()?.character(), | ||||
|             '_' => self.identifier(), | ||||
|             i if is_xid_start(i) => self.identifier(), | ||||
|             e => { | ||||
|                 // Build the error before consuming, then skip the offending | ||||
|                 // character so the next scan does not trip over it again | ||||
|                 let err = Err(Error::unexpected_char(e, self.line(), self.col())); | ||||
|                 let _ = self.consume(); | ||||
|                 err | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     /// Returns the line on which the token currently being scanned starts | ||||
|     pub fn line(&self) -> u32 { | ||||
|         self.start_loc.0 | ||||
|     } | ||||
|     /// Returns the column at which the token currently being scanned starts | ||||
|     pub fn col(&self) -> u32 { | ||||
|         self.start_loc.1 | ||||
|     } | ||||
|     /// Consumes and returns the next character | ||||
|     fn next(&mut self) -> LResult<char> { | ||||
|         let c = self.peek()?; | ||||
|         self.consume()?; | ||||
|         Ok(c) | ||||
|     } | ||||
|     /// Returns the next character without consuming it | ||||
|     fn peek(&mut self) -> LResult<char> { | ||||
|         self.iter | ||||
|             .peek() | ||||
|             .copied() | ||||
|             .ok_or_else(|| Error::end_of_file(self.line(), self.col())) | ||||
|     } | ||||
|     /// Finishes the token being scanned, located where that token started, | ||||
|     /// and marks the current position as the start of the next one | ||||
|     fn produce(&mut self, kind: TokenKind, data: impl Into<TokenData>) -> LResult<Token> { | ||||
|         let (line, col) = self.start_loc; | ||||
|         self.start_loc = self.current_loc; | ||||
|         self.start = self.current; | ||||
|         Ok(Token::new(kind, data, line, col)) | ||||
|     } | ||||
|     /// Finishes the token being scanned as a data-less punctuation token | ||||
|     fn produce_op(&mut self, kind: Punct) -> LResult<Token> { | ||||
|         self.produce(TokenKind::Punct(kind), ()) | ||||
|     } | ||||
|     /// Consumes whitespace, then marks the current position as a token start | ||||
|     fn skip_whitespace(&mut self) -> &mut Self { | ||||
|         while let Ok(c) = self.peek() { | ||||
|             if !c.is_whitespace() { | ||||
|                 break; | ||||
|             } | ||||
|             let _ = self.consume(); | ||||
|         } | ||||
|         self.start = self.current; | ||||
|         self.start_loc = self.current_loc; | ||||
|         self | ||||
|     } | ||||
|     /// Consumes one character, advancing the position and the line/column | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// Returns an end-of-file [Error] when no characters remain. The position | ||||
|     /// is left untouched in that case, since nothing was consumed. | ||||
|     fn consume(&mut self) -> LResult<&mut Self> { | ||||
|         match self.iter.next() { | ||||
|             Some('\n') => { | ||||
|                 self.current_loc.0 += 1; | ||||
|                 self.current_loc.1 = 1; | ||||
|             } | ||||
|             Some(_) => self.current_loc.1 += 1, | ||||
|             None => return Err(Error::end_of_file(self.line(), self.col())), | ||||
|         } | ||||
|         self.current += 1; | ||||
|         Ok(self) | ||||
|     } | ||||
| } | ||||
| /// Digraphs and trigraphs | ||||
| /// | ||||
| /// Each method is called after the first character of the operator has been | ||||
| /// consumed, and extends the operator for as long as the lookahead allows. | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// `&`, `&&`, or `&=` | ||||
|     fn amp(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('&') => Punct::AmpAmp, | ||||
|             Ok('=') => Punct::AmpEq, | ||||
|             _ => return self.produce_op(Punct::Amp), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
|     /// `!`, `!!`, or `!=` | ||||
|     fn bang(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('!') => Punct::BangBang, | ||||
|             Ok('=') => Punct::BangEq, | ||||
|             _ => return self.produce_op(Punct::Bang), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
|     /// `|`, `||`, or `|=` | ||||
|     fn bar(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('|') => Punct::BarBar, | ||||
|             Ok('=') => Punct::BarEq, | ||||
|             _ => return self.produce_op(Punct::Bar), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
|     /// `:` or `::` | ||||
|     fn colon(&mut self) -> LResult<Token> { | ||||
|         if self.peek() == Ok(':') { | ||||
|             self.consume()?.produce_op(Punct::ColonColon) | ||||
|         } else { | ||||
|             self.produce_op(Punct::Colon) | ||||
|         } | ||||
|     } | ||||
|     /// `.`, `..`, or `..=` | ||||
|     fn dot(&mut self) -> LResult<Token> { | ||||
|         if self.peek() != Ok('.') { | ||||
|             return self.produce_op(Punct::Dot); | ||||
|         } | ||||
|         match self.consume()?.peek() { | ||||
|             Ok('=') => self.consume()?.produce_op(Punct::DotDotEq), | ||||
|             _ => self.produce_op(Punct::DotDot), | ||||
|         } | ||||
|     } | ||||
|     /// `=`, `==`, or `=>` | ||||
|     fn equal(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('=') => Punct::EqEq, | ||||
|             Ok('>') => Punct::FatArrow, | ||||
|             _ => return self.produce_op(Punct::Eq), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
|     /// `>`, `>=`, `>>`, or `>>=` | ||||
|     fn greater(&mut self) -> LResult<Token> { | ||||
|         match self.peek() { | ||||
|             Ok('=') => self.consume()?.produce_op(Punct::GtEq), | ||||
|             Ok('>') => match self.consume()?.peek() { | ||||
|                 Ok('=') => self.consume()?.produce_op(Punct::GtGtEq), | ||||
|                 _ => self.produce_op(Punct::GtGt), | ||||
|             }, | ||||
|             _ => self.produce_op(Punct::Gt), | ||||
|         } | ||||
|     } | ||||
|     /// `#` or `#!` | ||||
|     fn hash(&mut self) -> LResult<Token> { | ||||
|         if self.peek() == Ok('!') { | ||||
|             self.consume()?.produce_op(Punct::HashBang) | ||||
|         } else { | ||||
|             self.produce_op(Punct::Hash) | ||||
|         } | ||||
|     } | ||||
|     /// `<`, `<=`, `<<`, or `<<=` | ||||
|     fn less(&mut self) -> LResult<Token> { | ||||
|         match self.peek() { | ||||
|             Ok('=') => self.consume()?.produce_op(Punct::LtEq), | ||||
|             Ok('<') => match self.consume()?.peek() { | ||||
|                 Ok('=') => self.consume()?.produce_op(Punct::LtLtEq), | ||||
|                 _ => self.produce_op(Punct::LtLt), | ||||
|             }, | ||||
|             _ => self.produce_op(Punct::Lt), | ||||
|         } | ||||
|     } | ||||
|     /// `-`, `-=`, or `->` | ||||
|     fn minus(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('=') => Punct::MinusEq, | ||||
|             Ok('>') => Punct::Arrow, | ||||
|             _ => return self.produce_op(Punct::Minus), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
|     /// `+` or `+=` | ||||
|     fn plus(&mut self) -> LResult<Token> { | ||||
|         if self.peek() == Ok('=') { | ||||
|             self.consume()?.produce_op(Punct::PlusEq) | ||||
|         } else { | ||||
|             self.produce_op(Punct::Plus) | ||||
|         } | ||||
|     } | ||||
|     /// `%` or `%=` | ||||
|     fn rem(&mut self) -> LResult<Token> { | ||||
|         if self.peek() == Ok('=') { | ||||
|             self.consume()?.produce_op(Punct::RemEq) | ||||
|         } else { | ||||
|             self.produce_op(Punct::Rem) | ||||
|         } | ||||
|     } | ||||
|     /// `/`, `/=`, or the start of a line or block comment | ||||
|     fn slash(&mut self) -> LResult<Token> { | ||||
|         match self.peek() { | ||||
|             Ok('/') => self.consume()?.line_comment(), | ||||
|             Ok('*') => self.consume()?.block_comment(), | ||||
|             Ok('=') => self.consume()?.produce_op(Punct::SlashEq), | ||||
|             _ => self.produce_op(Punct::Slash), | ||||
|         } | ||||
|     } | ||||
|     /// `*` or `*=` | ||||
|     fn star(&mut self) -> LResult<Token> { | ||||
|         if self.peek() == Ok('=') { | ||||
|             self.consume()?.produce_op(Punct::StarEq) | ||||
|         } else { | ||||
|             self.produce_op(Punct::Star) | ||||
|         } | ||||
|     } | ||||
|     /// `^`, `^=`, or `^^` | ||||
|     fn xor(&mut self) -> LResult<Token> { | ||||
|         let op = match self.peek() { | ||||
|             Ok('=') => Punct::XorEq, | ||||
|             Ok('^') => Punct::XorXor, | ||||
|             _ => return self.produce_op(Punct::Xor), | ||||
|         }; | ||||
|         self.consume()?.produce_op(op) | ||||
|     } | ||||
| } | ||||
| /// Comments | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// Scans the rest of a `//` comment, up to but not including the newline | ||||
|     /// | ||||
|     /// A comment that runs into the end of input is still produced as a | ||||
|     /// comment token instead of being dropped with an end-of-file error. | ||||
|     fn line_comment(&mut self) -> LResult<Token> { | ||||
|         while let Ok(c) = self.peek() { | ||||
|             if c == '\n' { | ||||
|                 break; | ||||
|             } | ||||
|             self.consume()?; | ||||
|         } | ||||
|         self.produce(Kind::Comment, ()) | ||||
|     } | ||||
|     /// Scans the rest of a `/* */` comment, through the closing `*/` | ||||
|     /// | ||||
|     /// A comment left open at the end of input is still produced as a | ||||
|     /// comment token. | ||||
|     fn block_comment(&mut self) -> LResult<Token> { | ||||
|         while let Ok(c) = self.next() { | ||||
|             // Only peek at what follows a `*`: consuming it unconditionally | ||||
|             // would swallow the second `*` of a `**/` terminator | ||||
|             if c == '*' && self.peek() == Ok('/') { | ||||
|                 self.consume()?; | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         self.produce(Kind::Comment, ()) | ||||
|     } | ||||
| } | ||||
| /// Identifiers | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// Scans an identifier, producing a keyword token when the text names one | ||||
|     fn identifier(&mut self) -> LResult<Token> { | ||||
|         let mut name = String::new(); | ||||
|         name.push(self.xid_start()?); | ||||
|         while let Ok(c) = self.xid_continue() { | ||||
|             name.push(c); | ||||
|         } | ||||
|         match Kind::from_str(&name) { | ||||
|             Ok(keyword) => self.produce(keyword, ()), | ||||
|             Err(_) => self.produce(Kind::Identifier, TokenData::String(name)), | ||||
|         } | ||||
|     } | ||||
|     /// Consumes one character that may begin an identifier (`_` or XID_Start) | ||||
|     fn xid_start(&mut self) -> LResult<char> { | ||||
|         let c = self.peek()?; | ||||
|         if c == '_' || is_xid_start(c) { | ||||
|             self.consume()?; | ||||
|             Ok(c) | ||||
|         } else { | ||||
|             Err(Error::not_identifier(c, self.line(), self.col())) | ||||
|         } | ||||
|     } | ||||
|     /// Consumes one character that may continue an identifier (XID_Continue) | ||||
|     fn xid_continue(&mut self) -> LResult<char> { | ||||
|         let c = self.peek()?; | ||||
|         if is_xid_continue(c) { | ||||
|             self.consume()?; | ||||
|             Ok(c) | ||||
|         } else { | ||||
|             Err(Error::not_identifier(c, self.line(), self.col())) | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// Integers | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// Scans the rest of an integer whose leading `0` has been consumed | ||||
|     /// | ||||
|     /// The next character may select a base (`x`, `d`, `o`, `b`); further | ||||
|     /// decimal digits continue a base-10 literal; anything else leaves a | ||||
|     /// literal `0`. | ||||
|     fn int_with_base(&mut self) -> LResult<Token> { | ||||
|         match self.peek() { | ||||
|             Ok('x') => self.consume()?.digits::<16>(), | ||||
|             Ok('d') => self.consume()?.digits::<10>(), | ||||
|             Ok('o') => self.consume()?.digits::<8>(), | ||||
|             Ok('b') => self.consume()?.digits::<2>(), | ||||
|             Ok('0'..='9') => self.digits::<10>(), | ||||
|             _ => self.produce(Kind::Literal, 0), | ||||
|         } | ||||
|     } | ||||
|     /// Scans one or more base-`B` digits into an integer literal | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// Any ASCII alphanumeric character that is not a base-`B` digit is an | ||||
|     /// invalid digit. There is no dedicated overflow reason, so a digit that | ||||
|     /// would push the value past [u128::MAX] is reported as invalid too, | ||||
|     /// rather than panicking or silently wrapping. | ||||
|     fn digits<const B: u32>(&mut self) -> LResult<Token> { | ||||
|         let mut value = u128::from(self.digit::<B>()?); | ||||
|         while let Ok(c) = self.peek() { | ||||
|             if !c.is_ascii_alphanumeric() { | ||||
|                 break; | ||||
|             } | ||||
|             let digit = u128::from(self.digit::<B>()?); | ||||
|             value = value | ||||
|                 .checked_mul(u128::from(B)) | ||||
|                 .and_then(|shifted| shifted.checked_add(digit)) | ||||
|                 .ok_or_else(|| Error::invalid_digit(c, self.line(), self.col()))?; | ||||
|         } | ||||
|         self.produce(Kind::Literal, value) | ||||
|     } | ||||
|     /// Consumes one character and returns its value as a base-`B` digit | ||||
|     /// | ||||
|     /// The character is consumed even when it is not a valid digit. | ||||
|     fn digit<const B: u32>(&mut self) -> LResult<u32> { | ||||
|         let digit = self.peek()?; | ||||
|         self.consume()?; | ||||
|         digit | ||||
|             .to_digit(B) | ||||
|             .ok_or_else(|| Error::invalid_digit(digit, self.line(), self.col())) | ||||
|     } | ||||
| } | ||||
| /// Strings and characters | ||||
| impl<'t> Lexer<'t> { | ||||
|     /// Builds an error adapter that reports running out of input as an | ||||
|     /// unmatched `delim`, leaving every other error untouched | ||||
|     /// | ||||
|     /// Without this, a literal cut short by the end of input would surface | ||||
|     /// as a plain end-of-file, which the token iterator treats as a normal | ||||
|     /// end of iteration. | ||||
|     fn unterminated(delim: char) -> impl Fn(Error) -> Error { | ||||
|         move |e| match e.reason { | ||||
|             Reason::EndOfFile => e.mask_reason(Reason::UnmatchedDelimiters(delim)), | ||||
|             _ => e, | ||||
|         } | ||||
|     } | ||||
|     /// Scans the rest of a string literal whose opening `"` has been consumed | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// Reaching the end of input before the closing `"`, even in the middle | ||||
|     /// of an escape sequence, is reported as an unmatched `"`. | ||||
|     fn string(&mut self) -> LResult<Token> { | ||||
|         let mut value = String::new(); | ||||
|         while '"' != self.peek().map_err(Self::unterminated('"'))? { | ||||
|             value.push(self.unescape().map_err(Self::unterminated('"'))?) | ||||
|         } | ||||
|         self.consume()?.produce(Kind::Literal, value) | ||||
|     } | ||||
|     /// Scans the rest of a character literal whose opening `'` has been consumed | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// A missing closing `'`, including one lost to the end of input, is | ||||
|     /// reported as an unmatched `'`. | ||||
|     fn character(&mut self) -> LResult<Token> { | ||||
|         let out = self.unescape().map_err(Self::unterminated('\''))?; | ||||
|         match self.peek().map_err(Self::unterminated('\''))? { | ||||
|             '\'' => self.consume()?.produce(Kind::Literal, out), | ||||
|             _ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())), | ||||
|         } | ||||
|     } | ||||
|     /// Unescape a single character | ||||
|     /// | ||||
|     /// A character other than `\` is returned as-is. After a `\`, an | ||||
|     /// unrecognized escape character stands for itself. | ||||
|     fn unescape(&mut self) -> LResult<char> { | ||||
|         match self.next() { | ||||
|             Ok('\\') => (), | ||||
|             other => return other, | ||||
|         } | ||||
|         Ok(match self.next()? { | ||||
|             'a' => '\x07', | ||||
|             'b' => '\x08', | ||||
|             'f' => '\x0c', | ||||
|             'n' => '\n', | ||||
|             'r' => '\r', | ||||
|             't' => '\t', | ||||
|             'x' => self.hex_escape()?, | ||||
|             'u' => self.unicode_escape()?, | ||||
|             '0' => '\0', | ||||
|             chr => chr, | ||||
|         }) | ||||
|     } | ||||
|     /// unescape a single 2-digit hex escape | ||||
|     fn hex_escape(&mut self) -> LResult<char> { | ||||
|         let out = (self.digit::<16>()? << 4) + self.digit::<16>()?; | ||||
|         char::from_u32(out).ok_or_else(|| Error::bad_unicode(out, self.line(), self.col())) | ||||
|     } | ||||
|     /// unescape a single \u{} unicode escape | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// A missing `{` or `}` is an invalid escape. A value that is not a | ||||
|     /// unicode scalar value is reported as bad unicode; values past | ||||
|     /// [char::MAX] are rejected as soon as they are read, so overlong | ||||
|     /// escapes cannot shift their high bits away and alias a valid character. | ||||
|     fn unicode_escape(&mut self) -> LResult<char> { | ||||
|         let mut out: u32 = 0; | ||||
|         let Ok('{') = self.peek() else { | ||||
|             return Err(Error::invalid_escape('u', self.line(), self.col())); | ||||
|         }; | ||||
|         self.consume()?; | ||||
|         while let Ok(c) = self.peek() { | ||||
|             if c == '}' { | ||||
|                 self.consume()?; | ||||
|                 return char::from_u32(out) | ||||
|                     .ok_or_else(|| Error::bad_unicode(out, self.line(), self.col())); | ||||
|             } | ||||
|             // `out` never exceeds char::MAX here, so the shift cannot lose bits | ||||
|             out = (out << 4) + self.digit::<16>()?; | ||||
|             if out > char::MAX as u32 { | ||||
|                 return Err(Error::bad_unicode(out, self.line(), self.col())); | ||||
|             } | ||||
|         } | ||||
|         Err(Error::invalid_escape('u', self.line(), self.col())) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A [Loc] can be taken from a [Lexer]'s current `line` and `col` | ||||
| impl<'t> From<&Lexer<'t>> for Loc { | ||||
|     fn from(lexer: &Lexer<'t>) -> Self { | ||||
|         let (line, col) = (lexer.line(), lexer.col()); | ||||
|         Loc(line, col) | ||||
|     } | ||||
| } | ||||
|  | ||||
| use error::{Error, LResult, Reason}; | ||||
| pub mod error { | ||||
|     //! [Error] type for the [Lexer](super::Lexer) | ||||
|     use std::fmt::Display; | ||||
|  | ||||
|     /// Result type with [Err] = [Error] | ||||
|     pub type LResult<T> = Result<T, Error>; | ||||
|     /// A lexer error: what went wrong, and the line and column it was reported at | ||||
|     #[derive(Clone, Debug, PartialEq, Eq)] | ||||
|     pub struct Error { | ||||
|         pub reason: Reason, | ||||
|         pub line: u32, | ||||
|         pub col: u32, | ||||
|     } | ||||
|     /// The reason for the [Error] | ||||
|     #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||
|     pub enum Reason { | ||||
|         /// Found an opening delimiter of type [char], but not the expected closing delimiter | ||||
|         UnmatchedDelimiters(char), | ||||
|         /// Found a character that doesn't belong to any [TokenKind](cl_token::TokenKind) | ||||
|         UnexpectedChar(char), | ||||
|         /// Found a character that's not valid in identifiers while looking for an identifier | ||||
|         NotIdentifier(char), | ||||
|         /// Found a character that's not valid in an escape sequence while looking for an escape | ||||
|         /// sequence | ||||
|         UnknownEscape(char), | ||||
|         /// Escape sequence contains invalid hexadecimal digit or unmatched braces | ||||
|         InvalidEscape(char), | ||||
|         /// Character is not a valid digit in the requested base | ||||
|         InvalidDigit(char), | ||||
|         /// Base conversion requested, but the base character was not in the set of known | ||||
|         /// characters | ||||
|         UnknownBase(char), | ||||
|         /// Unicode escape does not map to a valid unicode code-point | ||||
|         BadUnicode(u32), | ||||
|         /// Reached end of input | ||||
|         EndOfFile, | ||||
|     } | ||||
|     error_impl! { | ||||
|         unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c), | ||||
|         unexpected_char(c: char) => Reason::UnexpectedChar(c), | ||||
|         not_identifier(c: char) => Reason::NotIdentifier(c), | ||||
|         unknown_escape(e: char) => Reason::UnknownEscape(e), | ||||
|         invalid_escape(e: char) => Reason::InvalidEscape(e), | ||||
|         invalid_digit(digit: char) => Reason::InvalidDigit(digit), | ||||
|         unknown_base(base: char) => Reason::UnknownBase(base), | ||||
|         bad_unicode(value: u32) => Reason::BadUnicode(value), | ||||
|         end_of_file => Reason::EndOfFile, | ||||
|     } | ||||
|     impl Error { | ||||
|         /// Replaces the [Reason] of this error, keeping its location | ||||
|         pub(super) fn mask_reason(self, reason: Reason) -> Self { | ||||
|             let Self { line, col, .. } = self; | ||||
|             Self { reason, line, col } | ||||
|         } | ||||
|         /// Returns the [Reason] for this error | ||||
|         pub fn reason(&self) -> &Reason { | ||||
|             &self.reason | ||||
|         } | ||||
|         /// Returns the (line, col) where the error happened | ||||
|         pub fn location(&self) -> (u32, u32) { | ||||
|             let Self { line, col, .. } = *self; | ||||
|             (line, col) | ||||
|         } | ||||
|     } | ||||
|     /// Generates one `pub(super)` constructor per entry; each takes the listed | ||||
|     /// parameters followed by `line` and `col` | ||||
|     macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) { | ||||
|         #[allow(dead_code)] | ||||
|         impl Error { | ||||
|             $(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self { | ||||
|                 Self { reason: $reason, line, col } | ||||
|             })* | ||||
|         } | ||||
|     } | ||||
|     impl std::error::Error for Error {} | ||||
|     impl Display for Error { | ||||
|         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|             let Self { reason, line, col } = self; | ||||
|             write!(f, "{}:{}: {}", line, col, reason) | ||||
|         } | ||||
|     } | ||||
|     impl Display for Reason { | ||||
|         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|             match *self { | ||||
|                 Self::UnmatchedDelimiters(c) => write!(f, "Unmatched `{c}` in input"), | ||||
|                 Self::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"), | ||||
|                 Self::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"), | ||||
|                 Self::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"), | ||||
|                 Self::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"), | ||||
|                 Self::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"), | ||||
|                 Self::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"), | ||||
|                 Self::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"), | ||||
|                 Self::EndOfFile => write!(f, "Reached end of input"), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										171
									
								
								compiler/cl-lexer/src/tests.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								compiler/cl-lexer/src/tests.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,171 @@ | ||||
| use crate::Lexer; | ||||
| use cl_token::*; | ||||
|  | ||||
| /// Generates one `#[test]` function per entry, asserting that lexing each | ||||
| /// input yields tokens whose `ty()` values equal the expected list | ||||
| /// | ||||
| /// Every token is unwrapped, so any lexer error fails the test. | ||||
| macro test_lexer_output_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$( | ||||
|     #[test] | ||||
|     fn $f() {$( | ||||
|         // assert_eq! already prints both sides on failure; no dbg! needed | ||||
|         assert_eq!( | ||||
|             Lexer::new($test) | ||||
|                 .into_iter() | ||||
|                 .map(|t| t.unwrap().ty()) | ||||
|                 .collect::<Vec<_>>(), | ||||
|             $expect | ||||
|         ); | ||||
|     )*} | ||||
| )*} | ||||
|  | ||||
| /// Generates one `#[test]` function per entry, asserting that lexing each | ||||
| /// input yields tokens whose `into_data()` values equal the expected list | ||||
| /// | ||||
| /// Every token is unwrapped, so any lexer error fails the test. | ||||
| macro test_lexer_data_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$( | ||||
|     #[test] | ||||
|     fn $f() {$( | ||||
|         // assert_eq! already prints both sides on failure; no dbg! needed | ||||
|         assert_eq!( | ||||
|             Lexer::new($test) | ||||
|                 .into_iter() | ||||
|                 .map(|t| t.unwrap().into_data()) | ||||
|                 .collect::<Vec<_>>(), | ||||
|             $expect | ||||
|         ); | ||||
|     )*} | ||||
| )*} | ||||
|  | ||||
| /// Builds an array by calling `.into()` on every listed expression | ||||
| macro td ($($value:expr),*) { | ||||
|     [$($value.into()),*] | ||||
| } | ||||
|  | ||||
| /// Identifiers are lexed with their text as string data | ||||
| mod ident { | ||||
|     use super::*; | ||||
|     /// Builds the expected data for a list of identifier names | ||||
|     macro idents ($($name:literal),*) { | ||||
|         [$(TokenData::String($name.into())),*] | ||||
|     } | ||||
|     test_lexer_data_type! { | ||||
|         underscore { | ||||
|             "_ _" => | ||||
|             idents!["_", "_"] | ||||
|         } | ||||
|         unicode { | ||||
|             "_ε ε_" => | ||||
|             idents!["_ε", "ε_"] | ||||
|         } | ||||
|         many_underscore { | ||||
|             "____________________________________" => | ||||
|             idents!["____________________________________"] | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// Keywords are lexed as their own token kinds, not as identifiers | ||||
| mod keyword { | ||||
|     use super::*; | ||||
|     /// Builds the expected list of keyword token kinds | ||||
|     macro kinds($($kind:ident),*) { | ||||
|         [$(TokenKind::$kind),*] | ||||
|     } | ||||
|     test_lexer_output_type! { | ||||
|         kw_break { "break break" => kinds![Break, Break] } | ||||
|         kw_continue { "continue continue" => kinds![Continue, Continue] } | ||||
|         kw_else { "else else" => kinds![Else, Else] } | ||||
|         kw_false { "false false" => kinds![False, False] } | ||||
|         kw_for { "for for" => kinds![For, For] } | ||||
|         kw_fn { "fn fn" => kinds![Fn, Fn] } | ||||
|         kw_if { "if if" => kinds![If, If] } | ||||
|         kw_in { "in in" => kinds![In, In] } | ||||
|         kw_let { "let let" => kinds![Let, Let] } | ||||
|         kw_return { "return return" => kinds![Return, Return] } | ||||
|         kw_true { "true true" => kinds![True, True] } | ||||
|         kw_while { "while while" => kinds![While, While] } | ||||
|         keywords { | ||||
|             "break continue else false for fn if in let return true while" => | ||||
|             kinds![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// The same five values, written in every supported base | ||||
| mod integer { | ||||
|     use super::*; | ||||
|     test_lexer_data_type! { | ||||
|         hex { | ||||
|             "0x0 0x1 0x15 0x2100 0x8000" => | ||||
|             td![0, 1, 21, 8448, 32768] | ||||
|         } | ||||
|         dec { | ||||
|             "0d0 0d1 0d21 0d8448 0d32768" => | ||||
|             td![0, 1, 21, 8448, 32768] | ||||
|         } | ||||
|         oct { | ||||
|             "0o0 0o1 0o25 0o20400 0o100000" => | ||||
|             td![0, 1, 21, 8448, 32768] | ||||
|         } | ||||
|         bin { | ||||
|             "0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" => | ||||
|             td![0, 1, 21, 8448, 32768] | ||||
|         } | ||||
|         baseless { | ||||
|             "0 1 21 8448 32768" => | ||||
|             td![0, 1, 21, 8448, 32768] | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// String literals are lexed with their unescaped contents as data | ||||
| mod string { | ||||
|     use super::*; | ||||
|     test_lexer_data_type! { | ||||
|         empty_string { | ||||
|             r#""""# => | ||||
|             td![String::new()] | ||||
|         } | ||||
|         unicode_string { | ||||
|             r#""I 💙 🦈!""# => | ||||
|             td![String::from("I 💙 🦈!")] | ||||
|         } | ||||
|         escape_string { | ||||
|             r#" "This is a shark: \u{1f988}" "# => | ||||
|             td![String::from("This is a shark: 🦈")] | ||||
|         } | ||||
|     } | ||||
| } | ||||
| /// Every punctuation token, lexed twice in a row separated by a space | ||||
| mod punct { | ||||
|     /// Shorthand for the token kind of a punctuation token | ||||
|     macro op($op:ident) { | ||||
|         TokenKind::Punct(Punct::$op) | ||||
|     } | ||||
|  | ||||
|     use super::*; | ||||
|     test_lexer_output_type! { | ||||
|         l_curly   { "{ {"   => [ op!(LCurly), op!(LCurly) ] } | ||||
|         r_curly   { "} }"   => [ op!(RCurly), op!(RCurly) ] } | ||||
|         l_brack   { "[ ["   => [ op!(LBrack), op!(LBrack) ] } | ||||
|         r_brack   { "] ]"   => [ op!(RBrack), op!(RBrack) ] } | ||||
|         l_paren   { "( ("   => [ op!(LParen), op!(LParen) ] } | ||||
|         r_paren   { ") )"   => [ op!(RParen), op!(RParen) ] } | ||||
|         amp       { "& &"   => [ op!(Amp), op!(Amp) ] } | ||||
|         amp_amp   { "&& &&" => [ op!(AmpAmp), op!(AmpAmp) ] } | ||||
|         amp_eq    { "&= &=" => [ op!(AmpEq), op!(AmpEq) ] } | ||||
|         arrow     { "-> ->" => [ op!(Arrow), op!(Arrow)] } | ||||
|         at        { "@ @"   => [ op!(At), op!(At)] } | ||||
|         backslash { "\\ \\" => [ op!(Backslash), op!(Backslash)] } | ||||
|         bang      { "! !"   => [ op!(Bang), op!(Bang)] } | ||||
|         bangbang  { "!! !!" => [ op!(BangBang), op!(BangBang)] } | ||||
|         bangeq    { "!= !=" => [ op!(BangEq), op!(BangEq)] } | ||||
|         bar       { "| |"   => [ op!(Bar), op!(Bar)] } | ||||
|         barbar    { "|| ||" => [ op!(BarBar), op!(BarBar)] } | ||||
|         bareq     { "|= |=" => [ op!(BarEq), op!(BarEq)] } | ||||
|         colon     { ": :"   => [ op!(Colon), op!(Colon)] } | ||||
|         coloncolon { ":: ::" => [ op!(ColonColon), op!(ColonColon)] } | ||||
|         comma     { ", ,"   => [ op!(Comma), op!(Comma)] } | ||||
|         dot       { ". ."   => [ op!(Dot), op!(Dot)] } | ||||
|         dotdot    { ".. .." => [ op!(DotDot), op!(DotDot)] } | ||||
|         dotdoteq  { "..= ..=" => [ op!(DotDotEq), op!(DotDotEq)] } | ||||
|         eq        { "= ="   => [ op!(Eq), op!(Eq)] } | ||||
|         eqeq      { "== ==" => [ op!(EqEq), op!(EqEq)] } | ||||
|         fatarrow  { "=> =>" => [ op!(FatArrow), op!(FatArrow)] } | ||||
|         grave     { "` `"   => [ op!(Grave), op!(Grave)] } | ||||
|         gt        { "> >"   => [ op!(Gt), op!(Gt)] } | ||||
|         gteq      { ">= >=" => [ op!(GtEq), op!(GtEq)] } | ||||
|         gtgt      { ">> >>" => [ op!(GtGt), op!(GtGt)] } | ||||
|         gtgteq    { ">>= >>=" => [ op!(GtGtEq), op!(GtGtEq)] } | ||||
|         hash      { "# #"   => [ op!(Hash), op!(Hash)] } | ||||
|         hashbang  { "#! #!" => [ op!(HashBang), op!(HashBang)] } | ||||
|         lt        { "< <"   => [ op!(Lt), op!(Lt)] } | ||||
|         lteq      { "<= <=" => [ op!(LtEq), op!(LtEq)] } | ||||
|         ltlt      { "<< <<" => [ op!(LtLt), op!(LtLt)] } | ||||
|         ltlteq    { "<<= <<=" => [ op!(LtLtEq), op!(LtLtEq)] } | ||||
|         minus     { "- -"   => [ op!(Minus), op!(Minus)] } | ||||
|         minuseq   { "-= -=" => [ op!(MinusEq), op!(MinusEq)] } | ||||
|         plus      { "+ +"   => [ op!(Plus), op!(Plus)] } | ||||
|         pluseq    { "+= +=" => [ op!(PlusEq), op!(PlusEq)] } | ||||
|         question  { "? ?"   => [ op!(Question), op!(Question)] } | ||||
|         rem       { "% %"   => [ op!(Rem), op!(Rem)] } | ||||
|         remeq     { "%= %=" => [ op!(RemEq), op!(RemEq)] } | ||||
|         semi      { "; ;"   => [ op!(Semi), op!(Semi)] } | ||||
|         slash     { "/ /"   => [ op!(Slash), op!(Slash)] } | ||||
|         slasheq   { "/= /=" => [ op!(SlashEq), op!(SlashEq)] } | ||||
|         star      { "* *"   => [ op!(Star), op!(Star)] } | ||||
|         stareq    { "*= *=" => [ op!(StarEq), op!(StarEq)] } | ||||
|         tilde     { "~ ~"   => [ op!(Tilde), op!(Tilde)] } | ||||
|         xor       { "^ ^"   => [ op!(Xor), op!(Xor)] } | ||||
|         xoreq     { "^= ^=" => [ op!(XorEq), op!(XorEq)] } | ||||
|         xorxor    { "^^ ^^" => [ op!(XorXor), op!(XorXor)] } | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user