conlang: Move all cl-libs into the compiler directory
This commit is contained in:
556 lines added: compiler/cl-lexer/src/lib.rs (new file)
@@ -0,0 +1,556 @@
|
||||
//! Converts a text file into tokens
|
||||
#![warn(clippy::all)]
|
||||
#![feature(decl_macro)]
|
||||
use cl_structures::span::Loc;
|
||||
use cl_token::{TokenKind as Kind, *};
|
||||
use std::{
|
||||
iter::Peekable,
|
||||
str::{Chars, FromStr},
|
||||
};
|
||||
use unicode_ident::*;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub mod lexer_iter {
|
||||
//! Iterator over a [`Lexer`], returning [`LResult<Token>`]s
|
||||
use super::{
|
||||
error::{LResult, Reason},
|
||||
Lexer, Token,
|
||||
};
|
||||
|
||||
/// Iterator over a [`Lexer`], returning [`LResult<Token>`]s
|
||||
pub struct LexerIter<'t> {
|
||||
lexer: Lexer<'t>,
|
||||
}
|
||||
impl<'t> Iterator for LexerIter<'t> {
|
||||
type Item = LResult<Token>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.lexer.scan() {
|
||||
Ok(v) => Some(Ok(v)),
|
||||
Err(e) => {
|
||||
if e.reason == Reason::EndOfFile {
|
||||
None
|
||||
} else {
|
||||
Some(Err(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> IntoIterator for Lexer<'t> {
|
||||
type Item = LResult<Token>;
|
||||
type IntoIter = LexerIter<'t>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
LexerIter { lexer: self }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The Lexer iterates over the characters in a body of text, searching for [Tokens](Token).
///
/// # Examples
/// ```rust
/// # use cl_lexer::Lexer;
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // Read in your code from somewhere
/// let some_code = "
/// fn main () {
///     // TODO: code goes here!
/// }
/// ";
/// // Create a lexer over your code
/// let mut lexer = Lexer::new(some_code);
/// // Scan for a single token
/// let first_token = lexer.scan()?;
/// println!("{first_token:?}");
/// // Loop over all the rest of the tokens
/// for token in lexer {
///     # let token: Result<_,()> = Ok(token?);
///     match token {
///         Ok(token) => println!("{token:?}"),
///         Err(e) => eprintln!("{e:?}"),
///     }
/// }
/// # Ok(()) }
/// ```
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
    /// Peekable character stream over the source text.
    iter: Peekable<Chars<'t>>,
    /// Offset (in chars) of the start of the token currently being scanned.
    start: usize,
    /// (line, column) of `start`; both counters are 1-indexed.
    start_loc: (u32, u32),
    /// Offset (in chars) one past the most recently consumed character.
    current: usize,
    /// (line, column) of `current`; both counters are 1-indexed.
    current_loc: (u32, u32),
}
|
||||
|
||||
impl<'t> Lexer<'t> {
|
||||
/// Creates a new [Lexer] over a [str]
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
Self {
|
||||
iter: text.chars().peekable(),
|
||||
start: 0,
|
||||
start_loc: (1, 1),
|
||||
current: 0,
|
||||
current_loc: (1, 1),
|
||||
}
|
||||
}
|
||||
/// Scans through the text, searching for the next [Token]
|
||||
pub fn scan(&mut self) -> LResult<Token> {
|
||||
match self.skip_whitespace().peek()? {
|
||||
'{' => self.consume()?.produce_op(Punct::LCurly),
|
||||
'}' => self.consume()?.produce_op(Punct::RCurly),
|
||||
'[' => self.consume()?.produce_op(Punct::LBrack),
|
||||
']' => self.consume()?.produce_op(Punct::RBrack),
|
||||
'(' => self.consume()?.produce_op(Punct::LParen),
|
||||
')' => self.consume()?.produce_op(Punct::RParen),
|
||||
'&' => self.consume()?.amp(),
|
||||
'@' => self.consume()?.produce_op(Punct::At),
|
||||
'\\' => self.consume()?.produce_op(Punct::Backslash),
|
||||
'!' => self.consume()?.bang(),
|
||||
'|' => self.consume()?.bar(),
|
||||
':' => self.consume()?.colon(),
|
||||
',' => self.consume()?.produce_op(Punct::Comma),
|
||||
'.' => self.consume()?.dot(),
|
||||
'=' => self.consume()?.equal(),
|
||||
'`' => self.consume()?.produce_op(Punct::Grave),
|
||||
'>' => self.consume()?.greater(),
|
||||
'#' => self.consume()?.hash(),
|
||||
'<' => self.consume()?.less(),
|
||||
'-' => self.consume()?.minus(),
|
||||
'+' => self.consume()?.plus(),
|
||||
'?' => self.consume()?.produce_op(Punct::Question),
|
||||
'%' => self.consume()?.rem(),
|
||||
';' => self.consume()?.produce_op(Punct::Semi),
|
||||
'/' => self.consume()?.slash(),
|
||||
'*' => self.consume()?.star(),
|
||||
'~' => self.consume()?.produce_op(Punct::Tilde),
|
||||
'^' => self.consume()?.xor(),
|
||||
'0' => self.consume()?.int_with_base(),
|
||||
'1'..='9' => self.digits::<10>(),
|
||||
'"' => self.consume()?.string(),
|
||||
'\'' => self.consume()?.character(),
|
||||
'_' => self.identifier(),
|
||||
i if is_xid_start(i) => self.identifier(),
|
||||
e => {
|
||||
let err = Err(Error::unexpected_char(e, self.line(), self.col()));
|
||||
let _ = self.consume();
|
||||
err
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Returns the current line
|
||||
pub fn line(&self) -> u32 {
|
||||
self.start_loc.0
|
||||
}
|
||||
/// Returns the current column
|
||||
pub fn col(&self) -> u32 {
|
||||
self.start_loc.1
|
||||
}
|
||||
fn next(&mut self) -> LResult<char> {
|
||||
let out = self.peek();
|
||||
self.consume()?;
|
||||
out
|
||||
}
|
||||
fn peek(&mut self) -> LResult<char> {
|
||||
self.iter
|
||||
.peek()
|
||||
.copied()
|
||||
.ok_or(Error::end_of_file(self.line(), self.col()))
|
||||
}
|
||||
fn produce(&mut self, kind: TokenKind, data: impl Into<TokenData>) -> LResult<Token> {
|
||||
let loc = self.start_loc;
|
||||
self.start_loc = self.current_loc;
|
||||
self.start = self.current;
|
||||
Ok(Token::new(kind, data, loc.0, loc.1))
|
||||
}
|
||||
fn produce_op(&mut self, kind: Punct) -> LResult<Token> {
|
||||
self.produce(TokenKind::Punct(kind), ())
|
||||
}
|
||||
fn skip_whitespace(&mut self) -> &mut Self {
|
||||
while let Ok(c) = self.peek() {
|
||||
if !c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
let _ = self.consume();
|
||||
}
|
||||
self.start = self.current;
|
||||
self.start_loc = self.current_loc;
|
||||
self
|
||||
}
|
||||
fn consume(&mut self) -> LResult<&mut Self> {
|
||||
self.current += 1;
|
||||
match self.iter.next() {
|
||||
Some('\n') => {
|
||||
let (line, col) = &mut self.current_loc;
|
||||
*line += 1;
|
||||
*col = 1;
|
||||
}
|
||||
Some(_) => self.current_loc.1 += 1,
|
||||
None => Err(Error::end_of_file(self.line(), self.col()))?,
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
/// Digraphs and trigraphs
|
||||
impl<'t> Lexer<'t> {
|
||||
fn amp(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('&') => self.consume()?.produce_op(Punct::AmpAmp),
|
||||
Ok('=') => self.consume()?.produce_op(Punct::AmpEq),
|
||||
_ => self.produce_op(Punct::Amp),
|
||||
}
|
||||
}
|
||||
fn bang(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('!') => self.consume()?.produce_op(Punct::BangBang),
|
||||
Ok('=') => self.consume()?.produce_op(Punct::BangEq),
|
||||
_ => self.produce_op(Punct::Bang),
|
||||
}
|
||||
}
|
||||
fn bar(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('|') => self.consume()?.produce_op(Punct::BarBar),
|
||||
Ok('=') => self.consume()?.produce_op(Punct::BarEq),
|
||||
_ => self.produce_op(Punct::Bar),
|
||||
}
|
||||
}
|
||||
fn colon(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok(':') => self.consume()?.produce_op(Punct::ColonColon),
|
||||
_ => self.produce_op(Punct::Colon),
|
||||
}
|
||||
}
|
||||
fn dot(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('.') => {
|
||||
if let Ok('=') = self.consume()?.peek() {
|
||||
self.consume()?.produce_op(Punct::DotDotEq)
|
||||
} else {
|
||||
self.produce_op(Punct::DotDot)
|
||||
}
|
||||
}
|
||||
_ => self.produce_op(Punct::Dot),
|
||||
}
|
||||
}
|
||||
fn equal(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::EqEq),
|
||||
Ok('>') => self.consume()?.produce_op(Punct::FatArrow),
|
||||
_ => self.produce_op(Punct::Eq),
|
||||
}
|
||||
}
|
||||
fn greater(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::GtEq),
|
||||
Ok('>') => {
|
||||
if let Ok('=') = self.consume()?.peek() {
|
||||
self.consume()?.produce_op(Punct::GtGtEq)
|
||||
} else {
|
||||
self.produce_op(Punct::GtGt)
|
||||
}
|
||||
}
|
||||
_ => self.produce_op(Punct::Gt),
|
||||
}
|
||||
}
|
||||
fn hash(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('!') => self.consume()?.produce_op(Punct::HashBang),
|
||||
_ => self.produce_op(Punct::Hash),
|
||||
}
|
||||
}
|
||||
fn less(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::LtEq),
|
||||
Ok('<') => {
|
||||
if let Ok('=') = self.consume()?.peek() {
|
||||
self.consume()?.produce_op(Punct::LtLtEq)
|
||||
} else {
|
||||
self.produce_op(Punct::LtLt)
|
||||
}
|
||||
}
|
||||
_ => self.produce_op(Punct::Lt),
|
||||
}
|
||||
}
|
||||
fn minus(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::MinusEq),
|
||||
Ok('>') => self.consume()?.produce_op(Punct::Arrow),
|
||||
_ => self.produce_op(Punct::Minus),
|
||||
}
|
||||
}
|
||||
fn plus(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::PlusEq),
|
||||
_ => self.produce_op(Punct::Plus),
|
||||
}
|
||||
}
|
||||
fn rem(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::RemEq),
|
||||
_ => self.produce_op(Punct::Rem),
|
||||
}
|
||||
}
|
||||
fn slash(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::SlashEq),
|
||||
Ok('/') => self.consume()?.line_comment(),
|
||||
Ok('*') => self.consume()?.block_comment(),
|
||||
_ => self.produce_op(Punct::Slash),
|
||||
}
|
||||
}
|
||||
fn star(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::StarEq),
|
||||
_ => self.produce_op(Punct::Star),
|
||||
}
|
||||
}
|
||||
fn xor(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('=') => self.consume()?.produce_op(Punct::XorEq),
|
||||
Ok('^') => self.consume()?.produce_op(Punct::XorXor),
|
||||
_ => self.produce_op(Punct::Xor),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Comments
|
||||
impl<'t> Lexer<'t> {
|
||||
fn line_comment(&mut self) -> LResult<Token> {
|
||||
while Ok('\n') != self.peek() {
|
||||
self.consume()?;
|
||||
}
|
||||
self.produce(Kind::Comment, ())
|
||||
}
|
||||
fn block_comment(&mut self) -> LResult<Token> {
|
||||
while let Ok(c) = self.next() {
|
||||
if '*' == c && Ok('/') == self.next() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.produce(Kind::Comment, ())
|
||||
}
|
||||
}
|
||||
/// Identifiers
|
||||
impl<'t> Lexer<'t> {
|
||||
fn identifier(&mut self) -> LResult<Token> {
|
||||
let mut out = String::from(self.xid_start()?);
|
||||
while let Ok(c) = self.xid_continue() {
|
||||
out.push(c)
|
||||
}
|
||||
if let Ok(keyword) = Kind::from_str(&out) {
|
||||
self.produce(keyword, ())
|
||||
} else {
|
||||
self.produce(Kind::Identifier, TokenData::String(out))
|
||||
}
|
||||
}
|
||||
fn xid_start(&mut self) -> LResult<char> {
|
||||
match self.peek()? {
|
||||
xid if xid == '_' || is_xid_start(xid) => {
|
||||
self.consume()?;
|
||||
Ok(xid)
|
||||
}
|
||||
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
|
||||
}
|
||||
}
|
||||
fn xid_continue(&mut self) -> LResult<char> {
|
||||
match self.peek()? {
|
||||
xid if is_xid_continue(xid) => {
|
||||
self.consume()?;
|
||||
Ok(xid)
|
||||
}
|
||||
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Integers
|
||||
impl<'t> Lexer<'t> {
|
||||
fn int_with_base(&mut self) -> LResult<Token> {
|
||||
match self.peek() {
|
||||
Ok('x') => self.consume()?.digits::<16>(),
|
||||
Ok('d') => self.consume()?.digits::<10>(),
|
||||
Ok('o') => self.consume()?.digits::<8>(),
|
||||
Ok('b') => self.consume()?.digits::<2>(),
|
||||
Ok('0'..='9') => self.digits::<10>(),
|
||||
_ => self.produce(Kind::Literal, 0),
|
||||
}
|
||||
}
|
||||
fn digits<const B: u32>(&mut self) -> LResult<Token> {
|
||||
let mut value = self.digit::<B>()? as u128;
|
||||
while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
|
||||
value = value * B as u128 + self.digit::<B>()? as u128;
|
||||
}
|
||||
self.produce(Kind::Literal, value)
|
||||
}
|
||||
fn digit<const B: u32>(&mut self) -> LResult<u32> {
|
||||
let digit = self.peek()?;
|
||||
self.consume()?;
|
||||
digit
|
||||
.to_digit(B)
|
||||
.ok_or(Error::invalid_digit(digit, self.line(), self.col()))
|
||||
}
|
||||
}
|
||||
/// Strings and characters
|
||||
impl<'t> Lexer<'t> {
|
||||
fn string(&mut self) -> LResult<Token> {
|
||||
let mut value = String::new();
|
||||
while '"'
|
||||
!= self
|
||||
.peek()
|
||||
.map_err(|e| e.mask_reason(Reason::UnmatchedDelimiters('"')))?
|
||||
{
|
||||
value.push(self.unescape()?)
|
||||
}
|
||||
self.consume()?.produce(Kind::Literal, value)
|
||||
}
|
||||
fn character(&mut self) -> LResult<Token> {
|
||||
let out = self.unescape()?;
|
||||
match self.peek()? {
|
||||
'\'' => self.consume()?.produce(Kind::Literal, out),
|
||||
_ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
|
||||
}
|
||||
}
|
||||
/// Unescape a single character
|
||||
fn unescape(&mut self) -> LResult<char> {
|
||||
match self.next() {
|
||||
Ok('\\') => (),
|
||||
other => return other,
|
||||
}
|
||||
Ok(match self.next()? {
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'f' => '\x0c',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'x' => self.hex_escape()?,
|
||||
'u' => self.unicode_escape()?,
|
||||
'0' => '\0',
|
||||
chr => chr,
|
||||
})
|
||||
}
|
||||
/// unescape a single 2-digit hex escape
|
||||
fn hex_escape(&mut self) -> LResult<char> {
|
||||
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
|
||||
char::from_u32(out).ok_or(Error::bad_unicode(out, self.line(), self.col()))
|
||||
}
|
||||
/// unescape a single \u{} unicode escape
|
||||
fn unicode_escape(&mut self) -> LResult<char> {
|
||||
let mut out = 0;
|
||||
let Ok('{') = self.peek() else {
|
||||
return Err(Error::invalid_escape('u', self.line(), self.col()));
|
||||
};
|
||||
self.consume()?;
|
||||
while let Ok(c) = self.peek() {
|
||||
match c {
|
||||
'}' => {
|
||||
self.consume()?;
|
||||
return char::from_u32(out).ok_or(Error::bad_unicode(
|
||||
out,
|
||||
self.line(),
|
||||
self.col(),
|
||||
));
|
||||
}
|
||||
_ => out = (out << 4) + self.digit::<16>()?,
|
||||
}
|
||||
}
|
||||
Err(Error::invalid_escape('u', self.line(), self.col()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> From<&Lexer<'t>> for Loc {
|
||||
fn from(value: &Lexer<'t>) -> Self {
|
||||
Loc(value.line(), value.col())
|
||||
}
|
||||
}
|
||||
|
||||
use error::{Error, LResult, Reason};
|
||||
pub mod error {
|
||||
//! [Error] type for the [Lexer](super::Lexer)
|
||||
use std::fmt::Display;
|
||||
|
||||
/// Result type with [Err] = [Error]
|
||||
pub type LResult<T> = Result<T, Error>;
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Error {
|
||||
pub reason: Reason,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
}
|
||||
/// The reason for the [Error]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum Reason {
|
||||
/// Found an opening delimiter of type [char], but not the expected closing delimiter
|
||||
UnmatchedDelimiters(char),
|
||||
/// Found a character that doesn't belong to any [TokenKind](cl_token::TokenKind)
|
||||
UnexpectedChar(char),
|
||||
/// Found a character that's not valid in identifiers while looking for an identifier
|
||||
NotIdentifier(char),
|
||||
/// Found a character that's not valid in an escape sequence while looking for an escape
|
||||
/// sequence
|
||||
UnknownEscape(char),
|
||||
/// Escape sequence contains invalid hexadecimal digit or unmatched braces
|
||||
InvalidEscape(char),
|
||||
/// Character is not a valid digit in the requested base
|
||||
InvalidDigit(char),
|
||||
/// Base conversion requested, but the base character was not in the set of known
|
||||
/// characters
|
||||
UnknownBase(char),
|
||||
/// Unicode escape does not map to a valid unicode code-point
|
||||
BadUnicode(u32),
|
||||
/// Reached end of input
|
||||
EndOfFile,
|
||||
}
|
||||
error_impl! {
|
||||
unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c),
|
||||
unexpected_char(c: char) => Reason::UnexpectedChar(c),
|
||||
not_identifier(c: char) => Reason::NotIdentifier(c),
|
||||
unknown_escape(e: char) => Reason::UnknownEscape(e),
|
||||
invalid_escape(e: char) => Reason::InvalidEscape(e),
|
||||
invalid_digit(digit: char) => Reason::InvalidDigit(digit),
|
||||
unknown_base(base: char) => Reason::UnknownBase(base),
|
||||
bad_unicode(value: u32) => Reason::BadUnicode(value),
|
||||
end_of_file => Reason::EndOfFile,
|
||||
}
|
||||
impl Error {
|
||||
/// Changes the [Reason] of this error
|
||||
pub(super) fn mask_reason(self, reason: Reason) -> Self {
|
||||
Self { reason, ..self }
|
||||
}
|
||||
/// Returns the [Reason] for this error
|
||||
pub fn reason(&self) -> &Reason {
|
||||
&self.reason
|
||||
}
|
||||
/// Returns the (line, col) where the error happened
|
||||
pub fn location(&self) -> (u32, u32) {
|
||||
(self.line, self.col)
|
||||
}
|
||||
}
|
||||
macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) {
|
||||
#[allow(dead_code)]
|
||||
impl Error {
|
||||
$(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self {
|
||||
Self { reason: $reason, line, col }
|
||||
})*
|
||||
}
|
||||
}
|
||||
impl std::error::Error for Error {}
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}:{}: {}", self.line, self.col, self.reason)
|
||||
}
|
||||
}
|
||||
impl Display for Reason {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c}` in input"},
|
||||
Reason::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"),
|
||||
Reason::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"),
|
||||
Reason::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"),
|
||||
Reason::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"),
|
||||
Reason::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"),
|
||||
Reason::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"),
|
||||
Reason::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"),
|
||||
Reason::EndOfFile => write!(f, "Reached end of input"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
171 lines added: compiler/cl-lexer/src/tests.rs (new file)
@@ -0,0 +1,171 @@
|
||||
use crate::Lexer;
|
||||
use cl_token::*;
|
||||
|
||||
macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
|
||||
#[test]
|
||||
fn $f() {$(
|
||||
assert_eq!(
|
||||
Lexer::new($test)
|
||||
.into_iter()
|
||||
.map(|t| t.unwrap().ty())
|
||||
.collect::<Vec<_>>(),
|
||||
dbg!($expect)
|
||||
);
|
||||
)*}
|
||||
)*}
|
||||
|
||||
macro test_lexer_data_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
|
||||
#[test]
|
||||
fn $f() {$(
|
||||
assert_eq!(
|
||||
Lexer::new($test)
|
||||
.into_iter()
|
||||
.map(|t| t.unwrap().into_data())
|
||||
.collect::<Vec<_>>(),
|
||||
dbg!($expect)
|
||||
);
|
||||
)*}
|
||||
)*}
|
||||
|
||||
/// Convert an `[ expr, ... ]` into a `[ *, ... ]`
|
||||
macro td ($($id:expr),*) {
|
||||
[$($id.into()),*]
|
||||
}
|
||||
|
||||
mod ident {
|
||||
use super::*;
|
||||
macro ident ($($id:literal),*) {
|
||||
[$(TokenData::String($id.into())),*]
|
||||
}
|
||||
test_lexer_data_type! {
|
||||
underscore { "_ _" => ident!["_", "_"] }
|
||||
unicode { "_ε ε_" => ident!["_ε", "ε_"] }
|
||||
many_underscore { "____________________________________" =>
|
||||
ident!["____________________________________"] }
|
||||
}
|
||||
}
|
||||
mod keyword {
|
||||
use super::*;
|
||||
macro kw($($k:ident),*) {
|
||||
[ $(TokenKind::$k,)* ]
|
||||
}
|
||||
test_lexer_output_type! {
|
||||
kw_break { "break break" => kw![Break, Break] }
|
||||
kw_continue { "continue continue" => kw![Continue, Continue] }
|
||||
kw_else { "else else" => kw![Else, Else] }
|
||||
kw_false { "false false" => kw![False, False] }
|
||||
kw_for { "for for" => kw![For, For] }
|
||||
kw_fn { "fn fn" => kw![Fn, Fn] }
|
||||
kw_if { "if if" => kw![If, If] }
|
||||
kw_in { "in in" => kw![In, In] }
|
||||
kw_let { "let let" => kw![Let, Let] }
|
||||
kw_return { "return return" => kw![Return, Return] }
|
||||
kw_true { "true true" => kw![True, True] }
|
||||
kw_while { "while while" => kw![While, While] }
|
||||
keywords { "break continue else false for fn if in let return true while" =>
|
||||
kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
|
||||
}
|
||||
}
|
||||
mod integer {
|
||||
use super::*;
|
||||
test_lexer_data_type! {
|
||||
hex {
|
||||
"0x0 0x1 0x15 0x2100 0x8000" =>
|
||||
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
|
||||
}
|
||||
dec {
|
||||
"0d0 0d1 0d21 0d8448 0d32768" =>
|
||||
td![0, 0x1, 0x15, 0x2100, 0x8000]
|
||||
}
|
||||
oct {
|
||||
"0o0 0o1 0o25 0o20400 0o100000" =>
|
||||
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
|
||||
}
|
||||
bin {
|
||||
"0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
|
||||
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
|
||||
}
|
||||
baseless {
|
||||
"0 1 21 8448 32768" =>
|
||||
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
|
||||
}
|
||||
}
|
||||
}
|
||||
mod string {
|
||||
use super::*;
|
||||
test_lexer_data_type! {
|
||||
empty_string {
|
||||
"\"\"" =>
|
||||
td![String::from("")]
|
||||
}
|
||||
unicode_string {
|
||||
"\"I 💙 🦈!\"" =>
|
||||
td![String::from("I 💙 🦈!")]
|
||||
}
|
||||
escape_string {
|
||||
" \"This is a shark: \\u{1f988}\" " =>
|
||||
td![String::from("This is a shark: 🦈")]
|
||||
}
|
||||
}
|
||||
}
|
||||
mod punct {
|
||||
macro op($op:ident) {
|
||||
TokenKind::Punct(Punct::$op)
|
||||
}
|
||||
|
||||
use super::*;
|
||||
test_lexer_output_type! {
|
||||
l_curly { "{ {" => [ op!(LCurly), op!(LCurly) ] }
|
||||
r_curly { "} }" => [ op!(RCurly), op!(RCurly) ] }
|
||||
l_brack { "[ [" => [ op!(LBrack), op!(LBrack) ] }
|
||||
r_brack { "] ]" => [ op!(RBrack), op!(RBrack) ] }
|
||||
l_paren { "( (" => [ op!(LParen), op!(LParen) ] }
|
||||
r_paren { ") )" => [ op!(RParen), op!(RParen) ] }
|
||||
amp { "& &" => [ op!(Amp), op!(Amp) ] }
|
||||
amp_amp { "&& &&" => [ op!(AmpAmp), op!(AmpAmp) ] }
|
||||
amp_eq { "&= &=" => [ op!(AmpEq), op!(AmpEq) ] }
|
||||
arrow { "-> ->" => [ op!(Arrow), op!(Arrow)] }
|
||||
at { "@ @" => [ op!(At), op!(At)] }
|
||||
backslash { "\\ \\" => [ op!(Backslash), op!(Backslash)] }
|
||||
bang { "! !" => [ op!(Bang), op!(Bang)] }
|
||||
bangbang { "!! !!" => [ op!(BangBang), op!(BangBang)] }
|
||||
bangeq { "!= !=" => [ op!(BangEq), op!(BangEq)] }
|
||||
bar { "| |" => [ op!(Bar), op!(Bar)] }
|
||||
barbar { "|| ||" => [ op!(BarBar), op!(BarBar)] }
|
||||
bareq { "|= |=" => [ op!(BarEq), op!(BarEq)] }
|
||||
colon { ": :" => [ op!(Colon), op!(Colon)] }
|
||||
comma { ", ," => [ op!(Comma), op!(Comma)] }
|
||||
dot { ". ." => [ op!(Dot), op!(Dot)] }
|
||||
dotdot { ".. .." => [ op!(DotDot), op!(DotDot)] }
|
||||
dotdoteq { "..= ..=" => [ op!(DotDotEq), op!(DotDotEq)] }
|
||||
eq { "= =" => [ op!(Eq), op!(Eq)] }
|
||||
eqeq { "== ==" => [ op!(EqEq), op!(EqEq)] }
|
||||
fatarrow { "=> =>" => [ op!(FatArrow), op!(FatArrow)] }
|
||||
grave { "` `" => [ op!(Grave), op!(Grave)] }
|
||||
gt { "> >" => [ op!(Gt), op!(Gt)] }
|
||||
gteq { ">= >=" => [ op!(GtEq), op!(GtEq)] }
|
||||
gtgt { ">> >>" => [ op!(GtGt), op!(GtGt)] }
|
||||
gtgteq { ">>= >>=" => [ op!(GtGtEq), op!(GtGtEq)] }
|
||||
hash { "# #" => [ op!(Hash), op!(Hash)] }
|
||||
lt { "< <" => [ op!(Lt), op!(Lt)] }
|
||||
lteq { "<= <=" => [ op!(LtEq), op!(LtEq)] }
|
||||
ltlt { "<< <<" => [ op!(LtLt), op!(LtLt)] }
|
||||
ltlteq { "<<= <<=" => [ op!(LtLtEq), op!(LtLtEq)] }
|
||||
minus { "- -" => [ op!(Minus), op!(Minus)] }
|
||||
minuseq { "-= -=" => [ op!(MinusEq), op!(MinusEq)] }
|
||||
plus { "+ +" => [ op!(Plus), op!(Plus)] }
|
||||
pluseq { "+= +=" => [ op!(PlusEq), op!(PlusEq)] }
|
||||
question { "? ?" => [ op!(Question), op!(Question)] }
|
||||
rem { "% %" => [ op!(Rem), op!(Rem)] }
|
||||
remeq { "%= %=" => [ op!(RemEq), op!(RemEq)] }
|
||||
semi { "; ;" => [ op!(Semi), op!(Semi)] }
|
||||
slash { "/ /" => [ op!(Slash), op!(Slash)] }
|
||||
slasheq { "/= /=" => [ op!(SlashEq), op!(SlashEq)] }
|
||||
star { "* *" => [ op!(Star), op!(Star)] }
|
||||
stareq { "*= *=" => [ op!(StarEq), op!(StarEq)] }
|
||||
tilde { "~ ~" => [ op!(Tilde), op!(Tilde)] }
|
||||
xor { "^ ^" => [ op!(Xor), op!(Xor)] }
|
||||
xoreq { "^= ^=" => [ op!(XorEq), op!(XorEq)] }
|
||||
xorxor { "^^ ^^" => [ op!(XorXor), op!(XorXor)] }
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user