cl-lexer: Move lexer into its own crate

This commit is contained in:
John 2024-02-29 20:58:50 -06:00
parent abf00f383c
commit 50b473cd55
15 changed files with 198 additions and 188 deletions

View File

@ -7,6 +7,7 @@ members = [
"cl-token",
"cl-ast",
"cl-parser",
"cl-lexer",
]
resolver = "2"

View File

@ -13,5 +13,5 @@ cl-structures = { path = "../cl-structures" }
[dev-dependencies]
conlang = { path = "../libconlang" }
cl-lexer = { path = "../cl-lexer" }
cl-parser = { path = "../cl-parser" }

View File

@ -2,7 +2,7 @@
use crate::{env::Environment, temp_type_impl::ConValue, Interpret};
use cl_ast::*;
use cl_parser::Parser;
use conlang::lexer::Lexer;
use cl_lexer::Lexer;
pub use macros::*;
mod macros {

13
cl-lexer/Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "cl-lexer"
repository.workspace = true
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
publish.workspace = true
[dependencies]
cl-token = { path = "../cl-token" }
cl-structures = { path = "../cl-structures" }
unicode-xid = "0.2.4"

View File

@ -1,12 +1,16 @@
//! Converts a text file into tokens
use cl_token::*;
#![feature(decl_macro)]
use cl_structures::span::Loc;
use cl_token::*;
use std::{
iter::Peekable,
str::{Chars, FromStr},
};
use unicode_xid::UnicodeXID;
#[cfg(test)]
mod tests;
pub mod lexer_iter {
//! Iterator over a [`Lexer`], returning [`LResult<Token>`]s
use super::{

167
cl-lexer/src/tests.rs Normal file
View File

@ -0,0 +1,167 @@
use crate::Lexer;
use cl_token::*;
/// Generates one `#[test]` function per `name { input => expected, ... }` entry.
///
/// For every `input => expected` pair the input is fed to [`Lexer::new`], each
/// lexed item is unwrapped and reduced with `ty()`, and the collected `Vec` is
/// compared against `expected` (which is echoed through `dbg!`).
/// A lexer error therefore fails the test via the `unwrap` panic.
macro test_lexer_output_type ($($name:ident {$($source:expr => $want:expr),*$(,)?})*) {$(
    #[test]
    fn $name() {$(
        let types: Vec<_> = Lexer::new($source)
            .into_iter()
            .map(|token| token.unwrap().ty())
            .collect();
        assert_eq!(types, dbg!($want));
    )*}
)*}
/// Generates one `#[test]` function per `name { input => expected, ... }` entry.
///
/// For every `input => expected` pair the input is fed to [`Lexer::new`], each
/// lexed item is unwrapped and consumed with `into_data()`, and the collected
/// `Vec` is compared against `expected` (which is echoed through `dbg!`).
/// A lexer error therefore fails the test via the `unwrap` panic.
macro test_lexer_data_type ($($name:ident {$($source:expr => $want:expr),*$(,)?})*) {$(
    #[test]
    fn $name() {$(
        let data: Vec<_> = Lexer::new($source)
            .into_iter()
            .map(|token| token.unwrap().into_data())
            .collect();
        assert_eq!(data, dbg!($want));
    )*}
)*}
/// Builds an array by calling `.into()` on every comma-separated expression:
/// `td![a, b]` expands to `[a.into(), b.into()]`.
///
/// No target type is named here, so the element type is inferred at the use
/// site (presumably the token data type that `test_lexer_data_type!` compares
/// against; verify against `cl_token`).
macro td ($($id:expr),*) {
[$($id.into()),*]
}
/// Identifier lexing: bare underscores, non-ASCII letters, and long runs of underscores.
mod ident {
    use super::*;
    /// Wraps each string literal in `Data::Identifier`, producing an array
    macro ident ($($id:literal),*) {
        [$(Data::Identifier($id.into())),*]
    }
    test_lexer_data_type! {
        underscore { "_ _" => ident!["_", "_"] }
        // Each whitespace-separated word must come back as an identifier with
        // exactly the same text, so `_ε` is expected as `"_ε"`, never `""`.
        unicode { "_ε ε_" => ident!["_ε", "ε_"] }
        many_underscore { "____________________________________" =>
            ident!["____________________________________"] }
    }
}
/// Keyword lexing: every reserved word must lex to its `Type::Keyword` variant.
mod keyword {
    use super::*;

    /// Expands a list of `Keyword` variant names into an array of `Type::Keyword` values
    macro kw($($variant:ident),*) {
        [$(Type::Keyword(Keyword::$variant)),*]
    }

    test_lexer_output_type! {
        // One test per keyword, each lexing the word twice
        kw_break    { "break break"       => kw![Break, Break] }
        kw_continue { "continue continue" => kw![Continue, Continue] }
        kw_else     { "else else"         => kw![Else, Else] }
        kw_false    { "false false"       => kw![False, False] }
        kw_for      { "for for"           => kw![For, For] }
        kw_fn       { "fn fn"             => kw![Fn, Fn] }
        kw_if       { "if if"             => kw![If, If] }
        kw_in       { "in in"             => kw![In, In] }
        kw_let      { "let let"           => kw![Let, Let] }
        kw_return   { "return return"     => kw![Return, Return] }
        kw_true     { "true true"         => kw![True, True] }
        kw_while    { "while while"       => kw![While, While] }
        // All of the keywords above in a single input
        keywords {
            "break continue else false for fn if in let return true while" =>
                kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While]
        }
    }
}
/// Integer literal lexing.
///
/// Every test spells the same five values (0, 1, 21, 8448, 32768) in a
/// different notation and expects identical token data.
mod integer {
    use super::*;

    test_lexer_data_type! {
        // `0x` prefix
        hex { "0x0 0x1 0x15 0x2100 0x8000" => td![0x0, 0x1, 0x15, 0x2100, 0x8000] }
        // `0d` prefix
        dec { "0d0 0d1 0d21 0d8448 0d32768" => td![0, 0x1, 0x15, 0x2100, 0x8000] }
        // `0o` prefix
        oct { "0o0 0o1 0o25 0o20400 0o100000" => td![0x0, 0x1, 0x15, 0x2100, 0x8000] }
        // `0b` prefix
        bin {
            "0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
        }
        // No prefix at all
        baseless { "0 1 21 8448 32768" => td![0x0, 0x1, 0x15, 0x2100, 0x8000] }
    }
}
/// String literal lexing: the expected data is the literal's contents without the quotes.
mod string {
    use super::*;

    test_lexer_data_type! {
        // `""` yields an empty string
        empty_string { "\"\"" => td![String::from("")] }
        // Non-ASCII characters inside the quotes are kept as-is
        unicode_string { "\"I 💙 🦈!\"" => td![String::from("I 💙 🦈!")] }
        // A `\u{...}` escape in the source text is expected to come back as the
        // character it names; the whitespace around the literal yields no data
        escape_string {
            " \"This is a shark: \\u{1f988}\" " => td![String::from("This is a shark: 🦈")]
        }
    }
}
/// Punctuation lexing: each test lexes the same punctuator twice, separated by
/// a space, and expects two tokens of the matching `Type`.
mod punct {
    use super::*;

    test_lexer_output_type! {
        // Delimiters
        l_curly   { "{ {"     => [ Type::LCurly, Type::LCurly ] }
        r_curly   { "} }"     => [ Type::RCurly, Type::RCurly ] }
        l_brack   { "[ ["     => [ Type::LBrack, Type::LBrack ] }
        r_brack   { "] ]"     => [ Type::RBrack, Type::RBrack ] }
        l_paren   { "( ("     => [ Type::LParen, Type::LParen ] }
        r_paren   { ") )"     => [ Type::RParen, Type::RParen ] }

        // Operators and other punctuation
        amp       { "& &"     => [ Type::Amp, Type::Amp ] }
        amp_amp   { "&& &&"   => [ Type::AmpAmp, Type::AmpAmp ] }
        amp_eq    { "&= &="   => [ Type::AmpEq, Type::AmpEq ] }
        arrow     { "-> ->"   => [ Type::Arrow, Type::Arrow] }
        at        { "@ @"     => [ Type::At, Type::At] }
        backslash { "\\ \\"   => [ Type::Backslash, Type::Backslash] }
        bang      { "! !"     => [ Type::Bang, Type::Bang] }
        bangbang  { "!! !!"   => [ Type::BangBang, Type::BangBang] }
        bangeq    { "!= !="   => [ Type::BangEq, Type::BangEq] }
        bar       { "| |"     => [ Type::Bar, Type::Bar] }
        barbar    { "|| ||"   => [ Type::BarBar, Type::BarBar] }
        bareq     { "|= |="   => [ Type::BarEq, Type::BarEq] }
        colon     { ": :"     => [ Type::Colon, Type::Colon] }
        comma     { ", ,"     => [ Type::Comma, Type::Comma] }
        dot       { ". ."     => [ Type::Dot, Type::Dot] }
        dotdot    { ".. .."   => [ Type::DotDot, Type::DotDot] }
        dotdoteq  { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
        eq        { "= ="     => [ Type::Eq, Type::Eq] }
        eqeq      { "== =="   => [ Type::EqEq, Type::EqEq] }
        fatarrow  { "=> =>"   => [ Type::FatArrow, Type::FatArrow] }
        grave     { "` `"     => [ Type::Grave, Type::Grave] }
        gt        { "> >"     => [ Type::Gt, Type::Gt] }
        gteq      { ">= >="   => [ Type::GtEq, Type::GtEq] }
        gtgt      { ">> >>"   => [ Type::GtGt, Type::GtGt] }
        gtgteq    { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
        hash      { "# #"     => [ Type::Hash, Type::Hash] }
        lt        { "< <"     => [ Type::Lt, Type::Lt] }
        lteq      { "<= <="   => [ Type::LtEq, Type::LtEq] }
        ltlt      { "<< <<"   => [ Type::LtLt, Type::LtLt] }
        ltlteq    { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
        minus     { "- -"     => [ Type::Minus, Type::Minus] }
        minuseq   { "-= -="   => [ Type::MinusEq, Type::MinusEq] }
        plus      { "+ +"     => [ Type::Plus, Type::Plus] }
        pluseq    { "+= +="   => [ Type::PlusEq, Type::PlusEq] }
        question  { "? ?"     => [ Type::Question, Type::Question] }
        rem       { "% %"     => [ Type::Rem, Type::Rem] }
        remeq     { "%= %="   => [ Type::RemEq, Type::RemEq] }
        semi      { "; ;"     => [ Type::Semi, Type::Semi] }
        slash     { "/ /"     => [ Type::Slash, Type::Slash] }
        slasheq   { "/= /="   => [ Type::SlashEq, Type::SlashEq] }
        star      { "* *"     => [ Type::Star, Type::Star] }
        stareq    { "*= *="   => [ Type::StarEq, Type::StarEq] }
        tilde     { "~ ~"     => [ Type::Tilde, Type::Tilde] }
        xor       { "^ ^"     => [ Type::Xor, Type::Xor] }
        xoreq     { "^= ^="   => [ Type::XorEq, Type::XorEq] }
        xorxor    { "^^ ^^"   => [ Type::XorXor, Type::XorXor] }
    }
}

View File

@ -9,6 +9,6 @@ publish.workspace = true
[dependencies]
cl-ast = { path = "../cl-ast" }
cl-lexer = { path = "../cl-lexer" }
cl-token = { path = "../cl-token" }
cl-structures = { path = "../cl-structures" }
conlang = { path = "../libconlang" }

View File

@ -1,6 +1,6 @@
use super::*;
use conlang::lexer::error::{Error as LexError, Reason};
use cl_lexer::error::{Error as LexError, Reason};
use std::fmt::Display;
pub type PResult<T> = Result<T, Error>;

View File

@ -5,7 +5,7 @@ use crate::error::{
PResult, Parsing,
};
use cl_ast::*;
use conlang::lexer::Lexer;
use cl_lexer::Lexer;
/// Parses a sequence of [Tokens](Token) into an [AST](cl_ast)
pub struct Parser<'t> {
@ -905,11 +905,7 @@ impl<'t> Parser<'t> {
/// [Block] = `{` [Stmt]* `}`
pub fn block(&mut self) -> PResult<Block> {
const PARSING: Parsing = Parsing::Block;
Ok(
Block {
stmts: delim(rep(Self::stmt, CURLIES.1, PARSING), CURLIES, PARSING)(self)?,
},
)
Ok(Block { stmts: delim(rep(Self::stmt, CURLIES.1, PARSING), CURLIES, PARSING)(self)? })
}
}
/// ## Control flow subexpressions

View File

@ -12,6 +12,7 @@ publish.workspace = true
[dependencies]
conlang = { path = "../libconlang" }
cl-ast = { path = "../cl-ast" }
cl-lexer = { path = "../cl-lexer" }
cl-token = { path = "../cl-token" }
cl-parser = { path = "../cl-parser" }
cl-interpret = { path = "../cl-interpret" }

View File

@ -1,9 +1,9 @@
//! Collects identifiers into a list
use cl_lexer::Lexer;
use cl_parser::Parser;
use cl_repl::repline::Repline;
use cl_structures::span::Loc;
use conlang::lexer::Lexer;
use std::{
collections::HashMap,
error::Error,

View File

@ -1,7 +1,7 @@
//! This example grabs input from stdin, lexes it, and prints which lexer rules matched
#![allow(unused_imports)]
use cl_lexer::Lexer;
use cl_token::Token;
use conlang::lexer::Lexer;
use std::{
error::Error,
io::{stdin, IsTerminal, Read},

View File

@ -74,12 +74,9 @@ pub mod program {
};
use cl_ast::{self as ast, ast_impl::format::Pretty};
use cl_lexer::Lexer;
use cl_parser::{error::PResult, Parser};
use conlang::{
// pretty_printer::{PrettyPrintable, Printer},
lexer::Lexer,
resolver::{error::TyResult, Resolver},
};
use conlang::resolver::{error::TyResult, Resolver};
use std::{fmt::Display, io::Write};
pub struct Parsable;
@ -228,7 +225,7 @@ pub mod cli {
match (repl, path) {
(true, Some(path)) => {
let prog = std::fs::read_to_string(path).unwrap();
let code = cl_parser::Parser::new(conlang::lexer::Lexer::new(&prog))
let code = cl_parser::Parser::new(cl_lexer::Lexer::new(&prog))
.file()
.unwrap();
let mut env = cl_interpret::env::Environment::new();

View File

@ -2,8 +2,6 @@
#![warn(clippy::all)]
#![feature(decl_macro)]
pub mod lexer;
pub mod resolver;
#[cfg(test)]

View File

@ -5,173 +5,6 @@ mod ast {
// TODO
}
mod lexer {
use crate::lexer::Lexer;
use cl_token::*;
macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().ty())
.collect::<Vec<_>>(),
dbg!($expect)
);
)*}
)*}
macro test_lexer_data_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().into_data())
.collect::<Vec<_>>(),
dbg!($expect)
);
)*}
)*}
/// Convert an `[ expr, ... ]` into a `[ *, ... ]`
macro td ($($id:expr),*) {
[$($id.into()),*]
}
/// Identifier lexing: bare underscores, non-ASCII letters, and long runs of underscores.
mod ident {
    use super::*;
    /// Wraps each string literal in `Data::Identifier`, producing an array
    macro ident ($($id:literal),*) {
        [$(Data::Identifier($id.into())),*]
    }
    test_lexer_data_type! {
        underscore { "_ _" => ident!["_", "_"] }
        // Each whitespace-separated word must come back as an identifier with
        // exactly the same text, so `_ε` is expected as `"_ε"`, never `""`.
        unicode { "_ε ε_" => ident!["_ε", "ε_"] }
        many_underscore { "____________________________________" =>
            ident!["____________________________________"] }
    }
}
mod keyword {
use super::*;
macro kw($($k:ident),*) {
[ $(Type::Keyword(Keyword::$k),)* ]
}
test_lexer_output_type! {
kw_break { "break break" => kw![Break, Break] }
kw_continue { "continue continue" => kw![Continue, Continue] }
kw_else { "else else" => kw![Else, Else] }
kw_false { "false false" => kw![False, False] }
kw_for { "for for" => kw![For, For] }
kw_fn { "fn fn" => kw![Fn, Fn] }
kw_if { "if if" => kw![If, If] }
kw_in { "in in" => kw![In, In] }
kw_let { "let let" => kw![Let, Let] }
kw_return { "return return" => kw![Return, Return] }
kw_true { "true true" => kw![True, True] }
kw_while { "while while" => kw![While, While] }
keywords { "break continue else false for fn if in let return true while" =>
kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
}
}
mod integer {
use super::*;
test_lexer_data_type! {
hex {
"0x0 0x1 0x15 0x2100 0x8000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
dec {
"0d0 0d1 0d21 0d8448 0d32768" =>
td![0, 0x1, 0x15, 0x2100, 0x8000]
}
oct {
"0o0 0o1 0o25 0o20400 0o100000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
bin {
"0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
baseless {
"0 1 21 8448 32768" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
}
}
mod string {
use super::*;
test_lexer_data_type! {
empty_string {
"\"\"" =>
td![String::from("")]
}
unicode_string {
"\"I 💙 🦈!\"" =>
td![String::from("I 💙 🦈!")]
}
escape_string {
" \"This is a shark: \\u{1f988}\" " =>
td![String::from("This is a shark: 🦈")]
}
}
}
mod punct {
use super::*;
test_lexer_output_type! {
l_curly { "{ {" => [ Type::LCurly, Type::LCurly ] }
r_curly { "} }" => [ Type::RCurly, Type::RCurly ] }
l_brack { "[ [" => [ Type::LBrack, Type::LBrack ] }
r_brack { "] ]" => [ Type::RBrack, Type::RBrack ] }
l_paren { "( (" => [ Type::LParen, Type::LParen ] }
r_paren { ") )" => [ Type::RParen, Type::RParen ] }
amp { "& &" => [ Type::Amp, Type::Amp ] }
amp_amp { "&& &&" => [ Type::AmpAmp, Type::AmpAmp ] }
amp_eq { "&= &=" => [ Type::AmpEq, Type::AmpEq ] }
arrow { "-> ->" => [ Type::Arrow, Type::Arrow] }
at { "@ @" => [ Type::At, Type::At] }
backslash { "\\ \\" => [ Type::Backslash, Type::Backslash] }
bang { "! !" => [ Type::Bang, Type::Bang] }
bangbang { "!! !!" => [ Type::BangBang, Type::BangBang] }
bangeq { "!= !=" => [ Type::BangEq, Type::BangEq] }
bar { "| |" => [ Type::Bar, Type::Bar] }
barbar { "|| ||" => [ Type::BarBar, Type::BarBar] }
bareq { "|= |=" => [ Type::BarEq, Type::BarEq] }
colon { ": :" => [ Type::Colon, Type::Colon] }
comma { ", ," => [ Type::Comma, Type::Comma] }
dot { ". ." => [ Type::Dot, Type::Dot] }
dotdot { ".. .." => [ Type::DotDot, Type::DotDot] }
dotdoteq { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
eq { "= =" => [ Type::Eq, Type::Eq] }
eqeq { "== ==" => [ Type::EqEq, Type::EqEq] }
fatarrow { "=> =>" => [ Type::FatArrow, Type::FatArrow] }
grave { "` `" => [ Type::Grave, Type::Grave] }
gt { "> >" => [ Type::Gt, Type::Gt] }
gteq { ">= >=" => [ Type::GtEq, Type::GtEq] }
gtgt { ">> >>" => [ Type::GtGt, Type::GtGt] }
gtgteq { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
hash { "# #" => [ Type::Hash, Type::Hash] }
lt { "< <" => [ Type::Lt, Type::Lt] }
lteq { "<= <=" => [ Type::LtEq, Type::LtEq] }
ltlt { "<< <<" => [ Type::LtLt, Type::LtLt] }
ltlteq { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
minus { "- -" => [ Type::Minus, Type::Minus] }
minuseq { "-= -=" => [ Type::MinusEq, Type::MinusEq] }
plus { "+ +" => [ Type::Plus, Type::Plus] }
pluseq { "+= +=" => [ Type::PlusEq, Type::PlusEq] }
question { "? ?" => [ Type::Question, Type::Question] }
rem { "% %" => [ Type::Rem, Type::Rem] }
remeq { "%= %=" => [ Type::RemEq, Type::RemEq] }
semi { "; ;" => [ Type::Semi, Type::Semi] }
slash { "/ /" => [ Type::Slash, Type::Slash] }
slasheq { "/= /=" => [ Type::SlashEq, Type::SlashEq] }
star { "* *" => [ Type::Star, Type::Star] }
stareq { "*= *=" => [ Type::StarEq, Type::StarEq] }
tilde { "~ ~" => [ Type::Tilde, Type::Tilde] }
xor { "^ ^" => [ Type::Xor, Type::Xor] }
xoreq { "^= ^=" => [ Type::XorEq, Type::XorEq] }
xorxor { "^^ ^^" => [ Type::XorXor, Type::XorXor] }
}
}
}
mod parser {
// TODO