cl-lexer: Move lexer into its own crate

2024-02-29 20:58:50 -06:00 · 2024-02-29 20:58:50 -06:00 · 50b473cd55
commit 50b473cd55
parent abf00f383c
15 changed files with 198 additions and 188 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -7,6 +7,7 @@ members = [
    "cl-token",
    "cl-ast",
    "cl-parser",
+    "cl-lexer",
 ]
 resolver = "2"

--- a/cl-interpret/Cargo.toml
+++ b/cl-interpret/Cargo.toml
@ -13,5 +13,5 @@ cl-structures = { path = "../cl-structures" }


 [dev-dependencies]
-conlang = { path = "../libconlang" }
+cl-lexer = { path = "../cl-lexer" }
 cl-parser = { path = "../cl-parser" }
--- a/cl-interpret/src/tests.rs
+++ b/cl-interpret/src/tests.rs
@ -2,7 +2,7 @@
 use crate::{env::Environment, temp_type_impl::ConValue, Interpret};
 use cl_ast::*;
 use cl_parser::Parser;
-use conlang::lexer::Lexer;
+use cl_lexer::Lexer;
 pub use macros::*;

 mod macros {
--- a/cl-lexer/Cargo.toml
+++ b/cl-lexer/Cargo.toml
@ -0,0 +1,13 @@
+[package]
+name = "cl-lexer"
+repository.workspace = true
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[dependencies]
+cl-token = { path = "../cl-token" }
+cl-structures = { path = "../cl-structures" }
+unicode-xid = "0.2.4"
--- a/libconlang/src/lexer.rs
+++ b/cl-lexer/src/lib.rs
@ -1,12 +1,16 @@
 //! Converts a text file into tokens
-use cl_token::*;
+#![feature(decl_macro)]
 use cl_structures::span::Loc;
+use cl_token::*;
 use std::{
    iter::Peekable,
    str::{Chars, FromStr},
 };
 use unicode_xid::UnicodeXID;

+#[cfg(test)]
+mod tests;
+
 pub mod lexer_iter {
    //! Iterator over a [`Lexer`], returning [`LResult<Token>`]s
    use super::{
--- a/cl-lexer/src/tests.rs
+++ b/cl-lexer/src/tests.rs
@ -0,0 +1,167 @@
+use crate::Lexer;
+use cl_token::*;
+
+/// Generates one `#[test]` function per `name { input => expected, ... }` group.
+///
+/// Every `input` is handed to [`Lexer::new`], each yielded item is unwrapped
+/// (so an `Err` from the lexer fails the test by panicking) and reduced to its
+/// `ty()`. The collected `Vec` is then compared against `expected`.
+macro test_lexer_output_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
+    #[test]
+    fn $f() {$(
+        // `assert_eq!` already prints both sides on failure, so the expected
+        // value is passed directly instead of being echoed through `dbg!`.
+        assert_eq!(
+            Lexer::new($test)
+                .into_iter()
+                .map(|t| t.unwrap().ty())
+                .collect::<Vec<_>>(),
+            $expect
+        );
+    )*}
+)*}
+
+/// Generates one `#[test]` function per `name { input => expected, ... }` group.
+///
+/// Every `input` is handed to [`Lexer::new`], each yielded item is unwrapped
+/// (so an `Err` from the lexer fails the test by panicking) and converted with
+/// `into_data()`. The collected `Vec` is then compared against `expected`.
+macro test_lexer_data_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
+    #[test]
+    fn $f() {$(
+        // `assert_eq!` already prints both sides on failure, so the expected
+        // value is passed directly instead of being echoed through `dbg!`.
+        assert_eq!(
+            Lexer::new($test)
+                .into_iter()
+                .map(|t| t.unwrap().into_data())
+                .collect::<Vec<_>>(),
+            $expect
+        );
+    )*}
+)*}
+
+/// Builds an array in which every comma-separated expression has `.into()` applied.
+macro td ($($value:expr),*) {
+    [ $( $value.into() ),* ]
+}
+
+/// Identifier lexing: each input must yield exactly the listed identifiers.
+mod ident {
+    use super::*;
+
+    /// Wraps each string literal in `Data::Identifier`, converting it with `.into()`.
+    macro ident ($($name:literal),*) {
+        [ $( Data::Identifier($name.into()) ),* ]
+    }
+
+    test_lexer_data_type! {
+        underscore {
+            "_ _" => ident!["_", "_"]
+        }
+        unicode {
+            "_ε ε_" => ident!["_ε", "ε_"]
+        }
+        many_underscore {
+            "____________________________________" =>
+            ident!["____________________________________"]
+        }
+    }
+}
+/// Keyword lexing: each keyword is checked twice in a row, then all of them together.
+mod keyword {
+    use super::*;
+
+    /// Expands a list of `Keyword` variant names into an array of `Type::Keyword(..)`.
+    macro kw($($variant:ident),*) {
+        [ $( Type::Keyword(Keyword::$variant) ),* ]
+    }
+
+    test_lexer_output_type! {
+        kw_break    { "break break"       => kw![Break, Break] }
+        kw_continue { "continue continue" => kw![Continue, Continue] }
+        kw_else     { "else else"         => kw![Else, Else] }
+        kw_false    { "false false"       => kw![False, False] }
+        kw_for      { "for for"           => kw![For, For] }
+        kw_fn       { "fn fn"             => kw![Fn, Fn] }
+        kw_if       { "if if"             => kw![If, If] }
+        kw_in       { "in in"             => kw![In, In] }
+        kw_let      { "let let"           => kw![Let, Let] }
+        kw_return   { "return return"     => kw![Return, Return] }
+        kw_true     { "true true"         => kw![True, True] }
+        kw_while    { "while while"       => kw![While, While] }
+        keywords {
+            "break continue else false for fn if in let return true while" =>
+            kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While]
+        }
+    }
+}
+/// Integer lexing: the same five values (0, 1, 21, 8448, 32768) are spelled with
+/// each radix prefix, and once with no prefix at all.
+mod integer {
+    use super::*;
+
+    test_lexer_data_type! {
+        hex {
+            "0x0 0x1 0x15 0x2100 0x8000" => td![0x0, 0x1, 0x15, 0x2100, 0x8000]
+        }
+        dec {
+            "0d0 0d1 0d21 0d8448 0d32768" => td![0x0, 0x1, 0x15, 0x2100, 0x8000]
+        }
+        oct {
+            "0o0 0o1 0o25 0o20400 0o100000" => td![0x0, 0x1, 0x15, 0x2100, 0x8000]
+        }
+        bin {
+            "0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
+            td![0x0, 0x1, 0x15, 0x2100, 0x8000]
+        }
+        baseless {
+            "0 1 21 8448 32768" => td![0x0, 0x1, 0x15, 0x2100, 0x8000]
+        }
+    }
+}
+/// String-literal lexing: empty, raw Unicode, and `\u{...}` escape contents.
+mod string {
+    use super::*;
+
+    test_lexer_data_type! {
+        empty_string   { "\"\"" => td![String::from("")] }
+        unicode_string { "\"I 💙 🦈!\"" => td![String::from("I 💙 🦈!")] }
+        escape_string {
+            // Surrounding spaces sit outside the quotes; the escape is expected to decode to 🦈.
+            " \"This is a shark: \\u{1f988}\" " =>
+            td![String::from("This is a shark: 🦈")]
+        }
+    }
+}
+/// Punctuation lexing: every test feeds the same punctuator twice, separated by a
+/// space, and expects two tokens of the matching `Type`.
+mod punct {
+    use super::*;
+
+    test_lexer_output_type! {
+        // Delimiters
+        l_curly   { "{ {"     => [Type::LCurly, Type::LCurly] }
+        r_curly   { "} }"     => [Type::RCurly, Type::RCurly] }
+        l_brack   { "[ ["     => [Type::LBrack, Type::LBrack] }
+        r_brack   { "] ]"     => [Type::RBrack, Type::RBrack] }
+        l_paren   { "( ("     => [Type::LParen, Type::LParen] }
+        r_paren   { ") )"     => [Type::RParen, Type::RParen] }
+        // Operators and other punctuation
+        amp       { "& &"     => [Type::Amp, Type::Amp] }
+        amp_amp   { "&& &&"   => [Type::AmpAmp, Type::AmpAmp] }
+        amp_eq    { "&= &="   => [Type::AmpEq, Type::AmpEq] }
+        arrow     { "-> ->"   => [Type::Arrow, Type::Arrow] }
+        at        { "@ @"     => [Type::At, Type::At] }
+        backslash { "\\ \\"   => [Type::Backslash, Type::Backslash] }
+        bang      { "! !"     => [Type::Bang, Type::Bang] }
+        bangbang  { "!! !!"   => [Type::BangBang, Type::BangBang] }
+        bangeq    { "!= !="   => [Type::BangEq, Type::BangEq] }
+        bar       { "| |"     => [Type::Bar, Type::Bar] }
+        barbar    { "|| ||"   => [Type::BarBar, Type::BarBar] }
+        bareq     { "|= |="   => [Type::BarEq, Type::BarEq] }
+        colon     { ": :"     => [Type::Colon, Type::Colon] }
+        comma     { ", ,"     => [Type::Comma, Type::Comma] }
+        dot       { ". ."     => [Type::Dot, Type::Dot] }
+        dotdot    { ".. .."   => [Type::DotDot, Type::DotDot] }
+        dotdoteq  { "..= ..=" => [Type::DotDotEq, Type::DotDotEq] }
+        eq        { "= ="     => [Type::Eq, Type::Eq] }
+        eqeq      { "== =="   => [Type::EqEq, Type::EqEq] }
+        fatarrow  { "=> =>"   => [Type::FatArrow, Type::FatArrow] }
+        grave     { "` `"     => [Type::Grave, Type::Grave] }
+        gt        { "> >"     => [Type::Gt, Type::Gt] }
+        gteq      { ">= >="   => [Type::GtEq, Type::GtEq] }
+        gtgt      { ">> >>"   => [Type::GtGt, Type::GtGt] }
+        gtgteq    { ">>= >>=" => [Type::GtGtEq, Type::GtGtEq] }
+        hash      { "# #"     => [Type::Hash, Type::Hash] }
+        lt        { "< <"     => [Type::Lt, Type::Lt] }
+        lteq      { "<= <="   => [Type::LtEq, Type::LtEq] }
+        ltlt      { "<< <<"   => [Type::LtLt, Type::LtLt] }
+        ltlteq    { "<<= <<=" => [Type::LtLtEq, Type::LtLtEq] }
+        minus     { "- -"     => [Type::Minus, Type::Minus] }
+        minuseq   { "-= -="   => [Type::MinusEq, Type::MinusEq] }
+        plus      { "+ +"     => [Type::Plus, Type::Plus] }
+        pluseq    { "+= +="   => [Type::PlusEq, Type::PlusEq] }
+        question  { "? ?"     => [Type::Question, Type::Question] }
+        rem       { "% %"     => [Type::Rem, Type::Rem] }
+        remeq     { "%= %="   => [Type::RemEq, Type::RemEq] }
+        semi      { "; ;"     => [Type::Semi, Type::Semi] }
+        slash     { "/ /"     => [Type::Slash, Type::Slash] }
+        slasheq   { "/= /="   => [Type::SlashEq, Type::SlashEq] }
+        star      { "* *"     => [Type::Star, Type::Star] }
+        stareq    { "*= *="   => [Type::StarEq, Type::StarEq] }
+        tilde     { "~ ~"     => [Type::Tilde, Type::Tilde] }
+        xor       { "^ ^"     => [Type::Xor, Type::Xor] }
+        xoreq     { "^= ^="   => [Type::XorEq, Type::XorEq] }
+        xorxor    { "^^ ^^"   => [Type::XorXor, Type::XorXor] }
+    }
+}
--- a/cl-parser/Cargo.toml
+++ b/cl-parser/Cargo.toml
@ -9,6 +9,6 @@ publish.workspace = true

 [dependencies]
 cl-ast = { path = "../cl-ast" }
+cl-lexer = { path = "../cl-lexer" }
 cl-token = { path = "../cl-token" }
 cl-structures = { path = "../cl-structures" }
-conlang = { path = "../libconlang" }
--- a/cl-parser/src/error.rs
+++ b/cl-parser/src/error.rs
@ -1,6 +1,6 @@
 use super::*;

-use conlang::lexer::error::{Error as LexError, Reason};
+use cl_lexer::error::{Error as LexError, Reason};
 use std::fmt::Display;
 pub type PResult<T> = Result<T, Error>;

--- a/cl-parser/src/parser.rs
+++ b/cl-parser/src/parser.rs
@ -5,7 +5,7 @@ use crate::error::{
    PResult, Parsing,
 };
 use cl_ast::*;
-use conlang::lexer::Lexer;
+use cl_lexer::Lexer;

 /// Parses a sequence of [Tokens](Token) into an [AST](cl_ast)
 pub struct Parser<'t> {
@ -905,11 +905,7 @@ impl<'t> Parser<'t> {
    /// [Block] = `{` [Stmt]* `}`
    pub fn block(&mut self) -> PResult<Block> {
        const PARSING: Parsing = Parsing::Block;
-        Ok(
-            Block {
-                stmts: delim(rep(Self::stmt, CURLIES.1, PARSING), CURLIES, PARSING)(self)?,
-            },
-        )
+        Ok(Block { stmts: delim(rep(Self::stmt, CURLIES.1, PARSING), CURLIES, PARSING)(self)? })
    }
 }
 /// ## Control flow subexpressions
--- a/cl-repl/Cargo.toml
+++ b/cl-repl/Cargo.toml
@ -12,6 +12,7 @@ publish.workspace = true
 [dependencies]
 conlang = { path = "../libconlang" }
 cl-ast = { path = "../cl-ast" }
+cl-lexer = { path = "../cl-lexer" }
 cl-token = { path = "../cl-token" }
 cl-parser = { path = "../cl-parser" }
 cl-interpret = { path = "../cl-interpret" }
--- a/cl-repl/examples/collect-identifiers.rs
+++ b/cl-repl/examples/collect-identifiers.rs
@ -1,9 +1,9 @@
 //! Collects identifiers into a list

+use cl_lexer::Lexer;
 use cl_parser::Parser;
 use cl_repl::repline::Repline;
 use cl_structures::span::Loc;
-use conlang::lexer::Lexer;
 use std::{
    collections::HashMap,
    error::Error,
--- a/cl-repl/examples/identify_tokens.rs
+++ b/cl-repl/examples/identify_tokens.rs
@ -1,7 +1,7 @@
 //! This example grabs input from stdin, lexes it, and prints which lexer rules matched
 #![allow(unused_imports)]
+use cl_lexer::Lexer;
 use cl_token::Token;
-use conlang::lexer::Lexer;
 use std::{
    error::Error,
    io::{stdin, IsTerminal, Read},
--- a/cl-repl/src/lib.rs
+++ b/cl-repl/src/lib.rs
@ -74,12 +74,9 @@ pub mod program {
    };

    use cl_ast::{self as ast, ast_impl::format::Pretty};
+    use cl_lexer::Lexer;
    use cl_parser::{error::PResult, Parser};
-    use conlang::{
-        // pretty_printer::{PrettyPrintable, Printer},
-        lexer::Lexer,
-        resolver::{error::TyResult, Resolver},
-    };
+    use conlang::resolver::{error::TyResult, Resolver};
    use std::{fmt::Display, io::Write};

    pub struct Parsable;
@ -228,7 +225,7 @@ pub mod cli {
            match (repl, path) {
                (true, Some(path)) => {
                    let prog = std::fs::read_to_string(path).unwrap();
-                    let code = cl_parser::Parser::new(conlang::lexer::Lexer::new(&prog))
+                    let code = cl_parser::Parser::new(cl_lexer::Lexer::new(&prog))
                        .file()
                        .unwrap();
                    let mut env = cl_interpret::env::Environment::new();
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@ -2,8 +2,6 @@
 #![warn(clippy::all)]
 #![feature(decl_macro)]

-pub mod lexer;
-
 pub mod resolver;

 #[cfg(test)]
--- a/libconlang/src/tests.rs
+++ b/libconlang/src/tests.rs
@ -5,173 +5,6 @@ mod ast {
    // TODO
 }
 mod lexer {
-    use crate::lexer::Lexer;
-    use cl_token::*;
-
-    macro test_lexer_output_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
-        #[test]
-        fn $f() {$(
-            assert_eq!(
-                Lexer::new($test)
-                    .into_iter()
-                    .map(|t| t.unwrap().ty())
-                    .collect::<Vec<_>>(),
-                dbg!($expect)
-            );
-        )*}
-    )*}
-
-    macro test_lexer_data_type  ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
-        #[test]
-        fn $f() {$(
-            assert_eq!(
-                Lexer::new($test)
-                    .into_iter()
-                    .map(|t| t.unwrap().into_data())
-                    .collect::<Vec<_>>(),
-                dbg!($expect)
-            );
-        )*}
-    )*}
-
-    /// Convert an `[ expr, ... ]` into a `[ *, ... ]`
-    macro td ($($id:expr),*) {
-        [$($id.into()),*]
-    }
-
-    mod ident {
-        use super::*;
-        macro ident ($($id:literal),*) {
-            [$(Data::Identifier($id.into())),*]
-        }
-        test_lexer_data_type! {
-            underscore { "_ _" => ident!["_", "_"] }
-            unicode { "_ε ε_" => ident!["_ε", "ε_"] }
-            many_underscore { "____________________________________" =>
-            ident!["____________________________________"] }
-        }
-    }
-    mod keyword {
-        use super::*;
-        macro kw($($k:ident),*) {
-            [ $(Type::Keyword(Keyword::$k),)* ]
-        }
-        test_lexer_output_type! {
-            kw_break { "break break" => kw![Break, Break] }
-            kw_continue { "continue continue" => kw![Continue, Continue] }
-            kw_else { "else else" => kw![Else, Else] }
-            kw_false { "false false" => kw![False, False] }
-            kw_for { "for for" => kw![For, For] }
-            kw_fn { "fn fn" => kw![Fn, Fn] }
-            kw_if { "if if" => kw![If, If] }
-            kw_in { "in in" => kw![In, In] }
-            kw_let { "let let" => kw![Let, Let] }
-            kw_return { "return return" => kw![Return, Return] }
-            kw_true { "true true" => kw![True, True] }
-            kw_while { "while while" => kw![While, While] }
-            keywords { "break continue else false for fn if in let return true while" =>
-                kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
-        }
-    }
-    mod integer {
-        use super::*;
-        test_lexer_data_type! {
-            hex {
-                "0x0 0x1 0x15 0x2100 0x8000" =>
-                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
-            }
-            dec {
-                "0d0 0d1 0d21 0d8448 0d32768" =>
-                td![0, 0x1, 0x15, 0x2100, 0x8000]
-            }
-            oct {
-                "0o0 0o1 0o25 0o20400 0o100000" =>
-                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
-            }
-            bin {
-                "0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
-                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
-            }
-            baseless {
-                "0 1 21 8448 32768" =>
-                td![0x0, 0x1, 0x15, 0x2100, 0x8000]
-            }
-        }
-    }
-    mod string {
-        use super::*;
-        test_lexer_data_type! {
-            empty_string {
-                "\"\"" =>
-                td![String::from("")]
-            }
-            unicode_string {
-                "\"I 💙 🦈!\"" =>
-                td![String::from("I 💙 🦈!")]
-            }
-            escape_string {
-                " \"This is a shark: \\u{1f988}\" " =>
-                td![String::from("This is a shark: 🦈")]
-            }
-        }
-    }
-    mod punct {
-        use super::*;
-        test_lexer_output_type! {
-            l_curly   { "{ {"   => [ Type::LCurly, Type::LCurly ] }
-            r_curly   { "} }"   => [ Type::RCurly, Type::RCurly ] }
-            l_brack   { "[ ["   => [ Type::LBrack, Type::LBrack ] }
-            r_brack   { "] ]"   => [ Type::RBrack, Type::RBrack ] }
-            l_paren   { "( ("   => [ Type::LParen, Type::LParen ] }
-            r_paren   { ") )"   => [ Type::RParen, Type::RParen ] }
-            amp       { "& &"   => [ Type::Amp, Type::Amp ] }
-            amp_amp   { "&& &&" => [ Type::AmpAmp, Type::AmpAmp ] }
-            amp_eq    { "&= &=" => [ Type::AmpEq, Type::AmpEq ] }
-            arrow     { "-> ->" => [ Type::Arrow, Type::Arrow] }
-            at        { "@ @"   => [ Type::At, Type::At] }
-            backslash { "\\ \\" => [ Type::Backslash, Type::Backslash] }
-            bang      { "! !"   => [ Type::Bang, Type::Bang] }
-            bangbang  { "!! !!" => [ Type::BangBang, Type::BangBang] }
-            bangeq    { "!= !=" => [ Type::BangEq, Type::BangEq] }
-            bar       { "| |"   => [ Type::Bar, Type::Bar] }
-            barbar    { "|| ||" => [ Type::BarBar, Type::BarBar] }
-            bareq     { "|= |=" => [ Type::BarEq, Type::BarEq] }
-            colon     { ": :"   => [ Type::Colon, Type::Colon] }
-            comma     { ", ,"   => [ Type::Comma, Type::Comma] }
-            dot       { ". ."   => [ Type::Dot, Type::Dot] }
-            dotdot    { ".. .." => [ Type::DotDot, Type::DotDot] }
-            dotdoteq  { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
-            eq        { "= ="   => [ Type::Eq, Type::Eq] }
-            eqeq      { "== ==" => [ Type::EqEq, Type::EqEq] }
-            fatarrow  { "=> =>" => [ Type::FatArrow, Type::FatArrow] }
-            grave     { "` `"   => [ Type::Grave, Type::Grave] }
-            gt        { "> >"   => [ Type::Gt, Type::Gt] }
-            gteq      { ">= >=" => [ Type::GtEq, Type::GtEq] }
-            gtgt      { ">> >>" => [ Type::GtGt, Type::GtGt] }
-            gtgteq    { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
-            hash      { "# #"   => [ Type::Hash, Type::Hash] }
-            lt        { "< <"   => [ Type::Lt, Type::Lt] }
-            lteq      { "<= <=" => [ Type::LtEq, Type::LtEq] }
-            ltlt      { "<< <<" => [ Type::LtLt, Type::LtLt] }
-            ltlteq    { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
-            minus     { "- -"   => [ Type::Minus, Type::Minus] }
-            minuseq   { "-= -=" => [ Type::MinusEq, Type::MinusEq] }
-            plus      { "+ +"   => [ Type::Plus, Type::Plus] }
-            pluseq    { "+= +=" => [ Type::PlusEq, Type::PlusEq] }
-            question  { "? ?"   => [ Type::Question, Type::Question] }
-            rem       { "% %"   => [ Type::Rem, Type::Rem] }
-            remeq     { "%= %=" => [ Type::RemEq, Type::RemEq] }
-            semi      { "; ;"   => [ Type::Semi, Type::Semi] }
-            slash     { "/ /"   => [ Type::Slash, Type::Slash] }
-            slasheq   { "/= /=" => [ Type::SlashEq, Type::SlashEq] }
-            star      { "* *"   => [ Type::Star, Type::Star] }
-            stareq    { "*= *=" => [ Type::StarEq, Type::StarEq] }
-            tilde     { "~ ~"   => [ Type::Tilde, Type::Tilde] }
-            xor       { "^ ^"   => [ Type::Xor, Type::Xor] }
-            xoreq     { "^= ^=" => [ Type::XorEq, Type::XorEq] }
-            xorxor    { "^^ ^^" => [ Type::XorXor, Type::XorXor] }
-        }
-    }
 }
 mod parser {
    // TODO