From c43ecf00d1ad1cec02d06e41080312290fb19bcf Mon Sep 17 00:00:00 2001
From: John
Date: Mon, 23 Oct 2023 19:43:16 -0500
Subject: [PATCH] token::token_data::Data: Renamed from token::TokenData & Moved into its own module + Added token::preamble; common imports when working with Token

---
 libconlang/src/lexer.rs            |   4 +-
 libconlang/src/parser.rs           |  14 +--
 libconlang/src/tests.rs            |   4 +-
 libconlang/src/token.rs            | 141 ++++-----------------
 libconlang/src/token/token_data.rs |  45 +++++++++
 libconlang/src/token/token_type.rs |  97 +++++++++++++++++++-
 6 files changed, 172 insertions(+), 133 deletions(-)
 create mode 100644 libconlang/src/token/token_data.rs

diff --git a/libconlang/src/lexer.rs b/libconlang/src/lexer.rs
index 59c226e..f3059e3 100644
--- a/libconlang/src/lexer.rs
+++ b/libconlang/src/lexer.rs
@@ -117,7 +117,7 @@ impl<'t> Lexer<'t> {
             .copied()
             .ok_or(Error::end_of_file(self.line(), self.col()))
     }
-    fn produce(&mut self, ty: Type, data: impl Into<TokenData>) -> LResult {
+    fn produce(&mut self, ty: Type, data: impl Into<Data>) -> LResult {
         let loc = self.start_loc;
         self.start_loc = self.current_loc;
         self.start = self.current;
@@ -284,7 +284,7 @@ impl<'t> Lexer<'t> {
         if let Ok(keyword) = Keyword::from_str(&out) {
             self.produce(Type::Keyword(keyword), ())
         } else {
-            self.produce(Type::Identifier, TokenData::Identifier(out.into()))
+            self.produce(Type::Identifier, Data::Identifier(out.into()))
         }
     }
     fn xid_start(&mut self) -> LResult {
diff --git a/libconlang/src/parser.rs b/libconlang/src/parser.rs
index b83a5c3..8038e14 100644
--- a/libconlang/src/parser.rs
+++ b/libconlang/src/parser.rs
@@ -1,10 +1,6 @@
 //! Parses [tokens](super::token) into an [AST](super::ast)
 
-use super::{
-    ast::preamble::*,
-    lexer::Lexer,
-    token::{Keyword, Token, TokenData, Type},
-};
+use super::{ast::preamble::*, lexer::Lexer, token::preamble::*};
 use error::{Error, Reason::*, *};
 
 pub mod error {
@@ -256,7 +252,7 @@ macro ptodo($self:expr $(, $t:expr)*) {
 impl Parser {
     fn identifier(&mut self) -> PResult {
         let out = match self.matches(Type::Identifier)?.data() {
-            TokenData::Identifier(id) => Identifier(id.to_string()),
+            Data::Identifier(id) => Identifier(id.to_string()),
             _ => Err(Error::not_identifier())?,
         };
         self.consume();
@@ -280,7 +276,7 @@ impl Parser {
     }
     fn int(&mut self) -> PResult {
         let out = match self.matches(Type::Integer)?.data() {
-            TokenData::Integer(i) => *i,
+            Data::Integer(i) => *i,
             _ => Err(Error::not_int())?,
         };
         self.consume();
@@ -288,7 +284,7 @@ impl Parser {
     }
     fn string(&mut self) -> PResult {
         let out = match self.matches(Type::String)?.data() {
-            TokenData::String(s) => s.clone(),
+            Data::String(s) => s.clone(),
             _ => Err(Error::not_string())?,
         };
         self.consume();
@@ -296,7 +292,7 @@ impl Parser {
     }
     fn char(&mut self) -> PResult {
         let out = match self.matches(Type::Character)?.data() {
-            TokenData::Character(c) => *c,
+            Data::Character(c) => *c,
            _ => Err(Error::not_char())?,
         };
         self.consume();
diff --git a/libconlang/src/tests.rs b/libconlang/src/tests.rs
index adf93c9..9f06db7 100644
--- a/libconlang/src/tests.rs
+++ b/libconlang/src/tests.rs
@@ -8,7 +8,7 @@ mod lexer {
     #[allow(unused_imports)]
     use crate::{
         lexer::Lexer,
-        token::{Token, TokenData, Keyword, Type},
+        token::preamble::*,
     };
 
     macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
@@ -45,7 +45,7 @@ mod lexer {
     mod ident {
         use super::*;
         macro ident ($($id:literal),*) {
-            [$(TokenData::Identifier($id.into())),*]
+            [$(Data::Identifier($id.into())),*]
         }
         test_lexer_data_type! {
             underscore { "_ _" => ident!["_", "_"] }
diff --git a/libconlang/src/token.rs b/libconlang/src/token.rs
index 2296066..79e22b9 100644
--- a/libconlang/src/token.rs
+++ b/libconlang/src/token.rs
@@ -1,145 +1,56 @@
-//! Stores a component of a file as a type and span
+//! # Token
+//!
+//! Stores a component of a file as a [Type], some [Data], and a line and column number
 
-mod token_type;
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Type {
-    // Invalid syntax
-    Invalid,
-    // Any kind of comment
-    Comment,
-    // Any identifier
-    Identifier,
-    Keyword(Keyword),
-    // Literals
-    Integer,
-    Float,
-    String,
-    Character,
-    // Delimiters and punctuation
-    LCurly,    // {
-    RCurly,    // }
-    LBrack,    // [
-    RBrack,    // ]
-    LParen,    // (
-    RParen,    // )
-    Amp,       // &
-    AmpAmp,    // &&
-    AmpEq,     // &=
-    Arrow,     // ->
-    At,        // @
-    Backslash, // \
-    Bang,      // !
-    BangBang,  // !!
-    BangEq,    // !=
-    Bar,       // |
-    BarBar,    // ||
-    BarEq,     // |=
-    Colon,     // :
-    Comma,     // ,
-    Dot,       // .
-    DotDot,    // ..
-    DotDotEq,  // ..=
-    Eq,        // =
-    EqEq,      // ==
-    FatArrow,  // =>
-    Grave,     // `
-    Gt,        // >
-    GtEq,      // >=
-    GtGt,      // >>
-    GtGtEq,    // >>=
-    Hash,      // #
-    Lt,        // <
-    LtEq,      // <=
-    LtLt,      // <<
-    LtLtEq,    // <<=
-    Minus,     // -
-    MinusEq,   // -=
-    Plus,      // +
-    PlusEq,    // +=
-    Question,  // ?
-    Rem,       // %
-    RemEq,     // %=
-    Semi,      // ;
-    Slash,     // /
-    SlashEq,   // /=
-    Star,      // *
-    StarEq,    // *=
-    Tilde,     // ~
-    Xor,       // ^
-    XorEq,     // ^=
-    XorXor,    // ^^
+pub mod token_data;
+pub mod token_type;
+pub mod preamble {
+    //! Common imports for working with [tokens](super)
+    pub use super::{
+        token_data::Data,
+        token_type::{Keyword, Type},
+        Token,
+    };
 }
 
-/// Represents a reserved word.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Keyword {
-    Break,
-    Continue,
-    Else,
-    False,
-    For,
-    Fn,
-    If,
-    In,
-    Let,
-    Return,
-    True,
-    While,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum TokenData {
-    Identifier(Box<str>),
-    String(String),
-    Character(char),
-    Integer(u128),
-    Float(f64),
-    None,
-}
-from! {
-    value: &str => Self::Identifier(value.into()),
-    value: String => Self::String(value),
-    value: u128 => Self::Integer(value),
-    value: f64 => Self::Float(value),
-    value: char => Self::Character(value),
-    _v: () => Self::None,
-}
-macro from($($value:ident: $src:ty => $dst:expr),*$(,)?) {
-    $(impl From<$src> for TokenData {
-        fn from($value: $src) -> Self { $dst }
-    })*
-}
+use token_data::Data;
+use token_type::Type;
 
+/// Contains a single unit of lexical information,
+/// and an optional bit of [data](TokenData)
 #[derive(Clone, Debug, PartialEq)]
 pub struct Token {
     ty: Type,
-    data: TokenData,
+    data: Data,
     line: u32,
     col: u32,
 }
 impl Token {
-    /// Creates a new [Token] out of a [Type], [TokenData], line, and column.
-    pub fn new(ty: Type, data: impl Into<TokenData>, line: u32, col: u32) -> Self {
+    /// Creates a new [Token] out of a [Type], [Data], line, and column.
+    pub fn new(ty: Type, data: impl Into<Data>, line: u32, col: u32) -> Self {
         Self { ty, data: data.into(), line, col }
     }
     /// Casts this token to a new [Type]
     pub fn cast(self, ty: Type) -> Self {
         Self { ty, ..self }
     }
-    /// Gets the [Type] of this token
+    /// Returns the [Type] of this token
     pub fn ty(&self) -> Type {
         self.ty
     }
-    /// Gets the [TokenData] of this token
-    pub fn data(&self) -> &TokenData {
+    /// Returns a reference to this token's [Data]
+    pub fn data(&self) -> &Data {
         &self.data
     }
-    pub fn into_data(self) -> TokenData {
+    /// Converts this token into its inner [Data]
+    pub fn into_data(self) -> Data {
         self.data
     }
+    /// Returns the line where this token originated
     pub fn line(&self) -> u32 {
         self.line
     }
+    /// Returns the column where this token originated
     pub fn col(&self) -> u32 {
         self.col
     }
diff --git a/libconlang/src/token/token_data.rs b/libconlang/src/token/token_data.rs
new file mode 100644
index 0000000..5f54d63
--- /dev/null
+++ b/libconlang/src/token/token_data.rs
@@ -0,0 +1,45 @@
+//! Additional data stored within a [Token](super::Token),
+//! external to its [Type](super::token_type::Type)
+/// Additional data stored within a [Token](super::Token),
+/// external to its [Type](super::token_type::Type)
+#[derive(Clone, Debug, PartialEq)]
+pub enum Data {
+    /// [Token](super::Token) contains an [identifier](str)
+    Identifier(Box<str>),
+    /// [Token](super::Token) contains a [String]
+    String(String),
+    /// [Token](super::Token) contains a [character](char)
+    Character(char),
+    /// [Token](super::Token) contains an [integer](u128)
+    Integer(u128),
+    /// [Token](super::Token) contains a [float](f64)
+    Float(f64),
+    /// [Token](super::Token) contains no additional data
+    None,
+}
+from! {
+    value: &str => Self::Identifier(value.into()),
+    value: String => Self::String(value),
+    value: u128 => Self::Integer(value),
+    value: f64 => Self::Float(value),
+    value: char => Self::Character(value),
+    _v: () => Self::None,
+}
+/// Implements [From] for an enum
+macro from($($value:ident: $src:ty => $dst:expr),*$(,)?) {
+    $(impl From<$src> for Data {
+        fn from($value: $src) -> Self { $dst }
+    })*
+}
+impl std::fmt::Display for Data {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Data::Identifier(v) => v.fmt(f),
+            Data::String(v) => write!(f, "\"{v}\""),
+            Data::Character(v) => write!(f, "'{v}'"),
+            Data::Integer(v) => v.fmt(f),
+            Data::Float(v) => v.fmt(f),
+            Data::None => "None".fmt(f),
+        }
+    }
+}
diff --git a/libconlang/src/token/token_type.rs b/libconlang/src/token/token_type.rs
index ef30d51..3205b32 100644
--- a/libconlang/src/token/token_type.rs
+++ b/libconlang/src/token/token_type.rs
@@ -1,6 +1,92 @@
-//! Trait impls and helper functions for [Type] and [Keyword]
-use super::{Keyword, Type};
-use std::fmt::Display;
+//! Stores a [Token's](super::Token) lexical information
+use std::{fmt::Display, str::FromStr};
+
+/// Stores a [Token's](super::Token) lexical information
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Type {
+    // Invalid syntax
+    Invalid,
+    // Any kind of comment
+    Comment,
+    // Any identifier
+    Identifier,
+    Keyword(Keyword),
+    // Literals
+    Integer,
+    Float,
+    String,
+    Character,
+    // Delimiters and punctuation
+    LCurly,    // {
+    RCurly,    // }
+    LBrack,    // [
+    RBrack,    // ]
+    LParen,    // (
+    RParen,    // )
+    Amp,       // &
+    AmpAmp,    // &&
+    AmpEq,     // &=
+    Arrow,     // ->
+    At,        // @
+    Backslash, // \
+    Bang,      // !
+    BangBang,  // !!
+    BangEq,    // !=
+    Bar,       // |
+    BarBar,    // ||
+    BarEq,     // |=
+    Colon,     // :
+    Comma,     // ,
+    Dot,       // .
+    DotDot,    // ..
+    DotDotEq,  // ..=
+    Eq,        // =
+    EqEq,      // ==
+    FatArrow,  // =>
+    Grave,     // `
+    Gt,        // >
+    GtEq,      // >=
+    GtGt,      // >>
+    GtGtEq,    // >>=
+    Hash,      // #
+    Lt,        // <
+    LtEq,      // <=
+    LtLt,      // <<
+    LtLtEq,    // <<=
+    Minus,     // -
+    MinusEq,   // -=
+    Plus,      // +
+    PlusEq,    // +=
+    Question,  // ?
+    Rem,       // %
+    RemEq,     // %=
+    Semi,      // ;
+    Slash,     // /
+    SlashEq,   // /=
+    Star,      // *
+    StarEq,    // *=
+    Tilde,     // ~
+    Xor,       // ^
+    XorEq,     // ^=
+    XorXor,    // ^^
+}
+
+/// Represents a reserved word.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Keyword {
+    Break,
+    Continue,
+    Else,
+    False,
+    For,
+    Fn,
+    If,
+    In,
+    Let,
+    Return,
+    True,
+    While,
+}
 
 impl Display for Type {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -87,8 +173,9 @@ impl Display for Keyword {
         }
     }
 }
-impl std::str::FromStr for Keyword {
-    type Err = (); // If an identifier isn't a keyword, that's okay.
+impl FromStr for Keyword {
+    /// [FromStr] can only fail when an identifier isn't a keyword
+    type Err = ();
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         Ok(match s {
             "break" => Self::Break,
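
For reference, a minimal usage sketch of the token::preamble API introduced above. It is not part of the commit: the example() wrapper and the literal values are made up for illustration, while Token, Type, Keyword, and Data are the items re-exported by the patch (as used in tests.rs and the parser).

    // Illustrative only: exercises the API introduced by this patch, from inside the crate.
    use crate::token::preamble::*;

    fn example() {
        // Data implements From<&str>, From<String>, From<u128>, From<f64>, From<char>,
        // and From<()>, so callers hand Token::new a raw value (or `()` for "no data").
        let ident = Token::new(Type::Identifier, "foo", 1, 1);
        let kw = Token::new(Type::Keyword(Keyword::Let), (), 1, 5);
        assert_eq!(ident.data(), &Data::Identifier("foo".into()));
        assert_eq!(kw.data(), &Data::None);
    }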