Conlang v0.0.5: Pratternization

cl-token:
- Minimize data redundancy by consolidating the literal token kinds (Integer, String, Character) into TokenKind::Literal, and by merging TokenData::{String, Identifier} into TokenData::String
- Rename Op to Punct

cl-ast:
- Remove ExprKind::{Member, Call} in favor of making them 'binary' operators
- Consolidate boxes (TODO: consolidate more boxes)
- Remove repetition vecs in favor of boxes (this may come with performance tradeoffs!)

cl-lexer:
- Reflect changes from cl-token

cl-interpret, cl-repl/src/examples:
- Reflect changes from cl-ast

cl-parser:
- Switch to Pratt parsing for expressions
  - TODO: Code cleanup
  - TODO: Use total ordering for Precedence instead of binding powers (that's what the binding powers are there for anyway)
- Switch functional parsers to take Punct instead of TokenKind
  - It's not like we need a `for`-separated list
- Remove `binary` macro. No longer needed with precedence climbing.
- Repurpose `operator` macro to produce both the operator and the respective Precedence
- Remove several of the smaller parser functions, since they've been consolidated into the larger `exprkind`
This commit is contained in:
2024-04-13 03:33:26 -05:00
parent 2c36ccc0cf
commit fc3cbbf450
11 changed files with 636 additions and 778 deletions

View File

@@ -2,7 +2,7 @@
#![warn(clippy::all)]
#![feature(decl_macro)]
use cl_structures::span::Loc;
use cl_token::{token_type::Op, TokenKind as Kind, *};
use cl_token::{TokenKind as Kind, *};
use std::{
iter::Peekable,
str::{Chars, FromStr},
@@ -97,33 +97,33 @@ impl<'t> Lexer<'t> {
/// Scans through the text, searching for the next [Token]
pub fn scan(&mut self) -> LResult<Token> {
match self.skip_whitespace().peek()? {
'{' => self.consume()?.produce_op(Op::LCurly),
'}' => self.consume()?.produce_op(Op::RCurly),
'[' => self.consume()?.produce_op(Op::LBrack),
']' => self.consume()?.produce_op(Op::RBrack),
'(' => self.consume()?.produce_op(Op::LParen),
')' => self.consume()?.produce_op(Op::RParen),
'{' => self.consume()?.produce_op(Punct::LCurly),
'}' => self.consume()?.produce_op(Punct::RCurly),
'[' => self.consume()?.produce_op(Punct::LBrack),
']' => self.consume()?.produce_op(Punct::RBrack),
'(' => self.consume()?.produce_op(Punct::LParen),
')' => self.consume()?.produce_op(Punct::RParen),
'&' => self.consume()?.amp(),
'@' => self.consume()?.produce_op(Op::At),
'\\' => self.consume()?.produce_op(Op::Backslash),
'@' => self.consume()?.produce_op(Punct::At),
'\\' => self.consume()?.produce_op(Punct::Backslash),
'!' => self.consume()?.bang(),
'|' => self.consume()?.bar(),
':' => self.consume()?.colon(),
',' => self.consume()?.produce_op(Op::Comma),
',' => self.consume()?.produce_op(Punct::Comma),
'.' => self.consume()?.dot(),
'=' => self.consume()?.equal(),
'`' => self.consume()?.produce_op(Op::Grave),
'`' => self.consume()?.produce_op(Punct::Grave),
'>' => self.consume()?.greater(),
'#' => self.consume()?.hash(),
'<' => self.consume()?.less(),
'-' => self.consume()?.minus(),
'+' => self.consume()?.plus(),
'?' => self.consume()?.produce_op(Op::Question),
'?' => self.consume()?.produce_op(Punct::Question),
'%' => self.consume()?.rem(),
';' => self.consume()?.produce_op(Op::Semi),
';' => self.consume()?.produce_op(Punct::Semi),
'/' => self.consume()?.slash(),
'*' => self.consume()?.star(),
'~' => self.consume()?.produce_op(Op::Tilde),
'~' => self.consume()?.produce_op(Punct::Tilde),
'^' => self.consume()?.xor(),
'0' => self.consume()?.int_with_base(),
'1'..='9' => self.digits::<10>(),
@@ -163,8 +163,8 @@ impl<'t> Lexer<'t> {
self.start = self.current;
Ok(Token::new(kind, data, loc.0, loc.1))
}
/// Produces a [Token] for the given operator/punctuation `kind`.
///
/// Wraps `kind` in the matching [TokenKind] variant and forwards to `produce`
/// with `()` as the token data.
fn produce_op(&mut self, kind: Op) -> LResult<Token> {
self.produce(TokenKind::Op(kind), ())
fn produce_op(&mut self, kind: Punct) -> LResult<Token> {
self.produce(TokenKind::Punct(kind), ())
}
fn skip_whitespace(&mut self) -> &mut Self {
while let Ok(c) = self.peek() {
@@ -195,120 +195,120 @@ impl<'t> Lexer<'t> {
// Continuation lexers for multi-character operators/punctuation.
//
// `scan` consumes the leading character and then calls one of these methods.
// Each method peeks at the next character to pick the longest matching token,
// and falls back to the single-character token on any other peek result
// (including a failed peek).
impl<'t> Lexer<'t> {
/// Lexes the rest of a token starting with `&`: `&&`, `&=`, or plain `&`.
fn amp(&mut self) -> LResult<Token> {
match self.peek() {
Ok('&') => self.consume()?.produce_op(Op::AmpAmp),
Ok('=') => self.consume()?.produce_op(Op::AmpEq),
_ => self.produce_op(Op::Amp),
Ok('&') => self.consume()?.produce_op(Punct::AmpAmp),
Ok('=') => self.consume()?.produce_op(Punct::AmpEq),
_ => self.produce_op(Punct::Amp),
}
}
/// Lexes the rest of a token starting with `!`: `!!`, `!=`, or plain `!`.
fn bang(&mut self) -> LResult<Token> {
match self.peek() {
Ok('!') => self.consume()?.produce_op(Op::BangBang),
Ok('=') => self.consume()?.produce_op(Op::BangEq),
_ => self.produce_op(Op::Bang),
Ok('!') => self.consume()?.produce_op(Punct::BangBang),
Ok('=') => self.consume()?.produce_op(Punct::BangEq),
_ => self.produce_op(Punct::Bang),
}
}
/// Lexes the rest of a token starting with `|`: `||`, `|=`, or plain `|`.
fn bar(&mut self) -> LResult<Token> {
match self.peek() {
Ok('|') => self.consume()?.produce_op(Op::BarBar),
Ok('=') => self.consume()?.produce_op(Op::BarEq),
_ => self.produce_op(Op::Bar),
Ok('|') => self.consume()?.produce_op(Punct::BarBar),
Ok('=') => self.consume()?.produce_op(Punct::BarEq),
_ => self.produce_op(Punct::Bar),
}
}
/// Lexes the rest of a token starting with `:`: `::` or plain `:`.
fn colon(&mut self) -> LResult<Token> {
match self.peek() {
Ok(':') => self.consume()?.produce_op(Op::ColonColon),
_ => self.produce_op(Op::Colon),
Ok(':') => self.consume()?.produce_op(Punct::ColonColon),
_ => self.produce_op(Punct::Colon),
}
}
/// Lexes the rest of a token starting with `.`: `..=`, `..`, or plain `.`.
fn dot(&mut self) -> LResult<Token> {
match self.peek() {
Ok('.') => {
// The second `.` is consumed before checking for a trailing `=`.
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce_op(Op::DotDotEq)
self.consume()?.produce_op(Punct::DotDotEq)
} else {
self.produce_op(Op::DotDot)
self.produce_op(Punct::DotDot)
}
}
_ => self.produce_op(Op::Dot),
_ => self.produce_op(Punct::Dot),
}
}
/// Lexes the rest of a token starting with `=`: `==`, `=>`, or plain `=`.
fn equal(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::EqEq),
Ok('>') => self.consume()?.produce_op(Op::FatArrow),
_ => self.produce_op(Op::Eq),
Ok('=') => self.consume()?.produce_op(Punct::EqEq),
Ok('>') => self.consume()?.produce_op(Punct::FatArrow),
_ => self.produce_op(Punct::Eq),
}
}
/// Lexes the rest of a token starting with `>`: `>=`, `>>=`, `>>`, or plain `>`.
fn greater(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::GtEq),
Ok('=') => self.consume()?.produce_op(Punct::GtEq),
Ok('>') => {
// The second `>` is consumed before checking for a trailing `=`.
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce_op(Op::GtGtEq)
self.consume()?.produce_op(Punct::GtGtEq)
} else {
self.produce_op(Op::GtGt)
self.produce_op(Punct::GtGt)
}
}
_ => self.produce_op(Op::Gt),
_ => self.produce_op(Punct::Gt),
}
}
/// Lexes the rest of a token starting with `#`: `#!` or plain `#`.
fn hash(&mut self) -> LResult<Token> {
match self.peek() {
Ok('!') => self.consume()?.produce_op(Op::HashBang),
_ => self.produce_op(Op::Hash),
Ok('!') => self.consume()?.produce_op(Punct::HashBang),
_ => self.produce_op(Punct::Hash),
}
}
/// Lexes the rest of a token starting with `<`: `<=`, `<<=`, `<<`, or plain `<`.
fn less(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::LtEq),
Ok('=') => self.consume()?.produce_op(Punct::LtEq),
Ok('<') => {
// The second `<` is consumed before checking for a trailing `=`.
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce_op(Op::LtLtEq)
self.consume()?.produce_op(Punct::LtLtEq)
} else {
self.produce_op(Op::LtLt)
self.produce_op(Punct::LtLt)
}
}
_ => self.produce_op(Op::Lt),
_ => self.produce_op(Punct::Lt),
}
}
/// Lexes the rest of a token starting with `-`: `-=`, `->`, or plain `-`.
fn minus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::MinusEq),
Ok('>') => self.consume()?.produce_op(Op::Arrow),
_ => self.produce_op(Op::Minus),
Ok('=') => self.consume()?.produce_op(Punct::MinusEq),
Ok('>') => self.consume()?.produce_op(Punct::Arrow),
_ => self.produce_op(Punct::Minus),
}
}
/// Lexes the rest of a token starting with `+`: `+=` or plain `+`.
fn plus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::PlusEq),
_ => self.produce_op(Op::Plus),
Ok('=') => self.consume()?.produce_op(Punct::PlusEq),
_ => self.produce_op(Punct::Plus),
}
}
/// Lexes the rest of a token starting with `%`: `%=` or plain `%`.
fn rem(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::RemEq),
_ => self.produce_op(Op::Rem),
Ok('=') => self.consume()?.produce_op(Punct::RemEq),
_ => self.produce_op(Punct::Rem),
}
}
/// Lexes the rest of a token starting with `/`: `/=` or plain `/`.
///
/// A following `/` or `*` is consumed and handed off to `line_comment` or
/// `block_comment` respectively.
fn slash(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::SlashEq),
Ok('=') => self.consume()?.produce_op(Punct::SlashEq),
Ok('/') => self.consume()?.line_comment(),
Ok('*') => self.consume()?.block_comment(),
_ => self.produce_op(Op::Slash),
_ => self.produce_op(Punct::Slash),
}
}
/// Lexes the rest of a token starting with `*`: `*=` or plain `*`.
fn star(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::StarEq),
_ => self.produce_op(Op::Star),
Ok('=') => self.consume()?.produce_op(Punct::StarEq),
_ => self.produce_op(Punct::Star),
}
}
/// Lexes the rest of a token starting with `^`: `^=`, `^^`, or plain `^`.
fn xor(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce_op(Op::XorEq),
Ok('^') => self.consume()?.produce_op(Op::XorXor),
_ => self.produce_op(Op::Xor),
Ok('=') => self.consume()?.produce_op(Punct::XorEq),
Ok('^') => self.consume()?.produce_op(Punct::XorXor),
_ => self.produce_op(Punct::Xor),
}
}
}
@@ -339,7 +339,7 @@ impl<'t> Lexer<'t> {
if let Ok(keyword) = Kind::from_str(&out) {
self.produce(keyword, ())
} else {
self.produce(Kind::Identifier, TokenData::Identifier(out.into()))
self.produce(Kind::Identifier, TokenData::String(out))
}
}
fn xid_start(&mut self) -> LResult<char> {
@@ -370,7 +370,7 @@ impl<'t> Lexer<'t> {
Ok('o') => self.consume()?.digits::<8>(),
Ok('b') => self.consume()?.digits::<2>(),
Ok('0'..='9') => self.digits::<10>(),
_ => self.produce(Kind::Integer, 0),
_ => self.produce(Kind::Literal, 0),
}
}
fn digits<const B: u32>(&mut self) -> LResult<Token> {
@@ -378,7 +378,7 @@ impl<'t> Lexer<'t> {
while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
value = value * B as u128 + self.digit::<B>()? as u128;
}
self.produce(Kind::Integer, value)
self.produce(Kind::Literal, value)
}
fn digit<const B: u32>(&mut self) -> LResult<u32> {
let digit = self.peek()?;
@@ -399,12 +399,12 @@ impl<'t> Lexer<'t> {
{
value.push(self.unescape()?)
}
self.consume()?.produce(Kind::String, value)
self.consume()?.produce(Kind::Literal, value)
}
/// Lexes the body and closing quote of a character literal.
///
/// Reads one value via `unescape` (presumably a single, possibly escaped,
/// character; confirm against `unescape`), then requires the next character
/// to be `'`. On success the closing quote is consumed and a token carrying
/// the value is produced.
///
/// # Errors
/// Propagates failures from `unescape`, `peek`, and `consume`. Returns an
/// unmatched-delimiters error for `'`, tagged with the current line and
/// column, when the next character is not a closing quote.
fn character(&mut self) -> LResult<Token> {
let out = self.unescape()?;
match self.peek()? {
'\'' => self.consume()?.produce(Kind::Character, out),
'\'' => self.consume()?.produce(Kind::Literal, out),
_ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
}
}

View File

@@ -35,7 +35,7 @@ macro td ($($id:expr),*) {
mod ident {
use super::*;
/// Builds an array of expected token data for identifiers: each literal
/// `$id` is converted with `.into()` and wrapped in a [TokenData] variant.
macro ident ($($id:literal),*) {
[$(TokenData::Identifier($id.into())),*]
[$(TokenData::String($id.into())),*]
}
test_lexer_data_type! {
underscore { "_ _" => ident!["_", "_"] }
@@ -109,9 +109,8 @@ mod string {
}
}
mod punct {
use cl_token::token_type::Op;
/// Expands a bare variant name (e.g. `op!(LCurly)`) into the full
/// [TokenKind] value for that operator/punctuation token.
macro op($op:ident) {
TokenKind::Op(Op::$op)
TokenKind::Punct(Punct::$op)
}
use super::*;