Move integer and char parsing out of the parser and back into the lexer
This commit is contained in:
30
src/lexer.rs
30
src/lexer.rs
@@ -75,15 +75,20 @@ impl<'t> Lexer<'t> {
|
|||||||
LexError { pos: self.head, res }
|
LexError { pos: self.head, res }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn as_str(&self) -> (&'t str, Span) {
|
||||||
|
let span = Span(self.head, self.tail);
|
||||||
|
(&self.text[Range::from(span)], span)
|
||||||
|
}
|
||||||
|
|
||||||
/// Produces a Token
|
/// Produces a Token
|
||||||
pub fn produce(&mut self, kind: TKind) -> Token {
|
pub fn produce(&mut self, kind: TKind) -> Token {
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
let span = Span(self.head, self.tail);
|
let (lexeme, span) = self.as_str();
|
||||||
self.head = self.tail;
|
self.head = self.tail;
|
||||||
Token { lexeme: self.text[Range::from(span)].to_owned(), kind, span }
|
Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: String) -> Token {
|
pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
let span = Span(self.head, self.tail);
|
let span = Span(self.head, self.tail);
|
||||||
self.head = self.tail;
|
self.head = self.tail;
|
||||||
@@ -226,9 +231,10 @@ impl<'t> Lexer<'t> {
|
|||||||
|
|
||||||
pub fn identifier(&mut self) -> Result<Token, LexError> {
|
pub fn identifier(&mut self) -> Result<Token, LexError> {
|
||||||
while self.consume().peek().is_some_and(is_xid_continue) {}
|
while self.consume().peek().is_some_and(is_xid_continue) {}
|
||||||
|
let (lexeme, _span) = self.as_str();
|
||||||
let token = self.produce(TKind::Identifier);
|
let token = self.produce(TKind::Identifier);
|
||||||
Ok(Token {
|
Ok(Token {
|
||||||
kind: match token.lexeme.as_str() {
|
kind: match lexeme {
|
||||||
"as" => TKind::As,
|
"as" => TKind::As,
|
||||||
"break" => TKind::Break,
|
"break" => TKind::Break,
|
||||||
"const" => TKind::Const,
|
"const" => TKind::Const,
|
||||||
@@ -236,6 +242,7 @@ impl<'t> Lexer<'t> {
|
|||||||
"else" => TKind::Else,
|
"else" => TKind::Else,
|
||||||
"false" => TKind::False,
|
"false" => TKind::False,
|
||||||
"fn" => TKind::Fn,
|
"fn" => TKind::Fn,
|
||||||
|
"for" => TKind::For,
|
||||||
"if" => TKind::If,
|
"if" => TKind::If,
|
||||||
"let" => TKind::Let,
|
"let" => TKind::Let,
|
||||||
"loop" => TKind::Loop,
|
"loop" => TKind::Loop,
|
||||||
@@ -261,7 +268,7 @@ impl<'t> Lexer<'t> {
|
|||||||
None => '\0',
|
None => '\0',
|
||||||
};
|
};
|
||||||
if self.take().is_some_and(|c| c == '\'') {
|
if self.take().is_some_and(|c| c == '\'') {
|
||||||
Ok(self.produce_with_lexeme(TKind::Character, c.into()))
|
Ok(self.produce_with_lexeme(TKind::Character, Lexeme::Char(c)))
|
||||||
} else {
|
} else {
|
||||||
Err(self.error("Unterminated character"))
|
Err(self.error("Unterminated character"))
|
||||||
}
|
}
|
||||||
@@ -279,7 +286,7 @@ impl<'t> Lexer<'t> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
lexeme.shrink_to_fit();
|
lexeme.shrink_to_fit();
|
||||||
Ok(self.produce_with_lexeme(TKind::String, lexeme))
|
Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn escape(&mut self) -> Result<char, LexError> {
|
pub fn escape(&mut self) -> Result<char, LexError> {
|
||||||
@@ -318,10 +325,17 @@ impl<'t> Lexer<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
|
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
|
||||||
while self.peek().is_some_and(|c| c.is_digit(BASE)) {
|
let mut int: u128 = 0;
|
||||||
|
while let Some(c) = self.peek() {
|
||||||
|
int = match c.to_digit(BASE).ok_or(c) {
|
||||||
|
Err('_') => int,
|
||||||
|
Ok(c) => int.wrapping_mul(BASE as _).wrapping_add(c as _),
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
self.consume();
|
self.consume();
|
||||||
}
|
}
|
||||||
Ok(self.produce(TKind::Integer))
|
|
||||||
|
Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
|
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
|
||||||
|
|||||||
@@ -75,10 +75,9 @@ fn lex() -> Result<(), Box<dyn Error>> {
|
|||||||
println!("\x1b[31m{e}\x1b[0m");
|
println!("\x1b[31m{e}\x1b[0m");
|
||||||
break Ok(Response::Deny);
|
break Ok(Response::Deny);
|
||||||
}
|
}
|
||||||
Ok(Token { lexeme, kind, span: Span { head, tail } }) => println!(
|
Ok(Token { lexeme, kind, span: Span { head, tail } }) => {
|
||||||
"{kind:?}\x1b[11G {head:<4} {tail:<4} {}",
|
println!("{kind:?}\x1b[11G {head:<4} {tail:<4} {lexeme:?}")
|
||||||
lexeme.escape_debug()
|
}
|
||||||
),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use crate::{
|
|||||||
ast::*,
|
ast::*,
|
||||||
lexer::{LexError, Lexer},
|
lexer::{LexError, Lexer},
|
||||||
span::Span,
|
span::Span,
|
||||||
token::{TKind, Token},
|
token::{Lexeme, TKind, Token},
|
||||||
};
|
};
|
||||||
use std::{error::Error, fmt::Display, vec};
|
use std::{error::Error, fmt::Display, vec};
|
||||||
|
|
||||||
@@ -90,7 +90,7 @@ impl<'t> Parser<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
|
/// Consumes the currently-peeked [Token], returning its lexeme without cloning.
|
||||||
pub fn take_lexeme(&mut self) -> Option<String> {
|
pub fn take_lexeme(&mut self) -> Option<Lexeme> {
|
||||||
self.take().map(|tok| tok.lexeme)
|
self.take().map(|tok| tok.lexeme)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,19 +196,23 @@ impl<'t> Parse<'t> for Literal {
|
|||||||
TKind::Character => Literal::Char(
|
TKind::Character => Literal::Char(
|
||||||
p.take_lexeme()
|
p.take_lexeme()
|
||||||
.expect("should have Token")
|
.expect("should have Token")
|
||||||
.chars()
|
.char()
|
||||||
.next()
|
|
||||||
.expect("should have one char in char literal"),
|
.expect("should have one char in char literal"),
|
||||||
),
|
),
|
||||||
TKind::Integer => {
|
TKind::Integer => {
|
||||||
let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
|
let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
|
||||||
// TODO: more complex int parsing
|
// TODO: more complex int parsing
|
||||||
let int = lexeme
|
let int = lexeme
|
||||||
.parse()
|
.int()
|
||||||
.map_err(|_| ParseError::Expected(TKind::Integer, span))?;
|
.ok_or(ParseError::Expected(TKind::Integer, span))?;
|
||||||
Literal::Int(int)
|
Literal::Int(int as _)
|
||||||
}
|
}
|
||||||
TKind::String => Literal::Str(p.take_lexeme().expect("should have Token")),
|
TKind::String => Literal::Str({
|
||||||
|
let Token { lexeme, kind: _, span } = p.take().expect("should have Token");
|
||||||
|
lexeme
|
||||||
|
.string()
|
||||||
|
.ok_or(ParseError::Expected(TKind::String, span))?
|
||||||
|
}),
|
||||||
_ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
|
_ => Err(ParseError::Expected(TKind::Integer, tok.span))?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -246,10 +250,14 @@ impl<'t> Parse<'t> for Pat {
|
|||||||
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
TKind::True | TKind::False | TKind::Character | TKind::Integer | TKind::String => {
|
||||||
Pat::Lit(p.parse(())?)
|
Pat::Lit(p.parse(())?)
|
||||||
}
|
}
|
||||||
TKind::Identifier => match tok.lexeme.as_str() {
|
TKind::Identifier => match tok.lexeme.str() {
|
||||||
"_" => p.consume().then(Pat::Ignore),
|
Some("_") => p.consume().then(Pat::Ignore),
|
||||||
_ => {
|
_ => {
|
||||||
let name = p.take_lexeme().expect("should have Token");
|
let name = p
|
||||||
|
.take_lexeme()
|
||||||
|
.expect("should have Token")
|
||||||
|
.string()
|
||||||
|
.expect("Identifier token should have String");
|
||||||
match p.peek().map(|t| t.kind)? {
|
match p.peek().map(|t| t.kind)? {
|
||||||
TKind::LParen => Pat::TupStruct(name, p.parse(PPrec::Tuple)?),
|
TKind::LParen => Pat::TupStruct(name, p.parse(PPrec::Tuple)?),
|
||||||
TKind::LCurly => Pat::Struct(
|
TKind::LCurly => Pat::Struct(
|
||||||
@@ -262,7 +270,7 @@ impl<'t> Parse<'t> for Pat {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme),
|
TKind::Grave => Pat::MetId(p.consume().next()?.lexeme.to_string()),
|
||||||
TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
|
TKind::DotDot => Pat::Rest(match p.consume().peek_if(TKind::Identifier) {
|
||||||
Some(_) => Some(p.parse(level)?),
|
Some(_) => Some(p.parse(level)?),
|
||||||
None => None,
|
None => None,
|
||||||
@@ -314,9 +322,14 @@ impl<'t> Parse<'t> for Ty {
|
|||||||
let tok = p.peek()?;
|
let tok = p.peek()?;
|
||||||
|
|
||||||
let head = match tok.kind {
|
let head = match tok.kind {
|
||||||
TKind::Identifier => match tok.lexeme.as_str() {
|
TKind::Identifier => match tok.lexeme.str() {
|
||||||
"_" => p.consume().then(Ty::Infer),
|
Some("_") => p.consume().then(Ty::Infer),
|
||||||
_ => Ty::Named(p.take_lexeme().expect("should have Token")),
|
_ => Ty::Named(
|
||||||
|
p.take_lexeme()
|
||||||
|
.expect("should have Token")
|
||||||
|
.string()
|
||||||
|
.expect("Identifier token should have String"),
|
||||||
|
),
|
||||||
},
|
},
|
||||||
TKind::LBrack => {
|
TKind::LBrack => {
|
||||||
let ty = p.consume().parse(level)?;
|
let ty = p.consume().parse(level)?;
|
||||||
@@ -550,7 +563,7 @@ impl<'t> Parse<'t> for Fn {
|
|||||||
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
|
fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
|
||||||
match p.consume().next_if(TKind::Identifier) {
|
match p.consume().next_if(TKind::Identifier) {
|
||||||
Ok(Token { lexeme, .. }) => Ok(Self(
|
Ok(Token { lexeme, .. }) => Ok(Self(
|
||||||
Some(lexeme),
|
lexeme.string(),
|
||||||
p.parse(PPrec::Typed)?,
|
p.parse(PPrec::Typed)?,
|
||||||
p.parse(Prec::Body.next())?,
|
p.parse(Prec::Body.next())?,
|
||||||
)),
|
)),
|
||||||
@@ -603,12 +616,18 @@ impl<'t> Parse<'t> for MatchArm {
|
|||||||
impl<'t> Parse<'t> for MakeArm {
|
impl<'t> Parse<'t> for MakeArm {
|
||||||
type Prec = ();
|
type Prec = ();
|
||||||
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
|
fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
|
||||||
Ok(MakeArm(p.next_if(TKind::Identifier)?.lexeme, {
|
Ok(MakeArm(
|
||||||
|
p.next_if(TKind::Identifier)?
|
||||||
|
.lexeme
|
||||||
|
.string()
|
||||||
|
.expect("Identifier should have String"),
|
||||||
|
{
|
||||||
p.next_if(TKind::Colon)
|
p.next_if(TKind::Colon)
|
||||||
.ok()
|
.ok()
|
||||||
.map(|_| p.parse(Prec::Body.value()))
|
.map(|_| p.parse(Prec::Body.value()))
|
||||||
.transpose()?
|
.transpose()?
|
||||||
}))
|
},
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -639,8 +658,8 @@ impl<'t> Parse<'t> for Expr {
|
|||||||
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
|
Ps::End if level == prec.next() => Expr::Op(Op::Tuple, vec![]),
|
||||||
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
|
Ps::End => Err(ParseError::NotPrefix(tok.kind, span))?,
|
||||||
|
|
||||||
Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident")),
|
Ps::Id => Expr::Id(p.take_lexeme().expect("should have ident").to_string()),
|
||||||
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme),
|
Ps::Mid => Expr::MetId(p.consume().next()?.lexeme.to_string()),
|
||||||
Ps::Lit => Expr::Lit(p.parse(())?),
|
Ps::Lit => Expr::Lit(p.parse(())?),
|
||||||
Ps::Let => Expr::Let(p.parse(())?),
|
Ps::Let => Expr::Let(p.parse(())?),
|
||||||
Ps::Const => Expr::Const(p.parse(())?),
|
Ps::Const => Expr::Const(p.parse(())?),
|
||||||
|
|||||||
47
src/token.rs
47
src/token.rs
@@ -4,11 +4,55 @@ use crate::span::Span;
|
|||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
pub lexeme: String,
|
pub lexeme: Lexeme,
|
||||||
pub kind: TKind,
|
pub kind: TKind,
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum Lexeme {
|
||||||
|
String(String),
|
||||||
|
Integer(u128, u32),
|
||||||
|
Char(char),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Lexeme {
|
||||||
|
pub fn string(self) -> Option<String> {
|
||||||
|
match self {
|
||||||
|
Self::String(s) => Some(s),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn str(&self) -> Option<&str> {
|
||||||
|
match self {
|
||||||
|
Self::String(s) => Some(s),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn int(&self) -> Option<u128> {
|
||||||
|
match self {
|
||||||
|
Self::Integer(i, _) => Some(*i),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn char(&self) -> Option<char> {
|
||||||
|
match self {
|
||||||
|
Self::Char(c) => Some(*c),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Lexeme {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::String(v) => v.fmt(f),
|
||||||
|
Self::Integer(v, _) => v.fmt(f),
|
||||||
|
Self::Char(v) => v.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
pub enum TKind {
|
pub enum TKind {
|
||||||
Comment,
|
Comment,
|
||||||
@@ -19,6 +63,7 @@ pub enum TKind {
|
|||||||
Else,
|
Else,
|
||||||
False,
|
False,
|
||||||
Fn,
|
Fn,
|
||||||
|
For,
|
||||||
If,
|
If,
|
||||||
Let,
|
Let,
|
||||||
Loop,
|
Loop,
|
||||||
|
|||||||
Reference in New Issue
Block a user