From b89ed307a2e9dd02b3449491a0f9ee2aea088a10 Mon Sep 17 00:00:00 2001 From: John Date: Mon, 16 Oct 2023 22:50:15 -0500 Subject: [PATCH] parser: Add recursive descent parser for AST. TODO: Error recovery and synchronization. --- libconlang/src/lib.rs | 404 +++++++++++++++++++++++++++++++++++++++++- readme.md | 4 +- 2 files changed, 405 insertions(+), 3 deletions(-) diff --git a/libconlang/src/lib.rs b/libconlang/src/lib.rs index 25ebeac..294a6f0 100644 --- a/libconlang/src/lib.rs +++ b/libconlang/src/lib.rs @@ -1528,7 +1528,409 @@ pub mod lexer { } pub mod parser { - //! Parses tokens into an AST + //! Parses [tokens](super::token) into an [AST](super::ast) + use super::{ + ast::preamble::*, + lexer::Lexer, + token::{Keyword, Token, Type}, + }; + use error::{Error, *}; + + mod error { + use super::{Token, Type}; + + #[derive(Clone, Debug, Default, PartialEq, Eq)] + pub enum Reason { + Expected(Type), + NotIdentifier, + NotLiteral, + NotString, + NotBool, + NotFloat, + FloatExponentOverflow, + FloatMantissaOverflow, + NotInt, + IntOverflow, + NotControlFlow, + NotBranch, + EndOfFile, + #[default] + Unspecified, + } + use Reason::*; + /// [Parser] [Result] + pub type PResult = Result; + #[derive(Clone, Debug, Default, PartialEq, Eq)] + pub struct Error { + reason: Reason, + start: Option, + } + macro error_impl($($fn:ident$(($($p:ident: $t:ty),*))?: $reason:expr),*$(,)?) {$( + /// Creates an [Error] with this [Reason]: + #[doc = concat!("[`", stringify!($reason), "`]")] + pub fn $fn($($($p : $t),*)?) -> Self { + Self { reason: $reason$(($($p)*))?, start: None } + } + )*} + impl Error { + pub fn token(self, start: Token) -> Self { + Self { start: Some(start), ..self } + } + pub fn start(&self) -> Option { + self.start + } + pub fn reason(self, reason: Reason) -> Self { + Self { reason, ..self } + } + error_impl! { + expected(e: Type): Expected, + not_identifier: NotIdentifier, + not_literal: NotLiteral, + not_string: NotString, + not_bool: NotBool, + not_float: NotFloat, + float_exponent_overflow: FloatExponentOverflow, + float_mantissa_overflow: FloatMantissaOverflow, + not_int: NotInt, + int_overflow: IntOverflow, + not_control_flow: NotControlFlow, + not_branch: NotBranch, + end_of_file: EndOfFile, + unspecified: Unspecified, + } + } + } + + /// The Parser performs recursive descent on the AST's grammar + /// using a provided [Lexer]. + pub struct Parser<'t> { + tokens: Vec, + panic_stack: Vec, + text: &'t str, + curr: usize, + } + impl<'t> From> for Parser<'t> { + fn from(value: Lexer<'t>) -> Self { + let (tokens, text) = value.consume(); + Self::new(tokens, text) + } + } + + impl<'t> Parser<'t> { + /// Create a new [Parser] from a list of [Tokens][1] + /// and the [text](str) used to generate that list + /// (as [Tokens][1] do not store their strings) + /// + /// [1]: Token + pub fn new(tokens: Vec, text: &'t str) -> Self { + Self { tokens, text, panic_stack: vec![], curr: 0 } + } + /// Consumes any consecutive comments + fn consume_comments(&mut self) -> &mut Self { + while let Some(Type::Comment) = self.peek().map(|t| t.ty()) { + self.curr += 1; + } + self + } + /// Consume the current token + #[inline] + pub fn consume(&mut self) -> &mut Self { + self.curr += 1; + self.consume_comments(); + self + } + /// Peek at the current token + pub fn peek(&self) -> Option<&Token> { + self.tokens.get(self.curr) + } + /// Look ahead `n` tokens + pub fn ahead(&self, n: usize) -> Option<&Token> { + self.tokens.get(self.curr.wrapping_add(n)) + } + /// Look behind `n` tokens + pub fn behind(&self, n: usize) -> Option<&Token> { + self.tokens.get(self.curr.wrapping_sub(n)) + } + /// Records the current position on the panic stack + pub fn mark(&mut self) -> &mut Self { + self.panic_stack.push(self.curr); + self + } + /// Erases a recorded position from the panic stack + pub fn unmark(&mut self) -> &mut Self { + self.panic_stack.pop(); + self + } + /// Unwinds the panic stack one step + pub fn unwind(&mut self) -> Option { + let out = self.panic_stack.pop(); + if let Some(v) = out { + self.curr = v; + } + out + } + /// Parse the [start of an AST](Start) + pub fn parse(&mut self) -> PResult { + self.consume_comments(); + Ok(Start(self.expr()?)) + } + } + /// Helpers + impl<'t> Parser<'t> { + fn consume_type(&mut self, t: Type) -> PResult<&mut Self> { + self.matches(t)?; + Ok(self.consume()) + } + fn check_eof(&mut self) -> PResult<&mut Self> { + if self.curr < self.tokens.len() { + Ok(self) + } else { + Err(Error::end_of_file()) + } + } + fn todo_error(&mut self, l: u32, c: u32, s: &str) -> Error { + eprintln!("TODO: {s}:{l}:{c}"); + Error::unspecified().token(*self.peek().unwrap()) + } + fn matches(&mut self, e: Type) -> PResult<&Token> { + let t = self.check_eof()?.peek().expect("self should not be eof"); + if t.ty() != e { + Err(Error::expected(e).token(*t))? + } + Ok(t) + } + fn keyword(&mut self, keyword: Keyword) -> PResult<&mut Self> { + self.consume_type(Type::Keyword(keyword)) + } + fn delimited(&mut self, lhs: Type, mid: F, rhs: Type) -> PResult + where F: Fn(&mut Self) -> PResult { + self.consume_type(lhs)?; + let out = mid(self); + self.consume_type(rhs)?; + out + } + } + macro ptodo_err($self:expr $(, $t:expr)*) { + $($t;)* + $self.todo_error(line!(), column!(), file!()) + } + macro ptodo($self:expr $(, $t:expr)*) { + $($t;)* + Err(ptodo_err!($self)) + } + fn check_eof(t: Option<&Token>) -> PResult<&Token> { + t.ok_or(Error::end_of_file()) + } + + /// # Terminals and Pseudo-Terminals + impl<'t> Parser<'t> { + pub fn identifier(&mut self) -> PResult { + let range = self.matches(Type::Identifier)?.range(); + Ok(Identifier(self.consume().text[range].into())) + } + pub fn literal(&mut self) -> PResult { + use literal::Literal::*; + use Keyword::{False, True}; + let tok = check_eof(self.peek())?; + match tok.ty() { + Type::Float => self.float().map(Float), + Type::Integer => self.int().map(Int), + Type::String => self.string().map(String), + Type::Character => self.char().map(Char), + Type::Keyword(True | False) => self.bool().map(Bool), + _ => Err(Error::not_literal().token(*tok)), + } + } + pub fn float(&mut self) -> PResult { + ptodo!(self) + } + pub fn int(&mut self) -> PResult { + #[cfg(debug_assertions)] + eprintln!("/* TODO: parse integer literals from other bases */"); + let token = *self.matches(Type::Integer)?; + self.consume().text[token.range()] + .parse() + .map_err(|_| Error::not_int().token(token)) + } + pub fn string(&mut self) -> PResult { + let range = self.matches(Type::String)?.range(); + Ok(self.consume().text[range].into()) + } + pub fn char(&mut self) -> PResult { + ptodo!(self) + } + pub fn bool(&mut self) -> PResult { + use Keyword::{False, True}; + let token = check_eof(self.peek())?; + let out = match token.ty() { + Type::Keyword(False) => false, + Type::Keyword(True) => true, + _ => Err(Error::not_bool().token(*token))?, + }; + self.consume(); + Ok(out) + } + } + /// Expressions + impl<'t> Parser<'t> { + pub fn expr(&mut self) -> PResult { + use expression::Expr; + self.flow() + .map(Expr::Flow) + .or_else(|_| self.ignore().map(Expr::Ignore)) + } + pub fn block(&mut self) -> PResult { + self.delimited(Type::LCurly, Parser::expr, Type::RCurly) + .map(|e| expression::Block { expr: Box::new(e) }) + } + pub fn group(&mut self) -> PResult { + self.delimited(Type::LParen, Parser::expr, Type::RParen) + .map(|e| expression::Group { expr: Box::new(e) }) + } + pub fn r#final(&mut self) -> PResult { + use expression::Final; + self.identifier() + .map(Final::Identifier) + .or_else(|_| self.literal().map(Final::Literal)) + .or_else(|_| self.block().map(Final::Block)) + .or_else(|_| self.group().map(Final::Group)) + .or_else(|_| self.branch().map(Final::Branch)) + } + } + + /// Helper macro for math parsing subexpressions with production + /// ```ebnf + /// Ret = a (b a)* + /// ``` + /// # Examples + /// ```rust,ignore + /// math_impl!{ + /// function_name: ret::Value = parse_operands, parse_operators; + /// } + /// ``` + /// becomes + /// ```rust,ignore + /// pub fn function_name(&mut self) -> PResult { ... } + /// ``` + macro math_impl ($($f: ident: $Ret:path = $a:ident, $b:ident);*$(;)?) {$( + pub fn $f (&mut self) -> PResult<$Ret> { + let (first, mut others) = (self.$a()?, vec![]); + while let Some(op) = self.$b() { + others.push((op, self.$a()?)); + } + Ok($Ret(first, others)) + } + )*} + /// # [Arithmetic and Logical Subexpressions](math) + impl<'t> Parser<'t> { + math_impl! { + //name returns operands operators + ignore: math::Ignore = assign, ignore_op; + assign: math::Assign = compare, assign_op; + compare: math::Compare = logic, compare_op; + logic: math::Logic = bitwise, logic_op; + bitwise: math::Bitwise = shift, bitwise_op; + shift: math::Shift = term, shift_op; + term: math::Term = factor, term_op; + factor: math::Factor = unary, factor_op; + } + pub fn unary(&mut self) -> PResult { + let mut ops = vec![]; + while let Some(op) = self.unary_op() { + ops.push(op) + } + Ok(math::Unary(ops, self.r#final()?)) + } + } + macro operator_impl($($(#[$m:meta])*$f:ident: $Ret:ty),*$(,)*) {$( + $(#[$m])* pub fn $f(&mut self) -> Option<$Ret> { + let out: Option<$Ret> = self.peek()?.ty().into(); + if out.is_some() { self.consume(); } + out + } + )*} + /// # [Operators](operator) + impl<'t> Parser<'t> { + operator_impl! { + ignore_op: operator::Ignore, + compare_op: operator::Compare, + assign_op: operator::Assign, + logic_op: operator::Logic, + bitwise_op: operator::Bitwise, + shift_op: operator::Shift, + term_op: operator::Term, + factor_op: operator::Factor, + unary_op: operator::Unary, + } + } + /// # [Control Flow](control) + impl<'t> Parser<'t> { + pub fn branch(&mut self) -> PResult { + use control::Branch; + use Keyword::{For, If, While}; + let token = check_eof(self.peek())?; + match token.ty() { + Type::Keyword(While) => self.parse_while().map(Branch::While), + Type::Keyword(For) => self.parse_for().map(Branch::For), + Type::Keyword(If) => self.parse_if().map(Branch::If), + _ => Err(Error::not_branch().token(*token)), + } + } + pub fn parse_if(&mut self) -> PResult { + self.consume_type(Type::Keyword(Keyword::If))?; + Ok(control::If { + cond: self.expr()?.into(), + body: self.block()?, + else_: self.parse_else()?, + }) + } + pub fn parse_while(&mut self) -> PResult { + self.consume_type(Type::Keyword(Keyword::While))?; + Ok(control::While { + cond: self.expr()?.into(), + body: self.block()?, + else_: self.parse_else()?, + }) + } + pub fn parse_for(&mut self) -> PResult { + self.keyword(Keyword::For)?; + Ok(control::For { + var: self.identifier()?, + iter: { self.keyword(Keyword::In)?.expr()?.into() }, + body: self.block()?, + else_: self.parse_else()?, + }) + } + pub fn parse_else(&mut self) -> PResult> { + // it's fine for `else` to be missing entirely + match self.keyword(Keyword::Else) { + Ok(_) => Ok(Some(control::Else { block: self.block()? })), + Err(_) => Ok(None), + } + } + pub fn flow(&mut self) -> PResult { + use control::Flow; + use Keyword::{Break, Continue, Return}; + let token = check_eof(self.peek())?; + match token.ty() { + Type::Keyword(Break) => self.parse_break().map(Flow::Break), + Type::Keyword(Return) => self.parse_return().map(Flow::Return), + Type::Keyword(Continue) => self.parse_continue().map(Flow::Continue), + _ => Err(Error::not_control_flow().token(*token)), + } + } + pub fn parse_break(&mut self) -> PResult { + Ok(control::Break { expr: self.keyword(Keyword::Break)?.expr()?.into() }) + } + pub fn parse_return(&mut self) -> PResult { + Ok(control::Return { expr: self.keyword(Keyword::Return)?.expr()?.into() }) + } + pub fn parse_continue(&mut self) -> PResult { + ptodo!(self) + } + } +} + } pub mod interpreter { diff --git a/readme.md b/readme.md index ee6aedd..ab69a6c 100644 --- a/readme.md +++ b/readme.md @@ -8,8 +8,8 @@ Friday each month. - [x] Decide on a minimal set of keywords and operators to support - [x] Lex an entire Rust source file (minus generics, paths, and lifetimes) - [x] Write expression grammar -- [ ] Write AST for expression grammar -- [ ] Write parser for AST +- [x] Write AST for expression grammar +- [x] Write parser for AST - [ ] Create tests for parser (and AST) - [ ] Parse `dummy.cl` into a valid AST - [ ] Pretty printer, for debugging