// © 2023-2024 John Breaux //See LICENSE.md for license //! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast) pub mod ast; use self::error::{ Error, ErrorKind::{self, *}, PResult, Parsing, }; use crate::{ lexer::{ token::{Reg, Special, Token, TokenKind as Kind}, Lexer, }, preprocessor::Preprocessor, span::Span, }; use ast::*; #[derive(Clone, Debug)] pub struct Parser<'t> { lexer: Preprocessor<'t>, next: Option>, loc: Span, } impl<'t> Parser<'t> { /// Creates a new [Parser] pub fn new(text: &'t str) -> Self { let lexer = Preprocessor::new(text); Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } } /// Createes a new [Parser] from an existing [Lexer] pub fn with_lexer(lexer: Lexer<'t>) -> Self { let lexer = Preprocessor::with_lexer(lexer); Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } } pub fn parse>(&mut self) -> PResult { Parsable::parse_with(self) } pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error { Error { parsing, kind, loc: self.loc } } /// Peek a token out of the lexer pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> { if self.next.is_none() { self.next = self.lexer.scan(); } self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p)) } pub fn next(&mut self, p: Parsing) -> PResult> { Ok(match self.take() { Some(token) => token, None => { self.peek(p)?; self.take().expect("should have been populated by peek") } }) } /// Consumes the next token pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> { match self.peek(p)?.kind { kind if kind == expect => { self.take(); Ok(self) } kind => Err(self.error(Unexpected(kind), p)), } } /// Consumes the next token without checking it pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> { self.next(p)?; Ok(self) } /// Take the last peeked token pub fn take(&mut self) -> Option> { self.next.take() } } // Expressions impl<'t> Parser<'t> { /// Parses an expression pub fn expr(&mut self) -> PResult> { self.term() } /// Parses a term-expression (binary `*`mul, `/`div, `%`rem) pub fn term(&mut self) -> PResult> { let p = Parsing::Expr; let a = self.factor()?; let mut other = vec![]; loop { match self.peek(p)?.kind { Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)), Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)), Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)), _ if other.is_empty() => break Ok(a), _ => break Ok(Expr::Binary(a.into(), other)), } } } /// Parses a factor expression (binary `+`add, `-`sub) pub fn factor(&mut self) -> PResult> { let p = Parsing::Expr; let a = self.shift()?; let mut other = vec![]; loop { match self.peek(p)?.kind { Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)), Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)), _ if other.is_empty() => break Ok(a), _ => break Ok(Expr::Binary(a.into(), other)), } } } /// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right) pub fn shift(&mut self) -> PResult> { let p = Parsing::Expr; let a = self.bin()?; let mut other = vec![]; loop { match self.peek(p)?.kind { Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)), Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)), _ if other.is_empty() => break Ok(a), _ => break Ok(Expr::Binary(a.into(), other)), } } } pub fn bin(&mut self) -> PResult> { let p = Parsing::Expr; let a = self.unary()?; let mut other = vec![]; loop { match self.peek(p)?.kind { Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)), Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)), Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)), _ if other.is_empty() => break Ok(a), _ => break Ok(Expr::Binary(a.into(), other)), } } } /// Parses a unary expression (`!`invert, `-`negate) pub fn unary(&mut self) -> PResult> { let p = Parsing::Expr; let mut ops = vec![]; loop { match self.peek(p)?.kind { Kind::Star => ops.push(UnOp::Deref), Kind::Minus => ops.push(UnOp::Neg), Kind::Bang => ops.push(UnOp::Not), _ if ops.is_empty() => break Ok(self.primary()?), _ => break Ok(Expr::Unary(ops, self.primary()?.into())), } self.take(); } } /// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier pub fn primary(&mut self) -> PResult> { let p = Parsing::Expr; let Token { lexeme, kind, .. } = *self.peek(p)?; Ok(match kind { Kind::OpenParen => { let out = Expr::Group(self.then(p)?.parse()?); self.assert(Kind::CloseParen, p)?; out } Kind::Number(n, _) => { self.take(); Expr::Number(n) } Kind::Identifier => { self.take(); Expr::Ident(lexeme) } Kind::Amp => self.then(p)?.addrof()?, ty => Err(self.error(NonNumeric(ty), p))?, }) } pub fn addrof(&mut self) -> PResult> { let p = Parsing::Expr; let token = self.peek(p)?; let out = match token.kind { Kind::Identifier => Expr::AddrOf(token.lexeme), Kind::Number(n, _) => Expr::Number(n), ty => Err(self.error(Unexpected(ty), p))?, }; self.take(); Ok(out) } } pub trait Parsable<'t>: Sized { fn parse(text: &'t str) -> PResult { Self::parse_with(&mut Parser::new(text)) } fn parse_with(p: &mut Parser<'t>) -> PResult; } impl<'t> Parsable<'t> for Statements<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let mut stmts = vec![]; while p.peek(Parsing::File)?.kind != Kind::Eof { stmts.push(p.parse()?) } Ok(Self { stmts }) } } impl<'t> Parsable<'t> for Statement<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let token = *p.peek(Parsing::Stmt)?; Ok(match token.kind { Kind::Comment => { p.take(); Statement::Comment(token.lexeme) } Kind::Directive => Statement::Directive(p.parse()?), Kind::Identifier => Statement::Label(p.label()?), _ => Statement::Insn(p.parse()?), }) } } impl<'t> Parsable<'t> for Directive<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Directive; let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?; let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) }; p.take(); Ok(match lexeme { ".define" => Directive::Define(p.parse()?), ".org" => Directive::Org(p.expr()?.into()), ".word" => Directive::Word(p.parse()?), ".words" => Directive::Words(p.parse()?), ".string" => Directive::String(p.string()?), _ => Err(p.error(Unexpected(Kind::Directive), parsing))?, }) } } impl<'t> Parsable<'t> for Vec> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Directive; let mut tokens = vec![]; loop { if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind { break; } tokens.push(p.next(parsing)?) } p.take(); Ok(tokens) } } impl<'t> Parsable<'t> for Instruction<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let start = p.peek(Parsing::Instruction)?.pos.start; Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } }) } } impl<'t> Parsable<'t> for InstructionKind<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { use crate::lexer::token::OneArg; // an instruction starts with an opcode Ok(match p.peek(Parsing::Instruction)?.kind() { Kind::NoEm(_) => Self::NoEm(p.parse()?), Kind::OneEm(_) => Self::OneEm(p.parse()?), Kind::Special(Special::Br) => Self::Br(p.parse()?), Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?), Kind::OneArg(_) => Self::OneArg(p.parse()?), Kind::TwoArg(_) => Self::TwoArg(p.parse()?), Kind::Jump(_) => Self::Jump(p.parse()?), ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?, }) } } impl<'t> Parsable<'t> for NoEm { fn parse_with(p: &mut Parser<'t>) -> PResult { match p.next(Parsing::NoEm)?.kind { Kind::NoEm(opcode) => Ok(Self { opcode }), ty => Err(p.error(Unexpected(ty), Parsing::NoEm)), } } } impl<'t> Parsable<'t> for OneEm<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { Ok(Self { opcode: match p.next(Parsing::OneEm)?.kind { Kind::OneEm(opcode) => opcode, ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?, }, width: p.parse()?, dst: p.parse()?, }) } } impl<'t> Parsable<'t> for OneArg<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { Ok(Self { opcode: match p.next(Parsing::OneArg)?.kind { Kind::OneArg(opcode) => opcode, ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?, }, width: p.parse()?, src: p.parse()?, }) } } impl<'t> Parsable<'t> for TwoArg<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::TwoArg; Ok(Self { opcode: match p.next(parsing)?.kind { Kind::TwoArg(opcode) => opcode, ty => Err(p.error(Unexpected(ty), parsing))?, }, width: p.parse()?, src: p.parse()?, dst: p.assert(Kind::Comma, parsing)?.parse()?, }) } } impl<'t> Parsable<'t> for Jump<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Jump; Ok(Self { opcode: match p.next(parsing)?.kind { Kind::Jump(opcode) => opcode, ty => Err(p.error(Unexpected(ty), parsing))?, }, dst: p.parse()?, }) } } impl<'t> Parsable<'t> for Reti { fn parse_with(p: &mut Parser<'t>) -> PResult { use crate::lexer::token::OneArg; p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?; Ok(Reti) } } impl<'t> Parsable<'t> for Br<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { p.assert(Kind::Special(Special::Br), Parsing::Br)?; Ok(Self { src: p.parse()? }) } } impl<'t> Parsable<'t> for Src<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Src; Ok(match p.peek(parsing)?.kind { Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr Kind::At => { let reg = match p.then(parsing)?.next(parsing)?.kind { Kind::Reg(r) => r, ty => Err(p.error(Unexpected(ty), parsing))?, }; if let Kind::Plus = p.peek(parsing)?.kind { p.take(); Src::PostInc(reg) } else { Src::Indirect(reg) } } // @reg+, @reg Kind::Reg(_) => Src::Direct(p.parse()?), _ => { let expr = p.parse()?; match p.peek(parsing)?.kind { Kind::OpenParen => Src::Indexed(expr, { let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; p.assert(Kind::CloseParen, parsing)?; reg }), _ => Src::BareExpr(expr), } } }) } } impl<'t> Parsable<'t> for Dst<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Dst; Ok(match p.peek(parsing)?.kind { Kind::Hash => match p.then(parsing)?.next(parsing)?.kind { Kind::Number(0, _) => Dst::Special(DstSpecial::Zero), Kind::Number(1, _) => Dst::Special(DstSpecial::One), Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?, ty => Err(p.error(Unexpected(ty), parsing))?, }, Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?), Kind::Reg(_) => Dst::Direct(p.parse()?), _ => Dst::Indexed(p.expr()?.into(), { let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; p.assert(Kind::CloseParen, parsing)?; reg }), }) } } impl<'t> Parsable<'t> for JumpDst<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Jump; let mut neg = false; let out = loop { let token = p.peek(parsing)?; match token.kind { Kind::Minus => { neg = !neg; } Kind::Plus => {} Kind::Identifier => break Self::Label(token.lexeme), Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }), ty => Err(p.error(Unexpected(ty), parsing))?, } p.take(); }; p.take(); Ok(out) } } impl<'t> Parsable<'t> for Width { fn parse_with(p: &mut Parser<'t>) -> PResult { let out = match p.peek(Parsing::Width)?.kind() { Kind::Byte => Width::Byte, Kind::Word => Width::Word, _ => return Ok(Width::Word), }; p.take(); Ok(out) } } impl<'t> Parsable<'t> for Reg { fn parse_with(p: &mut Parser<'t>) -> PResult { let out = match p.peek(Parsing::Reg)?.kind { Kind::Reg(r) => r, ty => Err(p.error(Unexpected(ty), Parsing::Reg))?, }; p.take(); Ok(out) } } impl<'t> Parsable<'t> for Expr<'t> { fn parse_with(p: &mut Parser<'t>) -> PResult { p.expr() } } impl<'t, T: Parsable<'t>> Parsable<'t> for Box { fn parse_with(p: &mut Parser<'t>) -> PResult { Ok(Box::new(p.parse()?)) } } impl<'t, T: Parsable<'t>> Parsable<'t> for Vec { fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Vec; p.assert(Kind::OpenBrace, parsing)?; let mut out = vec![]; while Kind::CloseBrace != p.peek(parsing)?.kind { out.push(p.parse()?) } p.assert(Kind::CloseBrace, parsing)?; Ok(out) } } /// Context-sensitive parsing rules impl<'t> Parser<'t> { pub fn string(&mut self) -> PResult<&'t str> { let token = *self.peek(Parsing::Directive)?; match token.kind { Kind::String => { self.take(); Ok(&token.lexeme[1..token.lexeme.len() - 1]) } ty => Err(self.error(Unexpected(ty), Parsing::Directive)), } } pub fn label(&mut self) -> PResult<&'t str> { let p = Parsing::Label; let token = self.next(p)?; assert_eq!(Kind::Identifier, token.kind); self.assert(Kind::Colon, p)?; Ok(token.lexeme) } pub fn reg(&mut self) -> PResult { match self.peek(Parsing::Reg)?.kind { Kind::Reg(r) => { self.take(); Ok(r) } ty => Err(self.error(Unexpected(ty), Parsing::Reg)), } } } pub mod error { use super::Kind; use crate::span::Span; use std::{fmt::Display, num::TryFromIntError}; pub type PResult = Result; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Error { pub parsing: Parsing, pub kind: ErrorKind, pub loc: Span, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ErrorKind { LexError, /// Returned when [Parsing::Expr] fails without consuming NotExpr, DivZero, NonNumeric(Kind), BadIntForDst(u16), TryFromIntError(TryFromIntError), Unexpected(Kind), BufEmpty, Todo, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Parsing { File, Stmt, Label, Directive, Instruction, NoEm, OneEm, Reti, Br, OneArg, TwoArg, Jump, Width, Src, Dst, Reg, Expr, Vec, } impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing) } } impl Display for ErrorKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ErrorKind::LexError => write!(f, "lexical error"), ErrorKind::TryFromIntError(e) => write!(f, "{e}"), ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"), ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"), ErrorKind::DivZero => write!(f, "Division by zero"), ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"), ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"), ErrorKind::BufEmpty => write!(f, "Peek buffer empty"), ErrorKind::Todo => write!(f, "Not yet implemented"), } } } impl Display for Parsing { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Parsing::File => "a file".fmt(f), Parsing::Stmt => "a line".fmt(f), Parsing::Label => "a label".fmt(f), Parsing::Directive => "a directive".fmt(f), Parsing::Instruction => "an instruction".fmt(f), Parsing::NoEm => "a no-operand emulated instruction".fmt(f), Parsing::OneEm => "a one-operand emulated instruction".fmt(f), Parsing::Reti => "a `reti` instruction".fmt(f), Parsing::Br => "a `br` instruction".fmt(f), Parsing::OneArg => "a one-operand instruction".fmt(f), Parsing::TwoArg => "a two-operand instruction".fmt(f), Parsing::Jump => "a jump instruction".fmt(f), Parsing::Width => "an instruction width".fmt(f), Parsing::Src => "a source".fmt(f), Parsing::Dst => "a destination".fmt(f), Parsing::Reg => "a register".fmt(f), Parsing::Expr => "a constant expression".fmt(f), Parsing::Vec => "a list".fmt(f), } } } impl std::error::Error for Error {} } #[cfg(test)] mod tests;