msp430-repl/src/parser.rs

599 lines
20 KiB
Rust

// © 2023-2024 John Breaux
//See LICENSE.md for license
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
pub mod ast;
use self::error::{
Error,
ErrorKind::{self, *},
PResult, Parsing,
};
use crate::{
lexer::{
token::{Reg, Special, Token, TokenKind as Kind},
Lexer,
},
preprocessor::Preprocessor,
span::Span,
};
use ast::*;
/// Parser state: a token source plus a single token of lookahead.
#[derive(Clone, Debug)]
pub struct Parser<'t> {
/// Token source; `peek` refills the lookahead from it via `scan()`
lexer: Preprocessor<'t>,
/// One-token lookahead buffer: filled by `peek`, emptied by `take`
next: Option<Token<'t>>,
/// Span of the most recently peeked token; copied into every [Error] built by `error`
loc: Span<usize>,
}
impl<'t> Parser<'t> {
/// Creates a new [Parser]
pub fn new(text: &'t str) -> Self {
let lexer = Preprocessor::new(text);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
/// Createes a new [Parser] from an existing [Lexer]
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
let lexer = Preprocessor::with_lexer(lexer);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
Parsable::parse_with(self)
}
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
Error { parsing, kind, loc: self.loc }
}
/// Peek a token out of the lexer
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
if self.next.is_none() {
self.next = self.lexer.scan();
}
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
}
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
Ok(match self.take() {
Some(token) => token,
None => {
self.peek(p)?;
self.take().expect("should have been populated by peek")
}
})
}
/// Consumes the next token
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
match self.peek(p)?.kind {
kind if kind == expect => {
self.take();
Ok(self)
}
kind => Err(self.error(Unexpected(kind), p)),
}
}
/// Consumes the next token without checking it
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
self.next(p)?;
Ok(self)
}
/// Take the last peeked token
pub fn take(&mut self) -> Option<Token<'t>> {
self.next.take()
}
}
// Expressions
impl<'t> Parser<'t> {
/// Parses an expression
pub fn expr(&mut self) -> PResult<Expr<'t>> {
self.term()
}
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
pub fn term(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.factor()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)),
Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)),
Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a factor expression (binary `+`add, `-`sub)
pub fn factor(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.shift()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)),
Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
pub fn shift(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.bin()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)),
Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
pub fn bin(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.unary()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)),
Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)),
Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a unary expression (`!`invert, `-`negate)
pub fn unary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let mut ops = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => ops.push(UnOp::Deref),
Kind::Minus => ops.push(UnOp::Neg),
Kind::Bang => ops.push(UnOp::Not),
_ if ops.is_empty() => break Ok(self.primary()?),
_ => break Ok(Expr::Unary(ops, self.primary()?.into())),
}
self.take();
}
}
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
pub fn primary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let Token { lexeme, kind, .. } = *self.peek(p)?;
Ok(match kind {
Kind::OpenParen => {
let out = Expr::Group(self.then(p)?.parse()?);
self.assert(Kind::CloseParen, p)?;
out
}
Kind::Number(n, _) => {
self.take();
Expr::Number(n)
}
Kind::Identifier => {
self.take();
Expr::Ident(lexeme)
}
Kind::Amp => self.then(p)?.addrof()?,
ty => Err(self.error(NonNumeric(ty), p))?,
})
}
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let token = self.peek(p)?;
let out = match token.kind {
Kind::Identifier => Expr::AddrOf(token.lexeme),
Kind::Number(n, _) => Expr::Number(n),
ty => Err(self.error(Unexpected(ty), p))?,
};
self.take();
Ok(out)
}
}
/// Types which can be parsed out of a [Parser]'s token stream
pub trait Parsable<'t>: Sized {
    /// Parses `Self` from source text, using a freshly created [Parser]
    fn parse(text: &'t str) -> PResult<Self> {
        let mut parser = Parser::new(text);
        Self::parse_with(&mut parser)
    }
    /// Parses `Self` from the tokens of an existing [Parser]
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self>;
}
impl<'t> Parsable<'t> for Statements<'t> {
    /// Parses statements one after another until an `Eof` token is peeked
    ///
    /// The `Eof` token itself is left in the lookahead buffer.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let mut stmts = vec![];
        loop {
            if p.peek(Parsing::File)?.kind == Kind::Eof {
                break;
            }
            stmts.push(p.parse()?);
        }
        Ok(Self { stmts })
    }
}
impl<'t> Parsable<'t> for Statement<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let token = *p.peek(Parsing::Stmt)?;
Ok(match token.kind {
Kind::Comment => {
p.take();
Statement::Comment(token.lexeme)
}
Kind::Directive => Statement::Directive(p.parse()?),
Kind::Identifier => Statement::Label(p.label()?),
_ => Statement::Insn(p.parse()?),
})
}
}
impl<'t> Parsable<'t> for Directive<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
p.take();
Ok(match lexeme {
".define" => Directive::Define(p.parse()?),
".org" => Directive::Org(p.expr()?.into()),
".word" => Directive::Word(p.parse()?),
".words" => Directive::Words(p.parse()?),
".string" => Directive::String(p.string()?),
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
})
}
}
impl<'t> Parsable<'t> for Vec<Token<'t>> {
    /// Collects raw tokens up to, but not including, the next `Eof`, `Newline`, or `Comment`
    ///
    /// The terminating token is then discarded from the lookahead buffer.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let parsing = Parsing::Directive;
        let mut tokens = vec![];
        while !matches!(p.peek(parsing)?.kind, Kind::Eof | Kind::Newline | Kind::Comment) {
            tokens.push(p.next(parsing)?);
        }
        // Drop the terminator that ended the loop
        p.take();
        Ok(tokens)
    }
}
impl<'t> Parsable<'t> for Instruction<'t> {
    /// Parses an instruction and records the span it was parsed from
    ///
    /// The span starts at the first token of the instruction and ends at the end of
    /// whichever token was most recently peeked once the instruction kind is parsed.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let start = p.peek(Parsing::Instruction)?.pos.start;
        let kind = p.parse()?;
        let end = p.loc.end;
        Ok(Self { kind, span: Span { start, end } })
    }
}
impl<'t> Parsable<'t> for InstructionKind<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
// an instruction starts with an opcode
Ok(match p.peek(Parsing::Instruction)?.kind() {
Kind::NoEm(_) => Self::NoEm(p.parse()?),
Kind::OneEm(_) => Self::OneEm(p.parse()?),
Kind::Special(Special::Br) => Self::Br(p.parse()?),
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
Kind::OneArg(_) => Self::OneArg(p.parse()?),
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
Kind::Jump(_) => Self::Jump(p.parse()?),
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
})
}
}
impl<'t> Parsable<'t> for NoEm {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
match p.next(Parsing::NoEm)?.kind {
Kind::NoEm(opcode) => Ok(Self { opcode }),
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
}
}
}
impl<'t> Parsable<'t> for OneEm<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneEm)?.kind {
Kind::OneEm(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
},
width: p.parse()?,
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for OneArg<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneArg)?.kind {
Kind::OneArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
},
width: p.parse()?,
src: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for TwoArg<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::TwoArg;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::TwoArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
width: p.parse()?,
src: p.parse()?,
dst: p.assert(Kind::Comma, parsing)?.parse()?,
})
}
}
impl<'t> Parsable<'t> for Jump<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::Jump(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for Reti {
    /// Parses a `reti` instruction, which consists solely of its opcode token
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        use crate::lexer::token::OneArg;
        let reti = Kind::OneArg(OneArg::Reti);
        p.assert(reti, Parsing::Reti).map(|_| Reti)
    }
}
impl<'t> Parsable<'t> for Br<'t> {
    /// Parses a `br` instruction: the `br` opcode followed by its source operand
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let src = p.assert(Kind::Special(Special::Br), Parsing::Br)?.parse()?;
        Ok(Self { src })
    }
}
impl<'t> Parsable<'t> for Src<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Src;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
Kind::At => {
let reg = match p.then(parsing)?.next(parsing)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), parsing))?,
};
if let Kind::Plus = p.peek(parsing)?.kind {
p.take();
Src::PostInc(reg)
} else {
Src::Indirect(reg)
}
} // @reg+, @reg
Kind::Reg(_) => Src::Direct(p.parse()?),
_ => {
let expr = p.parse()?;
match p.peek(parsing)?.kind {
Kind::OpenParen => Src::Indexed(expr, {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
_ => Src::BareExpr(expr),
}
}
})
}
}
impl<'t> Parsable<'t> for Dst<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Dst;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
Kind::Reg(_) => Dst::Direct(p.parse()?),
_ => Dst::Indexed(p.expr()?.into(), {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
})
}
}
impl<'t> Parsable<'t> for JumpDst<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
let mut neg = false;
let out = loop {
let token = p.peek(parsing)?;
match token.kind {
Kind::Minus => {
neg = !neg;
}
Kind::Plus => {}
Kind::Identifier => break Self::Label(token.lexeme),
Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }),
ty => Err(p.error(Unexpected(ty), parsing))?,
}
p.take();
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Width {
    /// Parses an optional instruction width
    ///
    /// A `Byte` or `Word` token is consumed and selects that width. Any other
    /// token is left untouched and the width defaults to `Word`.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let explicit = match p.peek(Parsing::Width)?.kind() {
            Kind::Byte => Some(Self::Byte),
            Kind::Word => Some(Self::Word),
            _ => None,
        };
        if explicit.is_some() {
            p.take();
        }
        Ok(explicit.unwrap_or(Self::Word))
    }
}
impl<'t> Parsable<'t> for Reg {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let out = match p.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), Parsing::Reg))?,
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Expr<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
p.expr()
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
    /// Parses a `T` and moves it into a [Box]
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        p.parse().map(Box::new)
    }
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
    /// Parses a brace-delimited list: `{`, any number of `T`s, then `}`
    ///
    /// # Errors
    /// Returns [`Unexpected`] if the list does not start with `{`.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let parsing = Parsing::Vec;
        p.assert(Kind::OpenBrace, parsing)?;
        let mut items = vec![];
        loop {
            if p.peek(parsing)?.kind == Kind::CloseBrace {
                break;
            }
            items.push(p.parse()?);
        }
        p.assert(Kind::CloseBrace, parsing)?;
        Ok(items)
    }
}
/// Context-sensitive parsing rules
impl<'t> Parser<'t> {
pub fn string(&mut self) -> PResult<&'t str> {
let token = *self.peek(Parsing::Directive)?;
match token.kind {
Kind::String => {
self.take();
Ok(&token.lexeme[1..token.lexeme.len() - 1])
}
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
}
}
pub fn label(&mut self) -> PResult<&'t str> {
let p = Parsing::Label;
let token = self.next(p)?;
assert_eq!(Kind::Identifier, token.kind);
self.assert(Kind::Colon, p)?;
Ok(token.lexeme)
}
pub fn reg(&mut self) -> PResult<Reg> {
match self.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => {
self.take();
Ok(r)
}
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
}
}
}
/// Error types produced by the parser
pub mod error {
    use super::Kind;
    use crate::span::Span;
    use std::{fmt::Display, num::TryFromIntError};

    /// Result of a parsing operation
    pub type PResult<T> = Result<T, Error>;

    /// A parse error: what went wrong, what was being parsed, and where
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct Error {
        /// The construct that was being parsed when the error occurred
        pub parsing: Parsing,
        /// The reason for the failure
        pub kind: ErrorKind,
        /// The source span the error is attributed to
        pub loc: Span<usize>,
    }

    /// The reason a parse failed
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub enum ErrorKind {
        /// Displayed as "lexical error"
        LexError,
        /// Returned when [Parsing::Expr] fails without consuming
        NotExpr,
        /// Displayed as "Division by zero"
        DivZero,
        /// A token of this kind was found where a number was required
        NonNumeric(Kind),
        /// This immediate value is not valid in a destination
        BadIntForDst(u16),
        /// An integer conversion failed
        TryFromIntError(TryFromIntError),
        /// A token of this kind was not valid at this position
        Unexpected(Kind),
        /// No token was available to peek
        BufEmpty,
        /// Displayed as "Not yet implemented"
        Todo,
    }

    /// The construct being parsed when an [Error] occurred
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub enum Parsing {
        File,
        Stmt,
        Label,
        Directive,
        Instruction,
        NoEm,
        OneEm,
        Reti,
        Br,
        OneArg,
        TwoArg,
        Jump,
        Width,
        Src,
        Dst,
        Reg,
        Expr,
        Vec,
    }

    impl Display for Error {
        /// Formats as `[<loc>]: Error: <kind> while parsing <parsing>`
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let Self { parsing, kind, loc } = self;
            write!(f, "[{loc}]: Error: {kind} while parsing {parsing}")
        }
    }

    impl Display for ErrorKind {
        /// Writes a human-readable description of the failure
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Self::LexError => f.write_str("lexical error"),
                Self::TryFromIntError(e) => write!(f, "{e}"),
                Self::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
                Self::NotExpr => f.write_str("Not a literal or basic expression"),
                Self::DivZero => f.write_str("Division by zero"),
                Self::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
                Self::Unexpected(t) => write!(f, "Unexpected token ({t})"),
                Self::BufEmpty => f.write_str("Peek buffer empty"),
                Self::Todo => f.write_str("Not yet implemented"),
            }
        }
    }

    impl Display for Parsing {
        /// Writes the construct as a noun phrase, e.g. "a label"
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let description = match self {
                Self::File => "a file",
                Self::Stmt => "a line",
                Self::Label => "a label",
                Self::Directive => "a directive",
                Self::Instruction => "an instruction",
                Self::NoEm => "a no-operand emulated instruction",
                Self::OneEm => "a one-operand emulated instruction",
                Self::Reti => "a `reti` instruction",
                Self::Br => "a `br` instruction",
                Self::OneArg => "a one-operand instruction",
                Self::TwoArg => "a two-operand instruction",
                Self::Jump => "a jump instruction",
                Self::Width => "an instruction width",
                Self::Src => "a source",
                Self::Dst => "a destination",
                Self::Reg => "a register",
                Self::Expr => "a constant expression",
                Self::Vec => "a list",
            };
            // Delegate to `str`'s Display so formatter flags are honoured as before
            Display::fmt(description, f)
        }
    }

    impl std::error::Error for Error {}
}
#[cfg(test)]
mod tests;