// msp430-repl/src/tokenizer.rs
// © 2023 John Breaux
//! Iterates over &[str], producing [Token]s
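//!
//! A hedged sketch of typical use (the `mov.b` line and the crate path are
//! illustrative assumptions, not confirmed API):
//!
//! ```ignore
//! use msp430_repl::tokenizer::{Tokenizer, TokenStream};
//!
//! // Tokenize one line of assembly, skipping whitespace tokens.
//! let mut tokens = Tokenizer::new("mov.b #0x10, r15");
//! for token in tokens.ignore_spaces() {
//!     println!("{token:?}");
//! }
//! ```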
// Things we need:
// ✔ 1. Lexer/Tokenizer
//   ✔ 1. Instructions
//     ✔ 1. Instruction mnemonics /ad.../
//     ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
//   ✔ 2. Src operands
//     ✔ 1. Registers /(r1[0-5]|r[0-9])/
//     ✔ 2. Immediate Values /#/
//     ✔ 3. Absolute addresses /&/
//     ✔ 4. Numbers /[0-9A-Fa-f]+/
//     ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
//   ✔ 4. Label definitions /(^.*):/
//   ✔ 5. Comments (may be useful for debugging)
pub mod context;
pub mod token;
use crate::Error;
use context::Context;
use token::{Token, Type};
/// Backtracking through bifurcated timelines: a stream of [Token]s that can
/// peek ahead and assert expectations without consuming input on failure.
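///
/// A minimal sketch of the peek/expect workflow ([Type::Register] is assumed
/// here for illustration; substitute the crate's real variants):
///
/// ```ignore
/// let mut ts = Tokenizer::new("r15");
/// // peek never advances; expect advances only on success.
/// let peeked = ts.peek();
/// let reg = ts.expect(Type::Register)?;
/// assert_eq!(peeked, reg);
/// ```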
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
    /// Gets this stream's [Context]
    fn context(&self) -> Context;
    /// Creates an iterator that skips [Type::Space] in the input
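    ///
    /// A hedged sketch (assumes a [Token::variant] accessor, as used elsewhere
    /// in this file):
    ///
    /// ```ignore
    /// let mut ts = Tokenizer::new("mov r14, r15");
    /// // With spaces skipped, no Space token should ever surface.
    /// assert!(ts.ignore_spaces().all(|t| t.variant() != Type::Space));
    /// ```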
    fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
    where Self: Sized {
        IgnoreSpaces::new(self)
    }
    /// Returns the next [Token] without advancing
    fn peek(&mut self) -> Self::Item;
    /// Returns the next [Token] if it is of the expected [Type], without advancing
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Consumes and returns a [Token] if it is the expected [Type]
    ///
    /// Otherwise, does not consume a [Token]
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Ignores a [Token] of the expected [Type], propagating errors.
    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
    /// Ignores a [Token] of the expected [Type], discarding errors.
    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
    /// Tries `f` against each expected [Type] in order, returning the first
    /// success, or an [Error] if none of the expected [Types](Type) match.
    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
    where T: AsRef<[Type]> {
        for &expected in expected.as_ref() {
            match f(self, expected).map_err(|e| e.bare()) {
                Ok(t) => return Ok(t),
                Err(Error::UnexpectedToken { .. }) => continue,
                Err(e) => return Err(e.context(self.context())),
            }
        }
        Err(Error::expected(expected, self.peek()).context(self.context()))
    }
    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::peek_expect, expected)
    }
    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
    ///
    /// Otherwise, does not consume a [Token]
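    ///
    /// A hedged sketch ([Type::Immediate] is assumed for illustration):
    ///
    /// ```ignore
    /// let mut ts = Tokenizer::new("#42");
    /// // Accept either an immediate marker or a register here.
    /// let token = ts.expect_any_of([Type::Immediate, Type::Register])?;
    /// ```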
    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::expect, expected)
    }
    /// Ignores a [Token] of any expected [Type], discarding errors.
    fn allow_any_of<T>(&mut self, expected: T)
    where T: AsRef<[Type]> {
        let _ = self.expect_any_of(expected);
    }
    /// Ignores a [Token] of any expected [Type], propagating errors.
    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::require, expected)
    }
}
/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
    text: &'t str,
    idx: usize,
    context: Context,
}
impl<'t> Tokenizer<'t> {
    /// Produces a new [Tokenizer] from a [str]ing slice
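    ///
    /// A hedged usage sketch:
    ///
    /// ```ignore
    /// // Anything AsRef<str> works as input.
    /// let mut tokens = Tokenizer::new("jmp $+2");
    /// let first = tokens.next();
    /// ```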
    pub fn new<T>(text: &'t T) -> Self
    where T: AsRef<str> + ?Sized {
        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
    }
    fn count(&mut self, token: &Token) {
        // update the context
        self.context.count(token);
        // advance the index
        self.idx += token.len();
    }
}
impl<'text> Iterator for Tokenizer<'text> {
    type Item = Token<'text>;
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.text.len() {
            return None;
        }
        let token = Token::from(&self.text[self.idx..]);
        // Process [Type::Directive]s: update the context and advance past the token
        self.count(&token);
        Some(token)
    }
}
impl<'text> TokenStream<'text> for Tokenizer<'text> {
    fn context(&self) -> Context { self.context }
    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
    // registers are valid identifiers.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
        self.count(&token);
        Ok(token)
    }
    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        Token::expect(&self.text[self.idx..], expected)
    }
}
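/// Iterator adapter that filters [Type::Space] out of an underlying
/// [TokenStream]; see [TokenStream::ignore_spaces].
///
/// A hedged sketch:
///
/// ```ignore
/// let mut inner = Tokenizer::new("add r14, r15");
/// let spaced_out = IgnoreSpaces::new(&mut inner);
/// ```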
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    inner: &'t mut T,
}
impl<'t, T> IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
    /// Gets a mutable reference to the inner [Iterator]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }
}
impl<'t, T> Iterator for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;
    fn next(&mut self) -> Option<Self::Item> {
        let next = self.inner.next()?;
        // Space tokens are greedy, so the next token shouldn't be a Space
        match next.variant() {
            Type::Space => self.next(),
            _ => Some(next),
        }
    }
}
impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    fn context(&self) -> Context { self.inner.context() }
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.expect(expected)
    }
    fn peek(&mut self) -> Self::Item {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.peek()
    }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.peek_expect(expected)
    }
}