194 lines
6.5 KiB
Rust
194 lines
6.5 KiB
Rust
// © 2023 John Breaux
|
|
//! Iterates over &[str], producing [Token]s
|
|
|
|
// Things we need:
|
|
// ✔ 1. Lexer/Tokenizer
|
|
// ✔ 1. Instructions
|
|
// ✔ 1. Instruction mnemonics /ad.../
|
|
// ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
|
|
// ✔ 2. Src operands
|
|
// ✔ 1. Registers /(r1[0-5]|r[0-9])/
|
|
// ✔ 2. Immediate Values /#/
|
|
// ✔ 3. Absolute addresses /&/
|
|
// ✔ 4. Numbers /[0-9A-Fa-f]+/
|
|
// ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
|
|
// ✔ 4. Label definitions /(^.*):/
|
|
// ✔ 5. Comments (may be useful for debugging)
|
|
|
|
pub mod context;
|
|
pub mod token;
|
|
|
|
use crate::Error;
|
|
use context::Context;
|
|
use token::{Token, Type};
|
|
|
|
/// Backtracking through bifurcated timelines
|
|
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
|
|
/// Gets this stream's [Context]
|
|
fn context(&self) -> Context;
|
|
|
|
/// Creates an iterator that skips [Type::Space] in the input
|
|
fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
|
|
where Self: Sized {
|
|
IgnoreSpaces::new(self)
|
|
}
|
|
|
|
/// Returns the next [Token] without advancing
|
|
fn peek(&mut self) -> Self::Item;
|
|
|
|
/// Returns the next [Token] if it is of the expected [Type], without advancing
|
|
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
|
|
|
|
/// Consumes and returns a [Token] if it is the expected [Type]
|
|
///
|
|
/// Otherwise, does not consume a [Token]
|
|
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
|
|
|
|
/// Ignores a [Token] of the expected [Type], propegating errors.
|
|
fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
|
|
|
|
/// Ignores a [Token] of the expected [Type], discarding errors.
|
|
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
|
|
|
|
/// Runs a functor on each
|
|
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
|
|
where T: AsRef<[Type]> {
|
|
for &expected in expected.as_ref() {
|
|
match f(self, expected).map_err(|e| e.bare()) {
|
|
Ok(t) => return Ok(t),
|
|
Err(Error::UnexpectedToken { .. }) => continue,
|
|
Err(e) => return Err(e.context(self.context())),
|
|
}
|
|
}
|
|
Err(Error::expected(expected, self.peek()).context(self.context()))
|
|
}
|
|
|
|
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
|
|
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
|
|
where T: AsRef<[Type]> {
|
|
self.any_of(Self::peek_expect, expected)
|
|
}
|
|
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
|
|
///
|
|
/// Otherwise, does not consume a [Token]
|
|
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
|
|
where T: AsRef<[Type]> {
|
|
self.any_of(Self::expect, expected)
|
|
}
|
|
/// Ignores a [Token] of any expected [Type], discarding errors.
|
|
fn allow_any_of<T>(&mut self, expected: T)
|
|
where T: AsRef<[Type]> {
|
|
let _ = self.expect_any_of(expected);
|
|
}
|
|
/// Ignores a [Token] of any expected [Type], propegating errors.
|
|
fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
|
|
where T: AsRef<[Type]> {
|
|
self.any_of(Self::require, expected)
|
|
}
|
|
}
|
|
|
|
/// Iterates over &[str], producing [Token]s
|
|
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
|
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct Tokenizer<'t> {
|
|
text: &'t str,
|
|
idx: usize,
|
|
context: Context,
|
|
}
|
|
|
|
impl<'t> Tokenizer<'t> {
|
|
/// Produces a new [Tokenizer] from a [str]ing slice
|
|
pub fn new<T>(text: &'t T) -> Self
|
|
where T: AsRef<str> + ?Sized {
|
|
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
|
|
}
|
|
|
|
fn count(&mut self, token: &Token) {
|
|
// update the context
|
|
self.context.count(token);
|
|
// advance the index
|
|
self.idx += token.len();
|
|
}
|
|
}
|
|
|
|
impl<'text> Iterator for Tokenizer<'text> {
|
|
type Item = Token<'text>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.idx >= self.text.len() {
|
|
return None;
|
|
}
|
|
let token = Token::from(&self.text[self.idx..]);
|
|
// Process [Type::Directive]s
|
|
self.count(&token);
|
|
Some(token)
|
|
}
|
|
}
|
|
|
|
impl<'text> TokenStream<'text> for Tokenizer<'text> {
|
|
fn context(&self) -> Context { self.context }
|
|
// Tokenizer has access to the source buffer, and can implement expect and peek without cloning
|
|
// itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
|
|
// registers are valid identifiers.
|
|
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
|
|
let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
|
|
self.count(&token);
|
|
Ok(token)
|
|
}
|
|
fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
|
|
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
|
|
Token::expect(&self.text[self.idx..], expected)
|
|
}
|
|
}
|
|
|
|
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
|
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct IgnoreSpaces<'t, T>
|
|
where T: TokenStream<'t>
|
|
{
|
|
inner: &'t mut T,
|
|
}
|
|
|
|
impl<'t, T> IgnoreSpaces<'t, T>
|
|
where T: TokenStream<'t>
|
|
{
|
|
pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
|
|
/// Gets a mutable reference to the inner [Iterator]
|
|
pub fn inner_mut(&mut self) -> &mut T { self.inner }
|
|
}
|
|
|
|
impl<'t, T> Iterator for IgnoreSpaces<'t, T>
|
|
where T: TokenStream<'t>
|
|
{
|
|
type Item = Token<'t>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let next = self.inner.next()?;
|
|
// Space tokens are greedy, so the next token shouldn't be a Space
|
|
match next.variant() {
|
|
Type::Space => self.next(),
|
|
_ => Some(next),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
|
|
where T: TokenStream<'t>
|
|
{
|
|
fn context(&self) -> Context { self.inner.context() }
|
|
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
|
|
self.inner.allow_any_of([Type::Space, Type::Endl]);
|
|
self.inner.expect(expected)
|
|
}
|
|
|
|
fn peek(&mut self) -> Self::Item {
|
|
self.inner.allow_any_of([Type::Space, Type::Endl]);
|
|
self.inner.peek()
|
|
}
|
|
|
|
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
|
|
self.inner.allow_any_of([Type::Space, Type::Endl]);
|
|
self.inner.peek_expect(expected)
|
|
}
|
|
}
|