// © 2023 John Breaux //! Iterates over &[str], producing [Token]s // Things we need: // ✔ 1. Lexer/Tokenizer // ✔ 1. Instructions // ✔ 1. Instruction mnemonics /ad.../ // ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/ // ✔ 2. Src operands // ✔ 1. Registers /(r1[0-5]|r[0-9])/ // ✔ 2. Immediate Values /#/ // ✔ 3. Absolute addresses /&/ // ✔ 4. Numbers /[0-9A-Fa-f]+ // ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/ // ✔ 4. Label definitions /(^.*):/ // ✔ 5. Comments (may be useful for debugging) pub mod context; pub mod token; use crate::Error; use context::Context; use token::{Token, Type}; /// Backtracking through bifurcated timelines pub trait TokenStream<'text>: Iterator> { /// Gets this stream's [Context] fn context(&self) -> Context; /// Creates an iterator that skips [Type::Space] in the input fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self> where Self: Sized { IgnoreSpaces::new(self) } /// Returns the next [Token] without advancing fn peek(&mut self) -> Self::Item; /// Returns the next [Token] if it is of the expected [Type], without advancing fn peek_expect(&mut self, expected: Type) -> Result; /// Consumes and returns a [Token] if it is the expected [Type] /// /// Otherwise, does not consume a [Token] fn expect(&mut self, expected: Type) -> Result; /// Ignores a [Token] of the expected [Type], propegating errors. fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) } /// Ignores a [Token] of the expected [Type], discarding errors. fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } /// Runs a functor on each fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result where T: AsRef<[Type]> { for &expected in expected.as_ref() { match f(self, expected).map_err(|e| e.bare()) { Ok(t) => return Ok(t), Err(Error::UnexpectedToken { .. }) => continue, Err(e) => return Err(e.context(self.context())), } } Err(Error::expected(expected, self.peek()).context(self.context())) } /// Returns the next [Token] if it is of the expected [Types](Type), without advancing fn peek_expect_any_of(&mut self, expected: T) -> Result where T: AsRef<[Type]> { self.any_of(Self::peek_expect, expected) } /// Consumes and returns a [Token] if it matches any of the expected [Types](Type) /// /// Otherwise, does not consume a [Token] fn expect_any_of(&mut self, expected: T) -> Result where T: AsRef<[Type]> { self.any_of(Self::expect, expected) } /// Ignores a [Token] of any expected [Type], discarding errors. fn allow_any_of(&mut self, expected: T) where T: AsRef<[Type]> { let _ = self.expect_any_of(expected); } /// Ignores a [Token] of any expected [Type], propegating errors. fn require_any_of(&mut self, expected: T) -> Result<(), Error> where T: AsRef<[Type]> { self.any_of(Self::require, expected) } } /// Iterates over &[str], producing [Token]s #[must_use = "iterators are lazy and do nothing unless consumed"] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Tokenizer<'t> { text: &'t str, idx: usize, context: Context, } impl<'t> Tokenizer<'t> { /// Produces a new [Tokenizer] from a [str]ing slice pub fn new(text: &'t T) -> Self where T: AsRef + ?Sized { Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() } } fn count(&mut self, token: &Token) { // update the context self.context.count(token); // advance the index self.idx += token.len(); } } impl<'text> Iterator for Tokenizer<'text> { type Item = Token<'text>; fn next(&mut self) -> Option { if self.idx >= self.text.len() { return None; } let token = Token::from(&self.text[self.idx..]); // Process [Type::Directive]s self.count(&token); Some(token) } } impl<'text> TokenStream<'text> for Tokenizer<'text> { fn context(&self) -> Context { self.context } // Tokenizer has access to the source buffer, and can implement expect and peek without cloning // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and // registers are valid identifiers. fn expect(&mut self, expected: Type) -> Result { let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?; self.count(&token); Ok(token) } fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) } fn peek_expect(&mut self, expected: Type) -> Result { Token::expect(&self.text[self.idx..], expected) } } #[must_use = "iterators are lazy and do nothing unless consumed"] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct IgnoreSpaces<'t, T> where T: TokenStream<'t> { inner: &'t mut T, } impl<'t, T> IgnoreSpaces<'t, T> where T: TokenStream<'t> { pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } } /// Gets a mutable reference to the inner [Iterator] pub fn inner_mut(&mut self) -> &mut T { self.inner } } impl<'t, T> Iterator for IgnoreSpaces<'t, T> where T: TokenStream<'t> { type Item = Token<'t>; fn next(&mut self) -> Option { let next = self.inner.next()?; // Space tokens are greedy, so the next token shouldn't be a Space match next.variant() { Type::Space => self.next(), _ => Some(next), } } } impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T> where T: TokenStream<'t> { fn context(&self) -> Context { self.inner.context() } fn expect(&mut self, expected: Type) -> Result { self.inner.allow_any_of([Type::Space, Type::Endl]); self.inner.expect(expected) } fn peek(&mut self) -> Self::Item { self.inner.allow_any_of([Type::Space, Type::Endl]); self.inner.peek() } fn peek_expect(&mut self, expected: Type) -> Result { self.inner.allow_any_of([Type::Space, Type::Endl]); self.inner.peek_expect(expected) } }