msp430-repl/src/lexer/preprocessed.rs

// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
//! ".define" rules
use super::*;
use std::collections::{HashMap, VecDeque};

// TODO: Clean this spaghetti mess up

/// A [TokenStream] adapter which rewrites tokens according to in-band ".define" rules: a token
/// named as the key of an earlier ".define" is swapped for that rule's replacement tokens.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T: TokenStream<'t>> {
    /// Maps each defined key token to the tokens which replace it
    sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
    /// The [Type] of every token which has been used as a key in `sub_table`
    sub_types: Vec<Type>,
    /// Tokens waiting to be yielded ahead of anything read from `inner`
    queue: VecDeque<Token<'t>>,
    /// The wrapped token source
    inner: &'t mut T,
}

impl<'t, T: TokenStream<'t>> Iterator for Preprocessed<'t, T> {
    type Item = Token<'t>;

    /// Yields the next token, draining the queue before touching the inner stream.
    ///
    /// A token read from the inner stream which has a substitution rule is never yielded itself;
    /// its replacement tokens are queued and yielded in its place.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Queued tokens always take priority, and are yielded verbatim
            if let Some(queued) = self.queue.pop_front() {
                return Some(queued);
            }
            let fresh = self.inner.next()?;
            match self.sub_table.get(&fresh) {
                // Swap the token for its replacement, then go around again to drain the queue
                // (an empty replacement simply skips the token)
                Some(replacement) => self.queue.extend(replacement),
                None => return Some(fresh),
            }
        }
    }
}

impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
    /// Creates a new [Preprocessed] [TokenStream] around `inner`, with no substitution rules and
    /// an empty queue
    pub fn new(inner: &'t mut T) -> Self {
        Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
    }

    /// Gets a mutable reference to the inner [TokenStream]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }

    /// Pushes `token` onto the back of the queue, and hands the same token back to the caller
    fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
        self.queue.push_back(token);
        token
    }

    /// Processes a `.define` directive, recording a new substitution rule.
    ///
    /// `token` is the token which was just taken from the inner stream. Anything other than a
    /// [Type::Directive] whose lexeme starts with ".define" is ignored.
    ///
    /// The first token after the directive becomes the key, and every following token up to the
    /// end of the line (skipping comments and spaces) becomes its replacement. The replacement
    /// tokens are also pushed onto the queue, so they are still yielded after the directive.
    ///
    /// A `.define` with nothing after it on the line records no rule.
    fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
        if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
            return Ok(());
        }
        // Tokenize the subdocument
        self.allow(Type::Directive);
        self.allow(Type::Space);

        // A bare `.define` has no key. Without this check the line terminator itself would be
        // taken as the key, and the *following* line would be recorded as its replacement.
        if matches!(self.inner.peek().variant(), Type::Endl | Type::EndOfFile) {
            return Ok(());
        }

        let Some(k) = self.inner.next() else { return Ok(()) };
        // Remember the key's type, so the stream knows which token types may need substitution
        let key_type = k.variant();
        if !self.sub_types.contains(&key_type) {
            self.sub_types.push(key_type);
        }

        self.allow(Type::Space);

        let mut replacement = vec![];
        loop {
            match self.inner.peek().variant() {
                // The rule ends with the line; the terminator is left in the inner stream
                Type::Endl | Type::EndOfFile => break,
                Type::Comment | Type::Space => {
                    // ignore comments
                    self.inner.next();
                }
                _ => {
                    // Stop, rather than panic, if the inner stream runs dry after a peek
                    let Some(next) = self.inner.next() else { break };
                    replacement.push(self.enqueue(next));
                }
            }
        }
        self.sub_table.insert(k, replacement);
        Ok(())
    }

    /// Does the preprocessing step: if `token` has a substitution rule, its replacement tokens are
    /// queued and one token is consumed from the inner stream. Otherwise, nothing happens.
    fn preprocess(&mut self, token: Token<'t>) {
        if let Some(subs) = self.sub_table.get(&token) {
            self.queue.extend(subs);
            self.inner.next();
        }
    }
}

impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    /// Reports the [Context] of the inner [TokenStream]
    fn context(&self) -> Context { self.inner.context() }

    /// Consumes and returns the next token if it is of the `expected` [Type].
    ///
    /// Queued tokens are checked first. When the queue is empty, the inner stream is asked for
    /// the `expected` type, and any token it returns is checked for a `.define` directive.
    /// Failing that, if the inner stream's next token has a substitution rule, the token is
    /// consumed, its replacement is queued, and the expectation is retried.
    ///
    /// # Errors
    /// Returns a [LexError] when the next token (after substitution) is not of the `expected`
    /// type, or when processing a `.define` directive fails.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                // Only resolve defines when expecting, otherwise you'll run into issues.
                if let Ok(next) = self.inner.expect(expected) {
                    self.define(next)?;
                    return Ok(next);
                }
                // The inner token was not what was expected; see if it can be substituted
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    if let Some(subs) = self.sub_table.get(&next) {
                        // Consume the key token, and queue its replacement
                        self.inner.allow_any_of(&self.sub_types);
                        self.queue.extend(subs);
                    }
                    // Retry against the replacement if there is one, else against the inner stream
                    return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
                }
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }

    /// Returns the next token without removing it from this stream.
    ///
    /// When the queue is empty and the inner stream's next token has a substitution rule, that
    /// token is consumed from the inner stream and its replacement is queued, so the first
    /// replacement token is what gets returned. If the replacement is empty, the original token
    /// is returned.
    fn peek(&mut self) -> Self::Item {
        match self.queue.front() {
            Some(token) => *token,
            None => {
                // Substitute the upcoming token whenever it has a rule, regardless of its type
                let old = self.inner.peek();
                self.preprocess(old);
                match self.queue.front() {
                    Some(&new) => new,
                    None => old,
                }
            }
        }
    }

    /// Returns the next token, without removing it from this stream, if it is of the `expected`
    /// [Type].
    ///
    /// Queued tokens are checked first. When the queue is empty and the inner stream's next token
    /// is not of the `expected` type, that token is substituted (if it has a rule) and the
    /// expectation is retried.
    ///
    /// # Errors
    /// Returns a [LexError] when the next token (after substitution) is not of the `expected`
    /// type.
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(token),
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                if let Ok(next) = self.inner.peek_expect(expected) {
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    self.preprocess(next);
                    return if self.queue.is_empty() {
                        self.inner.peek_expect(expected)
                    } else {
                        self.peek_expect(expected)
                    };
                }
                // Attach the context here too, so this error matches the others built in this impl
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }
}

impl<'t, T: TokenStream<'t>> std::fmt::Debug for Preprocessed<'t, T> {
    /// Writes the substitution table, the substitutable types, the queue, and the current
    /// context. The inner stream itself is left out, so the output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Preprocessed");
        builder.field("sub_table", &self.sub_table);
        builder.field("sub_types", &self.sub_types);
        builder.field("queue", &self.queue);
        builder.field("context", &self.context());
        builder.finish_non_exhaustive()
    }
}