- Each major module (lexer, parser, assembler) has its own error type
- These error types are somewhat interconnected, but their dependency relationships are one-way and well defined
- The AST is no longer responsible for assembling itself
- The Assembler (assembler::Assembler) now visits every AST node and accumulates words
- Words are assumed to be little-endian.
- There is now a set of assembler directives that affect the generated output (usage sketch below, after the TODOs):
  - .word <Number>: inserts a single word in the output
  - .words [<Number>,*]: inserts multiple words in the output
  - .byte <Number>: Alias for .word
  - .bytes [<Number>,*]: Alias for .words
  - .string "String": inserts a null-terminated UTF-8 encoded string
  - .strings ["String",*]: inserts multiple null-terminated UTF-8 encoded strings
- Data is always word-aligned at the moment.
- There are now assembler directives that affect the AST during parsing:
  - .include "path/to/file": parses the contents of a file directly into the AST
    - Included files have their own defines, but *share* labels. This is because .defines are a tokenizer construct, and including a file creates a new buffer and tokenizer.
    - Circular includes are NOT checked for at the moment. It is very easy to exhaust the stack.
- General cleanup of several functions, comments, TODOs, etc.
- main.rs was moved to make room for upcoming improvements to the UI

TODO:
- REPL mode is only partially compatible with the .define directive
- Branching to a label will branch to the data AT the label, not the label itself. I doubt this is correct behavior.
  - In case br <label> is meant to use the absolute address, I've created a .org directive (currently unimplemented) for specifying the load address of the program.
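
A quick sketch of what these directives could look like in source, based on the forms listed above (the `;` comment character, the number spellings, and the include path are illustrative assumptions, not taken from this changeset):

    .define SIZE 0x10         ; tokenizer-level substitution: SIZE -> 0x10
    .word   0xCAFE            ; one little-endian word in the output
    .words  [1, 2, SIZE]      ; several words; SIZE expands to 0x10
    .byte   0xFF              ; alias for .word
    .string "Hello!"          ; null-terminated UTF-8, word-aligned
    .include "lib/macros.asm" ; parsed directly into the AST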
// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens according to earlier in-band
//! `.define` rules

use super::*;
use std::collections::{HashMap, VecDeque};

// TODO: Clean this spaghetti mess up

/// Preprocesses a [`TokenStream`], substituting tokens according to earlier in-band `.define`
/// rules
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T>
where T: TokenStream<'t>
{
    /// Maps each `.define`d key token to its replacement tokens
    sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
    /// The token [Type]s that have appeared as `.define` keys
    sub_types: Vec<Type>,
    /// Already-expanded tokens waiting to be yielded
    queue: VecDeque<Token<'t>>,
    inner: &'t mut T,
}

impl<'t, T> Iterator for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;
    fn next(&mut self) -> Option<Self::Item> {
        match self.queue.pop_front() {
            Some(token) => Some(token),
            None => {
                let next = self.inner.next()?;
                if let Some(subs) = self.sub_table.get(&next) {
                    self.queue.extend(subs);
                    return self.next();
                }
                Some(next)
            }
        }
    }
}
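
// Illustrative expansion sketch (token spellings hypothetical): if the table
// maps FOO -> [A, B], pulling `FOO BAR` from the inner stream yields
// `A B BAR`: FOO's replacement is queued, and the queue is drained before
// the inner stream is consulted again.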

impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
    /// Creates a new [Preprocessed] [TokenStream]
    pub fn new(inner: &'t mut T) -> Self {
        Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
    }
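
    // Construction sketch (assumes some `Tokenizer` type elsewhere in this
    // crate implements `TokenStream<'t>`; the name is illustrative):
    //     let mut lexer = Tokenizer::new(source);
    //     let mut stream = Preprocessed::new(&mut lexer);
    // Note that `.define` rules are only registered through `expect` below,
    // so this stream is meant to be driven by the parser, not merely iterated.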

    /// Gets a mutable reference to the inner [TokenStream]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }

    /// Pushes a token onto the back of the queue, returning a copy of it
    fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
        self.queue.push_back(token);
        token
    }

    /// Processes `.define` directives in the preprocessor
    fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
        if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
            return Ok(());
        }
        // Tokenize the subdocument
        self.allow(Type::Directive);
        self.allow(Type::Space);

        let Some(k) = self.inner.next() else { return Ok(()) };
        if !self.sub_types.contains(&k.variant()) {
            self.sub_types.push(k.variant());
        }

        self.allow(Type::Space);

        let mut replacement = vec![];
        loop {
            match self.inner.peek().variant() {
                Type::Endl | Type::EndOfFile => break,
                Type::Comment | Type::Space => {
                    // skip comments and whitespace
                    self.inner.next();
                }
                _ => {
                    let next = self.inner.next().unwrap();
                    replacement.push(self.enqueue(next));
                }
            }
        }
        self.sub_table.insert(k, replacement);
        Ok(())
    }
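
    // For example, a line like `.define FOO 42` (spelling illustrative)
    // records FOO -> [42] in `sub_table` and FOO's token [Type] in
    // `sub_types`. The replacement tokens are also pushed onto the queue via
    // `enqueue`, so they are emitted into the stream right after the
    // directive itself.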

    /// Substitutes `token` if it has a `.define` rule, consuming it from the inner stream
    fn preprocess(&mut self, token: Token<'t>) {
        if let Some(subs) = self.sub_table.get(&token) {
            self.queue.extend(subs);
            self.inner.next();
        }
    }
}

impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    fn context(&self) -> Context { self.inner.context() }

    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                // Only resolve defines when expecting, otherwise you'll run into issues.
                if let Ok(next) = self.inner.expect(expected) {
                    self.define(next)?;
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    if let Some(subs) = self.sub_table.get(&next) {
                        self.inner.allow_any_of(&self.sub_types);
                        self.queue.extend(subs);
                    }
                    return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
                }
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }
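
    // Resolution order in `expect`: queued substitutions first, then a direct
    // match from the inner stream (which also registers any new `.define`
    // rule), and finally a table lookup on token types that have previously
    // appeared as `.define` keys.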

    fn peek(&mut self) -> Self::Item {
        match self.queue.front() {
            Some(token) => *token,
            None => {
                // Only allow substitution when the next token is unexpected
                let old = self.inner.peek();
                self.preprocess(old);
                match self.queue.front() {
                    Some(&new) => new,
                    None => old,
                }
            }
        }
    }

    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(token),
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                if let Ok(next) = self.inner.peek_expect(expected) {
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    self.preprocess(next);
                    return if self.queue.is_empty() {
                        self.inner.peek_expect(expected)
                    } else {
                        self.peek_expect(expected)
                    };
                }
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }
}

impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Preprocessed")
            .field("sub_table", &self.sub_table)
            .field("sub_types", &self.sub_types)
            .field("queue", &self.queue)
            .field("context", &self.context())
            .finish_non_exhaustive()
    }
}