diff --git a/src/error.rs b/src/error.rs
index 337e228..80b94c3 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -4,7 +4,7 @@ use std::fmt::Display;

 use super::{
-    tokenizer::token::{OwnedToken, Types},
+    lexer::token::{OwnedToken, Types},
     *,
 };

@@ -89,6 +89,18 @@ impl Error {
             _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
         }
     }
+
+    pub fn mask_expectation(mut self, expected: Type) -> Self {
+        match self {
+            Error::UnexpectedToken { got, .. } => self = Error::UnexpectedToken { expected, got },
+            Error::AllExpectationsFailed { got, .. } => self = Error::UnexpectedToken { expected, got },
+            Error::Contextual(context, err) => {
+                self = Error::Contextual(context, Box::new(err.mask_expectation(expected)))
+            }
+            _ => (),
+        }
+        self
+    }
 }

 impl Display for Error {
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..792bea6
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,82 @@
+// © 2023 John Breaux
+//! Iterates over &[str], producing [Token]s
+
+// Things we need:
+// ✔ 1. Lexer/Tokenizer
+//   ✔ 1. Instructions
+//      ✔ 1. Instruction mnemonics /ad.../
+//      ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
+//   ✔ 2. Operands
+//      ✔ 1. Registers /(r1[0-5]|r[0-9])/
+//      ✔ 2. Immediate Values /#/
+//      ✔ 3. Absolute addresses /&/
+//      ✔ 4. Numbers /[0-9A-Fa-f]+
+//      ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
+//   ✔ 3. Label definitions /(^.*):/
+//   ✔ 4. Comments (may be useful for debugging)
+
+pub mod context;
+pub mod ignore;
+pub mod preprocessed;
+pub mod token;
+pub mod token_stream;
+
+use crate::Error;
+use context::Context;
+use token::{Token, Type};
+use token_stream::TokenStream;
+
+/// Iterates over &[str], producing [Token]s
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Tokenizer<'t> {
+    text: &'t str,
+    idx: usize,
+    context: Context,
+}
+
+impl<'t> Tokenizer<'t> {
+    /// Produces a new [Tokenizer] from a [str]ing slice
+    pub fn new<T>(text: &'t T) -> Self
+    where T: AsRef<str> + ?Sized {
+        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
+    }
+
+    fn count(&mut self, token: &Token) {
+        // update the context
+        self.context.count(token);
+        // advance the index
+        self.idx += token.len();
+    }
+}
+
+impl<'text> Iterator for Tokenizer<'text> {
+    type Item = Token<'text>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.idx >= self.text.len() {
+            return None;
+        }
+        let token = Token::from(&self.text[self.idx..]);
+        // Count the token and advance the index
+        self.count(&token);
+        Some(token)
+    }
+}
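+
+// A minimal usage sketch of driving the Tokenizer as a plain [Iterator]
+// (illustrative only; it assumes "mov" lexes as an Insn):
+//
+//     let variants: Vec<Type> = Tokenizer::new("mov r15").map(|t| t.variant()).collect();
+//     assert_eq!(variants, vec![Type::Insn, Type::Space, Type::Register]);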
+
+impl<'text> TokenStream<'text> for Tokenizer<'text> {
+    fn context(&self) -> Context { self.context }
+    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
+    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
+    // registers are valid identifiers.
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
+        self.count(&token);
+        Ok(token)
+    }
+    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        Token::expect(&self.text[self.idx..], expected)
+    }
+}
diff --git a/src/tokenizer/context.rs b/src/lexer/context.rs
similarity index 85%
rename from src/tokenizer/context.rs
rename to src/lexer/context.rs
index 9576e98..61790b0 100644
--- a/src/tokenizer/context.rs
+++ b/src/lexer/context.rs
@@ -4,8 +4,8 @@ use super::*;
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Context {
     line: usize,
-    tokens: usize,
     position: usize,
+    tokens: usize,
 }

 impl Context {
@@ -18,7 +18,7 @@ impl Context {
             Type::EndOfFile => return,
             Type::Endl => {
                 self.line += 1;
-                self.position = 0;
+                self.position = 1;
             }
             _ => self.position += t.len(),
@@ -26,11 +26,9 @@ impl Context {
         }
     }
 }
 impl Default for Context {
-    fn default() -> Self { Self { line: 1, tokens: 0, position: 0 } }
+    fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } }
 }
 impl std::fmt::Display for Context {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}", self.line, self.position)
-    }
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) }
 }
diff --git a/src/lexer/ignore.rs b/src/lexer/ignore.rs
new file mode 100644
index 0000000..06586b0
--- /dev/null
+++ b/src/lexer/ignore.rs
@@ -0,0 +1,53 @@
+use super::*;
+
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Ignore<'t, T>
+where T: TokenStream<'t>
+{
+    ignore: Type,
+    inner: &'t mut T,
+}
+
+impl<'t, T> Ignore<'t, T>
+where T: TokenStream<'t>
+{
+    /// Creates a new [Ignore] which skips [Token]s of the given [Type]
+    pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } }
+    /// Gets a mutable reference to the inner [Iterator]
+    pub fn inner_mut(&mut self) -> &mut T { self.inner }
+}
+
+impl<'t, T> Iterator for Ignore<'t, T>
+where T: TokenStream<'t>
+{
+    type Item = Token<'t>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.inner.next()?;
+        // Ignored tokens are greedy, so the next token shouldn't be another ignored one
+        match next.variant() {
+            v if v == self.ignore => self.next(),
+            _ => Some(next),
+        }
+    }
+}
+
+impl<'t, T> TokenStream<'t> for Ignore<'t, T>
+where T: TokenStream<'t>
+{
+    fn context(&self) -> Context { self.inner.context() }
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        self.inner.allow(self.ignore);
+        self.inner.expect(expected)
+    }
+
+    fn peek(&mut self) -> Self::Item {
+        self.inner.allow(self.ignore);
+        self.inner.peek()
+    }
+
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        self.inner.allow(self.ignore);
+        self.inner.peek_expect(expected)
+    }
+}
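+
+// A quick, illustrative check of the skipping behaviour. The asm snippet and
+// the variants it should produce are assumptions about the token rules, not
+// part of the original patch:
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn ignore_skips_spaces() {
+        let mut tz = Tokenizer::new("mov r15");
+        let mut stream = tz.ignore(Type::Space);
+        assert_eq!(stream.next().map(|t| t.variant()), Some(Type::Insn));
+        // the Space between "mov" and "r15" is skipped
+        assert_eq!(stream.next().map(|t| t.variant()), Some(Type::Register));
+        assert_eq!(stream.next(), None);
+    }
+}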
diff --git a/src/lexer/preprocessed.rs b/src/lexer/preprocessed.rs
new file mode 100644
index 0000000..d97ec96
--- /dev/null
+++ b/src/lexer/preprocessed.rs
@@ -0,0 +1,166 @@
+// © 2023 John Breaux
+//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band ".define"
+//! rules
+use super::*;
+use std::collections::{HashMap, VecDeque};
+
+// TODO: Clean this spaghetti mess up
+
+/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
+/// rules
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+#[derive(PartialEq, Eq)]
+pub struct Preprocessed<'t, T>
+where T: TokenStream<'t>
+{
+    sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
+    sub_types: Vec<Type>,
+    queue: VecDeque<Token<'t>>,
+    inner: &'t mut T,
+}
+
+impl<'t, T> Iterator for Preprocessed<'t, T>
+where T: TokenStream<'t>
+{
+    type Item = Token<'t>;
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.queue.pop_front() {
+            Some(token) => Some(token),
+            None => {
+                let next = self.inner.next()?;
+                if let Some(subs) = self.sub_table.get(&next) {
+                    self.queue.extend(subs);
+                    return self.next();
+                }
+                Some(next)
+            }
+        }
+    }
+}
+
+impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
+    /// Creates a new [Preprocessed] [TokenStream]
+    pub fn new(inner: &'t mut T) -> Self {
+        Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
+    }
+
+    /// Gets a mutable reference to the inner [TokenStream]
+    pub fn inner_mut(&mut self) -> &mut T { self.inner }
+
+    fn define(&mut self, token: Token<'t>) -> Result<(), Error> {
+        if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
+            return Ok(());
+        }
+        // Tokenize the subdocument
+        self.allow(Type::Directive);
+
+        self.require(Type::Space).map_err(|e| e.context(self.context()))?;
+
+        let Some(k) = self.inner.next() else { return Ok(()) };
+        if !self.sub_types.contains(&k.variant()) {
+            self.sub_types.push(k.variant());
+        };
+
+        self.require(Type::Space).map_err(|e| e.context(self.context()))?;
+
+        let mut replacement = vec![];
+        loop {
+            match self.inner.peek().variant() {
+                Type::Endl | Type::EndOfFile => break,
+                Type::Comment | Type::Space => {
+                    // ignore comments
+                    self.inner.next();
+                }
+                _ => replacement.push(self.inner.next().unwrap()),
+            }
+        }
+        self.sub_table.insert(k, replacement);
+        Ok(())
+    }
+
+    /// Does the preprocessing step
+    fn preprocess(&mut self, token: Token<'t>) {
+        if let Some(subs) = self.sub_table.get(&token) {
+            self.queue.extend(subs);
+            self.inner.next();
+        }
+    }
+}
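+
+// An illustrative sketch of the substitution this performs (the name and the
+// operands are hypothetical):
+//
+//     .define lights r15
+//     mov #0xff, lights    ; the parser sees: mov #0xff, r15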
+
+impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
+where T: TokenStream<'t>
+{
+    fn context(&self) -> Context { self.inner.context() }
+
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        match self.queue.front() {
+            Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
+            Some(&token) => Err(Error::expected([expected], token).context(self.context())),
+            None => {
+                // Only resolve defines when expecting, otherwise you'll run into issues.
+                if let Ok(next) = self.inner.expect(expected) {
+                    self.define(next)?;
+                    return Ok(next);
+                }
+                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
+                    if let Some(subs) = self.sub_table.get(&next) {
+                        self.inner.allow_any_of(&self.sub_types);
+                        self.queue.extend(subs);
+                    }
+                    return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
+                }
+                Err(Error::expected([expected], self.inner.peek()))
+            }
+        }
+        // TODO: preprocessor step
+    }
+
+    fn peek(&mut self) -> Self::Item {
+        match self.queue.front() {
+            Some(token) => *token,
+            None => {
+                // Only allow substitution when the next token is unexpected
+                let old = self.inner.peek();
+                self.preprocess(old);
+                match self.queue.front() {
+                    Some(&new) => new,
+                    None => old,
+                }
+            }
+        }
+    }
+
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        match self.queue.front() {
+            Some(&token) if token.is_variant(expected) => Ok(token),
+            Some(&token) => Err(Error::expected([expected], token).context(self.context())),
+            None => {
+                if let Ok(next) = self.inner.peek_expect(expected) {
+                    return Ok(next);
+                }
+                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
+                    self.preprocess(next);
+                    return if self.queue.is_empty() {
+                        self.inner.peek_expect(expected)
+                    } else {
+                        self.peek_expect(expected)
+                    };
+                }
+                Err(Error::expected([expected], self.inner.peek()))
+            }
+        }
+    }
+}
+
+impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
+where T: TokenStream<'t>
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Preprocessed")
+            .field("sub_table", &self.sub_table)
+            .field("sub_types", &self.sub_types)
+            .field("queue", &self.queue)
+            .field("context", &self.context())
+            .finish_non_exhaustive()
+    }
+}
diff --git a/src/tokenizer/token.rs b/src/lexer/token.rs
similarity index 94%
rename from src/tokenizer/token.rs
rename to src/lexer/token.rs
index 5df804e..60934b4 100644
--- a/src/tokenizer/token.rs
+++ b/src/lexer/token.rs
@@ -55,7 +55,7 @@ impl<$t> From<&$t str> for $type {
 }

 /// A [Token] is a [semantically tagged](Type) sequence of characters
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Token<'text> {
     /// The type of this token
     variant: Type,
@@ -67,8 +67,8 @@ impl<'text> Token<'text> {
     /// Returns the [Type] of this [Token]
     pub fn variant(&self) -> Type { self.variant }

-    /// Returns the Lexeme (originating string slice) of this token
-    pub fn lexeme(&self) -> &str { self.lexeme }
+    /// Returns the lexeme (originating string slice) of this token
+    pub fn lexeme(&self) -> &'text str { self.lexeme }

     /// Parses this [Token] into another type
     pub fn parse<T>(&self) -> Result<T, <T as FromStr>::Err>
@@ -94,14 +94,14 @@ impl<'text> Debug for Token<'text> {
 impl<'text> Display for Token<'text> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self.variant {
-            Type::Endl | Type::EndOfFile | Type::Invalid => Display::fmt(&self.variant, f),
+            Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
             v => write!(f, "{v} \"{}\"", self.lexeme),
         }
     }
 }

 /// A [token Type](Type) is a semantic tag for a sequence of characters
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Type {
     /// contiguous whitespace, excluding newline
     Space,
@@ -150,6 +150,7 @@ pub enum Type {
     /// Separator (comma)
     Separator,
     /// End of File marker
+    #[default]
     EndOfFile,
     /// Invalid token
     Invalid,
@@ -160,10 +161,10 @@ regex_impl! {<'text> Token<'text> {
         regex!(Type::Space = r"^[\s--\n]+")
     }
     pub fn expect_endl(text: &str) -> Option<Self> {
-        regex!(Type::Endl = r"^[\s]+")
+        regex!(Type::Endl = r"^\n[\s--\n]*")
     }
     pub fn expect_comment(text: &str) -> Option<Self> {
-        regex!(Type::Comment = r"^(;|//).*")
+        regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
    }
     pub fn expect_label(text: &str) -> Option<Self> {
         regex!(Type::Label = r"^:")
@@ -179,7 +180,7 @@ regex_impl! {<'text> Token<'text> {
     }
     pub fn expect_register(text: &str) -> Option<Self> {
         // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
-        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)")
+        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
     }
     pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
         regex!(Type::RadixMarkerDec = r"(?i)^0d")
@@ -194,7 +195,7 @@ regex_impl! {<'text> Token<'text> {
         regex!(Type::RadixMarkerBin = r"(?i)^0b")
     }
     pub fn expect_number(text: &str) -> Option<Self> {
-        regex!(Type::Number = r"^+?[[:xdigit:]]+")
+        regex!(Type::Number = r"^\+?[[:xdigit:]]+(?-u:\b)")
     }
     pub fn expect_minus(text: &str) -> Option<Self> {
         regex!(Type::Minus = r"^-")
@@ -218,7 +219,7 @@ regex_impl! {<'text> Token<'text> {
         regex!(Type::Immediate = r"^#")
     }
     pub fn expect_directive(text: &str) -> Option<Self> {
-        regex!(Type::Directive = r"^\.\w+( .*)?")
+        regex!(Type::Directive = r"^\.\S+")
     }
     pub fn expect_identifier(text: &str) -> Option<Self> {
         regex!(Type::Identifier = r"^[A-Za-z_]\w*")
diff --git a/src/lexer/token_stream.rs b/src/lexer/token_stream.rs
new file mode 100644
index 0000000..446dc5d
--- /dev/null
+++ b/src/lexer/token_stream.rs
@@ -0,0 +1,75 @@
+use super::*;
+
+use super::ignore::Ignore;
+use super::preprocessed::Preprocessed;
+
+/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
+pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
+    /// Gets this stream's [Context]
+    fn context(&self) -> Context;
+
+    /// Creates an iterator that skips the given [Type] in the input
+    fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
+    where Self: Sized {
+        Ignore::new(variant, self)
+    }
+
+    /// Creates a [TokenStream] that performs live substitution of the input
+    fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
+    where Self: Sized {
+        Preprocessed::new(self)
+    }
+
+    /// Returns the next [Token] without advancing
+    fn peek(&mut self) -> Self::Item;
+
+    /// Returns the next [Token] if it is of the expected [Type], without advancing
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
+
+    /// Consumes and returns a [Token] if it is the expected [Type]
+    ///
+    /// Otherwise, does not consume a [Token]
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
+
+    /// Ignores a [Token] of the expected [Type], propagating errors.
+    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
+
+    /// Ignores a [Token] of the expected [Type], discarding errors.
+    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
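+
+    // How the consuming operations relate, from a caller's point of view
+    // (illustrative sketch; `stream` is any TokenStream):
+    //
+    //     stream.peek_expect(Type::Insn)?; // inspect without consuming
+    //     stream.expect(Type::Insn)?;      // consume and return the Token
+    //     stream.require(Type::Space)?;    // consume, discard the Token
+    //     stream.allow(Type::Comment);     // consume if present, else no-op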
+
+    /// Runs a function on each expected [Type], returning the first success
+    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
+    where T: AsRef<[Type]> {
+        for &expected in expected.as_ref() {
+            match f(self, expected).map_err(|e| e.bare()) {
+                Ok(t) => return Ok(t),
+                Err(Error::UnexpectedToken { .. }) => continue,
+                Err(e) => return Err(e.context(self.context())),
+            }
+        }
+        Err(Error::expected(expected, self.peek()).context(self.context()))
+    }
+
+    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
+    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::peek_expect, expected)
+    }
+    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
+    ///
+    /// Otherwise, does not consume a [Token]
+    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::expect, expected)
+    }
+    /// Ignores a [Token] of any expected [Type], discarding errors.
+    fn allow_any_of<T>(&mut self, expected: T)
+    where T: AsRef<[Type]> {
+        let _ = self.expect_any_of(expected);
+    }
+    /// Ignores a [Token] of any expected [Type], propagating errors.
+    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::require, expected)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 96384b2..51d3056 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,15 +1,17 @@
 // © 2023 John Breaux
 //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
 pub mod preamble {
+    //! Common imports for msp430-asm
     use super::*;
     pub use error::Error;
     pub use hash::{FromHash, Hash};
     pub use linker::{Linker, Visitor};
     pub use parser::Parser;
-    pub use tokenizer::{
+    pub use lexer::{
         context::Context,
         token::{Token, Type},
-        TokenStream, Tokenizer,
+        token_stream::TokenStream,
+        Tokenizer,
     };
 }

@@ -18,4 +20,4 @@ pub mod error;
 pub mod hash;
 pub mod linker;
 pub mod parser;
-pub mod tokenizer;
+pub mod lexer;
diff --git a/src/main.rs b/src/main.rs
index 4c36e87..2a16f8a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -16,20 +16,22 @@ fn main() -> Result<(), Error> {
     if repl {
         while let Ok(len) = std::io::stdin().read_line(&mut buf) {
             match len {
-                0 => break,    // No newline (reached EOF)
-                1 => continue, // Line is empty
+                0 => break, // No newline (reached EOF)
+                1 => {
+                    // Line is empty: parse the lines accumulated so far
+                    match Parser::default().parse(&buf) {
+                        Ok(tree) => println!("{tree:x}"),
+                        Err(error) => println!("{error}"),
+                    }
+                    buf.clear(); // Reuse buf's allocation
+                    continue;
+                }
                 _ => (),
             }
-            match Parser::default().parse(&buf) {
-                Ok(line) => println!("{line:x}"),
-                Err(error) => println!("{error}"),
-            }
-            buf.clear(); // Reuse buf's allocation
         }
     } else {
         std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?;
-        let mut tk = Tokenizer::new(&buf);
-        let tree = Parser::default().parse_with(&mut tk);
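+        // `parse` wires up the whole pipeline internally:
+        //   Tokenizer -> .preprocessed() (.define expansion) -> .ignore(Type::Space) -> parse tree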
+        let tree = Parser::default().parse(&buf);
         match &tree {
             Ok(tree) => println!("{tree:x}"),
             Err(error) => eprintln!("{error}"),
diff --git a/src/parser.rs b/src/parser.rs
index d50b741..0c5a576 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -27,15 +27,15 @@ pub mod preamble {
 }
 use preamble::*;

-pub(crate) mod parsable;
+pub mod parsable;

-pub(crate) mod comment;
-pub(crate) mod directive;
-pub(crate) mod identifier;
-pub(crate) mod instruction;
-pub(crate) mod label;
+pub mod comment;
+pub mod directive;
+pub mod identifier;
+pub mod instruction;
+pub mod label;

-pub(crate) mod line {
+pub mod line {
     // © 2023 John Breaux
     use super::*;

@@ -57,21 +57,33 @@ pub mod line {
     impl Parsable for Line {
         fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
         where T: TokenStream<'text> {
-            if let Ok(token) = stream.peek_expect_any_of([Type::Insn, Type::Comment, Type::Directive, Type::Identifier])
-            {
-                return Ok(match token.variant() {
+            Ok(
+                match stream
+                    .peek_expect_any_of([
+                        Type::Endl,
+                        Type::Insn,
+                        Type::Comment,
+                        Type::Directive,
+                        Type::Identifier,
+                        Type::EndOfFile,
+                    ])?
+                    .variant()
+                {
+                    Type::Endl => {
+                        stream.next();
+                        Self::Empty
+                    }
                     Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
                     Type::Comment => Self::Comment(Comment::parse(p, stream)?),
                     Type::Directive => Self::Directive(Directive::parse(p, stream)?),
                     Type::Identifier => Self::Label(Label::parse(p, stream)?),
-                    _ => unreachable!(),
-                });
-            }
-            let token = stream.expect_any_of([Type::EndOfFile])?;
-            Ok(match token.variant() {
-                Type::EndOfFile => Self::EndOfFile,
-                _ => unreachable!(),
-            })
+                    Type::EndOfFile => {
+                        stream.next();
+                        Self::EndOfFile
+                    }
+                    _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
+                },
+            )
         }
     }
     impl Display for Line {
@@ -96,7 +108,7 @@ pub mod line {
     }
 }

-pub(crate) mod root {
+pub mod root {
     // © 2023 John Breaux
     use super::*;

@@ -162,20 +174,19 @@ pub struct Parser {
 }

 impl Parser {
-    pub fn parse_with<'t, T>(self, stream: &'t mut T) -> Result<Root, Error>
-    where T: TokenStream<'t> {
-        Root::parse(&self, &mut stream.ignore_spaces())
+    pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, Error> {
+        Root::parse(&self, &mut stream.ignore(Type::Space))
     }
     pub fn parse<T>(self, input: &T) -> Result<Root, Error>
     where T: AsRef<str> + ?Sized {
-        Root::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+        Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
     }
     pub fn parse_one<T>(self, input: &T) -> Result<Line, Error>
     where T: AsRef<str> + ?Sized {
-        Line::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+        Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
     }

-    /// Sets the default radix for [Token](crate::tokenizer::token::Token) -> [Number]
+    /// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
     /// conversion
     pub fn radix(mut self, radix: u32) { self.radix = radix; }
diff --git a/src/parser/label.rs b/src/parser/label.rs
index f9f7614..f213ed8 100644
--- a/src/parser/label.rs
+++ b/src/parser/label.rs
@@ -7,7 +7,11 @@ pub struct Label(pub Identifier);
 impl Parsable for Label {
     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
     where T: TokenStream<'text> {
-        Ok(Self(Identifier::parse(p, stream).and_then(|t| stream.require(Type::Label).and(Ok(t)))?))
+        Ok(Self(
+            Identifier::parse(p, stream)
+                .and_then(|t| stream.require(Type::Label).and(Ok(t)))
+                .map_err(|e| e.context(stream.context()))?,
+        ))
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
deleted file mode 100644
index 5fad89d..0000000
--- a/src/tokenizer.rs
+++ /dev/null
@@ -1,193 +0,0 @@
-// © 2023 John Breaux
-//! Iterates over &[str], producing [Token]s
-
-// Things we need:
-// ✔ 1. Lexer/Tokenizer
-//   ✔ 1. Instructions
-//      ✔ 1. Instruction mnemonics /ad.../
-//      ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
-//   ✔ 2. Operands
-//      ✔ 1. Registers /(r1[0-5]|r[0-9])/
-//      ✔ 2. Immediate Values /#/
-//      ✔ 3. Absolute addresses /&/
-//      ✔ 4. Numbers /[0-9A-Fa-f]+
-//      ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
-//   ✔ 3. Label definitions /(^.*):/
-//   ✔ 4. Comments (may be useful for debugging)
-
-pub mod context;
-pub mod token;
-
-use crate::Error;
-use context::Context;
-use token::{Token, Type};
-
-/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
-pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
-    /// Gets this stream's [Context]
-    fn context(&self) -> Context;
-
-    /// Creates an iterator that skips [Type::Space] in the input
-    fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
-    where Self: Sized {
-        IgnoreSpaces::new(self)
-    }
-
-    /// Returns the next [Token] without advancing
-    fn peek(&mut self) -> Self::Item;
-
-    /// Returns the next [Token] if it is of the expected [Type], without advancing
-    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
-
-    /// Consumes and returns a [Token] if it is the expected [Type]
-    ///
-    /// Otherwise, does not consume a [Token]
-    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
-
-    /// Ignores a [Token] of the expected [Type], propegating errors.
-    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
-
-    /// Ignores a [Token] of the expected [Type], discarding errors.
-    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
-
-    /// Runs a function on each
-    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
-    where T: AsRef<[Type]> {
-        for &expected in expected.as_ref() {
-            match f(self, expected).map_err(|e| e.bare()) {
-                Ok(t) => return Ok(t),
-                Err(Error::UnexpectedToken { .. }) => continue,
-                Err(e) => return Err(e.context(self.context())),
-            }
-        }
-        Err(Error::expected(expected, self.peek()).context(self.context()))
-    }
-
-    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
-    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
-    where T: AsRef<[Type]> {
-        self.any_of(Self::peek_expect, expected)
-    }
-    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
-    ///
-    /// Otherwise, does not consume a [Token]
-    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
-    where T: AsRef<[Type]> {
-        self.any_of(Self::expect, expected)
-    }
-    /// Ignores a [Token] of any expected [Type], discarding errors.
-    fn allow_any_of<T>(&mut self, expected: T)
-    where T: AsRef<[Type]> {
-        let _ = self.expect_any_of(expected);
-    }
-    /// Ignores a [Token] of any expected [Type], propegating errors.
-    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
-    where T: AsRef<[Type]> {
-        self.any_of(Self::require, expected)
-    }
-}
-
-/// Iterates over &[str], producing [Token]s
-#[must_use = "iterators are lazy and do nothing unless consumed"]
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Tokenizer<'t> {
-    text: &'t str,
-    idx: usize,
-    context: Context,
-}
-
-impl<'t> Tokenizer<'t> {
-    /// Produces a new [Tokenizer] from a [str]ing slice
-    pub fn new<T>(text: &'t T) -> Self
-    where T: AsRef<str> + ?Sized {
-        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
-    }
-
-    fn count(&mut self, token: &Token) {
-        // update the context
-        self.context.count(token);
-        // advance the index
-        self.idx += token.len();
-    }
-}
-
-impl<'text> Iterator for Tokenizer<'text> {
-    type Item = Token<'text>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.idx >= self.text.len() {
-            return None;
-        }
-        let token = Token::from(&self.text[self.idx..]);
-        // Process [Type::Directive]s
-        self.count(&token);
-        Some(token)
-    }
-}
-
-impl<'text> TokenStream<'text> for Tokenizer<'text> {
-    fn context(&self) -> Context { self.context }
-    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
-    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
-    // registers are valid identifiers.
-    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
-        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
-        self.count(&token);
-        Ok(token)
-    }
-    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
-    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
-        Token::expect(&self.text[self.idx..], expected)
-    }
-}
-
-#[must_use = "iterators are lazy and do nothing unless consumed"]
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct IgnoreSpaces<'t, T>
-where T: TokenStream<'t>
-{
-    inner: &'t mut T,
-}
-
-impl<'t, T> IgnoreSpaces<'t, T>
-where T: TokenStream<'t>
-{
-    pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
-    /// Gets a mutable reference to the inner [Iterator]
-    pub fn inner_mut(&mut self) -> &mut T { self.inner }
-}
-
-impl<'t, T> Iterator for IgnoreSpaces<'t, T>
-where T: TokenStream<'t>
-{
-    type Item = Token<'t>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let next = self.inner.next()?;
-        // Space tokens are greedy, so the next token shouldn't be a Space
-        match next.variant() {
-            Type::Space => self.next(),
-            _ => Some(next),
-        }
-    }
-}
-
-impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
-where T: TokenStream<'t>
-{
-    fn context(&self) -> Context { self.inner.context() }
-    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
-        self.inner.allow_any_of([Type::Space, Type::Endl]);
-        self.inner.expect(expected)
-    }
-
-    fn peek(&mut self) -> Self::Item {
-        self.inner.allow_any_of([Type::Space, Type::Endl]);
-        self.inner.peek()
-    }
-
-    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
-        self.inner.allow_any_of([Type::Space, Type::Endl]);
-        self.inner.peek_expect(expected)
-    }
-}
diff --git a/valid.asm b/valid.asm
index c819644..5dc88b6 100755
--- a/valid.asm
+++ b/valid.asm
@@ -2,6 +2,14 @@
 ; examples of valid assembly
 ;

+; testing defines
+.define asdfgh #1000
+.define qwerty @sp+
+br asdfgh
+mov qwerty, r15
+
+
+
 _register_mode:
 .define numbered r1
 mov r0, r1