lexer: Overhaul to support token substitution

John 2023-08-25 03:01:53 -05:00
parent 53f1f765f1
commit 82420f0665
13 changed files with 469 additions and 248 deletions
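In short: the old `tokenizer` module becomes `lexer`, the `TokenStream` trait moves into `lexer::token_stream`, and a new `Preprocessed` adaptor rewrites tokens according to in-band `.define` rules. A minimal sketch of how the pieces compose after this change, going by `Parser::parse` in this diff (the crate-root import and source string are illustrative assumptions, not part of the commit):

use msp430_asm::preamble::*; // assumed crate name, per the doc comment in lib.rs

fn assemble(src: &str) -> Result<(), Error> {
    // Tokenizer -> Preprocessed (.define substitution) -> Ignore (skip Space)
    // is exactly the stack Parser::parse builds internally below.
    let tree = Parser::default().parse(src)?;
    println!("{tree:x}"); // the parse tree renders as hex, as in main.rs
    Ok(())
}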


@@ -4,7 +4,7 @@
 use std::fmt::Display;
 use super::{
-    tokenizer::token::{OwnedToken, Types},
+    lexer::token::{OwnedToken, Types},
     *,
 };
@@ -89,6 +89,18 @@ impl Error {
             _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
         }
    }
+    pub fn mask_expectation(mut self, expected: Type) -> Self {
+        match self {
+            Error::UnexpectedToken { got, .. } => self = Error::UnexpectedToken { expected, got },
+            Error::AllExpectationsFailed { got, .. } => self = Error::UnexpectedToken { expected, got },
+            Error::Contextual(context, err) => {
+                self = Error::Contextual(context, Box::new(err.mask_expectation(expected)))
+            }
+            _ => (),
+        }
+        self
+    }
 }
 impl Display for Error {
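The new `mask_expectation` collapses whichever expectation failed into a single `UnexpectedToken`, recursing through `Contextual` wrappers. A hedged sketch of a hypothetical call site (not from this commit): a parser that tried several alternatives can report just the one it considers canonical.

let reg = stream
    .expect_any_of([Type::Register, Type::Identifier])
    .map_err(|e| e.mask_expectation(Type::Register))?;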

src/lexer.rs (new file, 82 lines)

@@ -0,0 +1,82 @@
// © 2023 John Breaux
//! Iterates over &[str], producing [Token]s

// Things we need:
// ✔ 1. Lexer/Tokenizer
//    ✔ 1. Instructions
//       ✔ 1. Instruction mnemonics /ad.../
//       ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
//    ✔ 2. Operands
//       ✔ 1. Registers /(r1[0-5]|r[0-9])/
//       ✔ 2. Immediate Values /#/
//       ✔ 3. Absolute addresses /&/
//       ✔ 4. Numbers /[0-9A-Fa-f]+/
//       ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
//    ✔ 3. Label definitions /(^.*):/
//    ✔ 4. Comments (may be useful for debugging)

pub mod context;
pub mod ignore;
pub mod preprocessed;
pub mod token;
pub mod token_stream;

use crate::Error;
use context::Context;
use token::{Token, Type};
use token_stream::TokenStream;

/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
    text: &'t str,
    idx: usize,
    context: Context,
}

impl<'t> Tokenizer<'t> {
    /// Produces a new [Tokenizer] from a [str]ing slice
    pub fn new<T>(text: &'t T) -> Self
    where T: AsRef<str> + ?Sized {
        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
    }
    fn count(&mut self, token: &Token) {
        // update the context
        self.context.count(token);
        // advance the index
        self.idx += token.len();
    }
}

impl<'text> Iterator for Tokenizer<'text> {
    type Item = Token<'text>;
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.text.len() {
            return None;
        }
        let token = Token::from(&self.text[self.idx..]);
        // Count the token
        self.count(&token);
        Some(token)
    }
}

impl<'text> TokenStream<'text> for Tokenizer<'text> {
    fn context(&self) -> Context { self.context }
    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
    // registers are valid identifiers.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
        self.count(&token);
        Ok(token)
    }
    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        Token::expect(&self.text[self.idx..], expected)
    }
}
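For reference, a small sketch of driving the new `Tokenizer` by hand through the `TokenStream` interface. Which token `Type` each lexeme receives is an assumption based on token.rs further down, not something this commit states:

fn demo() -> Result<(), Error> {
    let mut tk = Tokenizer::new("mov r15, r14");
    let insn = tk.expect(Type::Insn)?; // consume the mnemonic, or fail with line:column context
    tk.allow(Type::Space);             // skip one Space token, discarding any error
    assert_eq!(tk.peek().variant(), Type::Register); // "r15" is inspected but not consumed
    Ok(())
}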


@@ -4,8 +4,8 @@ use super::*;
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Context {
     line: usize,
-    tokens: usize,
     position: usize,
+    tokens: usize,
 }
 impl Context {
@@ -18,7 +18,7 @@ impl Context {
             Type::EndOfFile => return,
             Type::Endl => {
                 self.line += 1;
-                self.position = 0;
+                self.position = 1;
             }
             _ => self.position += t.len(),
         }
@@ -26,11 +26,9 @@ impl Context {
     }
 }
 impl Default for Context {
-    fn default() -> Self { Self { line: 1, tokens: 0, position: 0 } }
+    fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } }
 }
 impl std::fmt::Display for Context {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}", self.line, self.position)
-    }
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) }
 }

src/lexer/ignore.rs (new file, 53 lines)

@@ -0,0 +1,53 @@
use super::*;

#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ignore<'t, T>
where T: TokenStream<'t>
{
    ignore: Type,
    inner: &'t mut T,
}

impl<'t, T> Ignore<'t, T>
where T: TokenStream<'t>
{
    /// Creates a new [Ignore] around an inner [TokenStream]
    pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } }
    /// Gets a mutable reference to the inner [Iterator]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }
}

impl<'t, T> Iterator for Ignore<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;
    fn next(&mut self) -> Option<Self::Item> {
        let next = self.inner.next()?;
        // Space tokens are greedy, so the next token shouldn't be a Space
        match next.variant() {
            Type::Space => self.next(),
            _ => Some(next),
        }
    }
}

impl<'t, T> TokenStream<'t> for Ignore<'t, T>
where T: TokenStream<'t>
{
    fn context(&self) -> Context { self.inner.context() }
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow(self.ignore);
        self.inner.expect(expected)
    }
    fn peek(&mut self) -> Self::Item {
        self.inner.allow(self.ignore);
        self.inner.peek()
    }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow(self.ignore);
        self.inner.peek_expect(expected)
    }
}
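`Ignore` is normally constructed through the `TokenStream::ignore` combinator (see token_stream.rs below). A sketch, assuming the usual token types:

let mut tk = Tokenizer::new("mov r0, r1");
let mut stream = tk.ignore(Type::Space);
// `stream` discards the ignored variant before every expect/peek,
// so the parser never has to mention Space explicitly.
let insn = stream.expect(Type::Insn);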

src/lexer/preprocessed.rs (new file, 166 lines)

@@ -0,0 +1,166 @@
// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band ".define"
//! rules

use super::*;
use std::collections::{HashMap, VecDeque};

// TODO: Clean this spaghetti mess up

/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
/// rules
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T>
where T: TokenStream<'t>
{
    sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
    sub_types: Vec<Type>,
    queue: VecDeque<Token<'t>>,
    inner: &'t mut T,
}

impl<'t, T> Iterator for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;
    fn next(&mut self) -> Option<Self::Item> {
        match self.queue.pop_front() {
            Some(token) => Some(token),
            None => {
                let next = self.inner.next()?;
                if let Some(subs) = self.sub_table.get(&next) {
                    self.queue.extend(subs);
                    return self.next();
                }
                Some(next)
            }
        }
    }
}

impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
    /// Creates a new [Preprocessed] [TokenStream]
    pub fn new(inner: &'t mut T) -> Self {
        Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
    }
    /// Gets a mutable reference to the inner [TokenStream]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }
    fn define(&mut self, token: Token<'t>) -> Result<(), Error> {
        if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
            return Ok(());
        }
        // Tokenize the subdocument
        self.allow(Type::Directive);
        self.require(Type::Space).map_err(|e| e.context(self.context()))?;
        let Some(k) = self.inner.next() else { return Ok(()) };
        if !self.sub_types.contains(&k.variant()) {
            self.sub_types.push(k.variant());
        };
        self.require(Type::Space).map_err(|e| e.context(self.context()))?;
        let mut replacement = vec![];
        loop {
            match self.inner.peek().variant() {
                Type::Endl | Type::EndOfFile => break,
                Type::Comment | Type::Space => {
                    // ignore comments
                    self.inner.next();
                }
                _ => replacement.push(self.inner.next().unwrap()),
            }
        }
        self.sub_table.insert(k, replacement);
        Ok(())
    }
    /// Does the preprocessing step
    fn preprocess(&mut self, token: Token<'t>) {
        if let Some(subs) = self.sub_table.get(&token) {
            self.queue.extend(subs);
            self.inner.next();
        }
    }
}

impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    fn context(&self) -> Context { self.inner.context() }
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
            Some(&token) => Err(Error::expected([expected], token).context(self.context())),
            None => {
                // Only resolve defines when expecting, otherwise you'll run into issues.
                if let Ok(next) = self.inner.expect(expected) {
                    self.define(next)?;
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    if let Some(subs) = self.sub_table.get(&next) {
                        self.inner.allow_any_of(&self.sub_types);
                        self.queue.extend(subs);
                    }
                    return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
                }
                Err(Error::expected([expected], self.inner.peek()))
            }
        }
        // TODO: preprocessor step
    }
    fn peek(&mut self) -> Self::Item {
        match self.queue.front() {
            Some(token) => *token,
            None => {
                // Only allow substitution when the next token is unexpected
                let old = self.inner.peek();
                self.preprocess(old);
                match self.queue.front() {
                    Some(&new) => new,
                    None => old,
                }
            }
        }
    }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(token),
            Some(&token) => Err(Error::expected([expected], token).context(self.context())),
            None => {
                if let Ok(next) = self.inner.peek_expect(expected) {
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    self.preprocess(next);
                    return if self.queue.is_empty() {
                        self.inner.peek_expect(expected)
                    } else {
                        self.peek_expect(expected)
                    };
                }
                Err(Error::expected([expected], self.inner.peek()))
            }
        }
    }
}

impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Preprocessed")
            .field("sub_table", &self.sub_table)
            .field("sub_types", &self.sub_types)
            .field("queue", &self.queue)
            .field("context", &self.context())
            .finish_non_exhaustive()
    }
}
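End to end, using the `.define` lines added to the test file at the bottom of this commit: once `.define asdfgh #1000` is recorded, `asdfgh` expands into the tokens of `#1000` whenever a substitutable token shows up where something else was expected. A sketch of that flow; the `Type` assigned to each lexeme (e.g. `br` as `Insn`) is an assumption based on token.rs:

fn demo() -> Result<(), Error> {
    let mut tk = Tokenizer::new(".define asdfgh #1000\nbr asdfgh");
    let mut pp = tk.preprocessed();
    // expect()ing the Directive is what records the rule: asdfgh -> [#, 1000]
    pp.expect(Type::Directive)?;
    pp.allow(Type::Endl);
    pp.require(Type::Insn)?; // "br"
    pp.allow(Type::Space);
    // "asdfgh" lexes as an Identifier, but the substitution table steps in:
    assert!(pp.expect(Type::Immediate).is_ok());
    Ok(())
}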


@@ -55,7 +55,7 @@ impl<$t> From<&$t str> for $type {
 }
 /// A [Token] is a [semantically tagged](Type) sequence of characters
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Token<'text> {
     /// The type of this token
     variant: Type,
@@ -67,8 +67,8 @@ impl<'text> Token<'text> {
     /// Returns the [Type] of this [Token]
     pub fn variant(&self) -> Type { self.variant }
-    /// Returns the Lexeme (originating string slice) of this token
-    pub fn lexeme(&self) -> &str { self.lexeme }
+    /// Returns the lexeme (originating string slice) of this token
+    pub fn lexeme(&self) -> &'text str { self.lexeme }
     /// Parses this [Token] into another type
     pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
@@ -94,14 +94,14 @@ impl<'text> Debug for Token<'text> {
 impl<'text> Display for Token<'text> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self.variant {
-            Type::Endl | Type::EndOfFile | Type::Invalid => Display::fmt(&self.variant, f),
+            Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
             v => write!(f, "{v} \"{}\"", self.lexeme),
         }
     }
 }
 /// A [token Type](Type) is a semantic tag for a sequence of characters
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Type {
     /// contiguous whitespace, excluding newline
     Space,
@@ -150,6 +150,7 @@ pub enum Type {
     /// Separator (comma)
     Separator,
     /// End of File marker
+    #[default]
     EndOfFile,
     /// Invalid token
     Invalid,
@@ -160,10 +161,10 @@ regex_impl! {<'text> Token<'text> {
        regex!(Type::Space = r"^[\s--\n]+")
    }
    pub fn expect_endl(text: &str) -> Option<Self> {
-        regex!(Type::Endl = r"^[\s]+")
+        regex!(Type::Endl = r"^\n[\s--\n]*")
    }
    pub fn expect_comment(text: &str) -> Option<Self> {
-        regex!(Type::Comment = r"^(;|//).*")
+        regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
    }
    pub fn expect_label(text: &str) -> Option<Self> {
        regex!(Type::Label = r"^:")
@@ -179,7 +180,7 @@ regex_impl! {<'text> Token<'text> {
    }
    pub fn expect_register(text: &str) -> Option<Self> {
        // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
-        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)")
+        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
    }
    pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
        regex!(Type::RadixMarkerDec = r"(?i)^0d")
@@ -194,7 +195,7 @@ regex_impl! {<'text> Token<'text> {
        regex!(Type::RadixMarkerBin = r"(?i)^0b")
    }
    pub fn expect_number(text: &str) -> Option<Self> {
-        regex!(Type::Number = r"^\+?[[:xdigit:]]+")
+        regex!(Type::Number = r"^\+?[[:xdigit:]]+(?-u:\b)")
    }
    pub fn expect_minus(text: &str) -> Option<Self> {
        regex!(Type::Minus = r"^-")
@@ -218,7 +219,7 @@ regex_impl! {<'text> Token<'text> {
        regex!(Type::Immediate = r"^#")
    }
    pub fn expect_directive(text: &str) -> Option<Self> {
-        regex!(Type::Directive = r"^\.\w+( .*)?")
+        regex!(Type::Directive = r"^\.\S+")
    }
    pub fn expect_identifier(text: &str) -> Option<Self> {
        regex!(Type::Identifier = r"^[A-Za-z_]\w*")

src/lexer/token_stream.rs (new file, 75 lines)

@@ -0,0 +1,75 @@
use super::*;
use super::ignore::Ignore;
use super::preprocessed::Preprocessed;

/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
    /// Gets this stream's [Context]
    fn context(&self) -> Context;
    /// Creates an iterator that skips the given [Type] in the input
    fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
    where Self: Sized {
        Ignore::new(variant, self)
    }
    /// Creates a [TokenStream] that performs live substitution of the input
    fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
    where Self: Sized {
        Preprocessed::new(self)
    }
    /// Returns the next [Token] without advancing
    fn peek(&mut self) -> Self::Item;
    /// Returns the next [Token] if it is of the expected [Type], without advancing
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Consumes and returns a [Token] if it is the expected [Type]
    ///
    /// Otherwise, does not consume a [Token]
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Ignores a [Token] of the expected [Type], propagating errors.
    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
    /// Ignores a [Token] of the expected [Type], discarding errors.
    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
    /// Runs a function on each expected [Type] in order, returning the first success
    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
    where T: AsRef<[Type]> {
        for &expected in expected.as_ref() {
            match f(self, expected).map_err(|e| e.bare()) {
                Ok(t) => return Ok(t),
                Err(Error::UnexpectedToken { .. }) => continue,
                Err(e) => return Err(e.context(self.context())),
            }
        }
        Err(Error::expected(expected, self.peek()).context(self.context()))
    }
    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::peek_expect, expected)
    }
    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
    ///
    /// Otherwise, does not consume a [Token]
    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::expect, expected)
    }
    /// Ignores a [Token] of any expected [Type], discarding errors.
    fn allow_any_of<T>(&mut self, expected: T)
    where T: AsRef<[Type]> {
        let _ = self.expect_any_of(expected);
    }
    /// Ignores a [Token] of any expected [Type], propagating errors.
    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::require, expected)
    }
}
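All of the `*_any_of` helpers funnel through `any_of`, which tries each `Type` in order, stops at the first success, and strips context from intermediate misses (via `e.bare()`) so only the final error carries position info. A typical call site, mirroring `Line::parse` in parser.rs below:

let token = stream.peek_expect_any_of([Type::Insn, Type::Comment, Type::Directive])?;
match token.variant() {
    Type::Insn => { /* parse an instruction; the peeked token was not consumed */ }
    Type::Comment => { /* ... */ }
    _ => { /* Directive */ }
}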


@@ -1,15 +1,17 @@
 // © 2023 John Breaux
 //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
 pub mod preamble {
+    //! Common imports for msp430-asm
     use super::*;
     pub use error::Error;
     pub use hash::{FromHash, Hash};
     pub use linker::{Linker, Visitor};
     pub use parser::Parser;
-    pub use tokenizer::{
+    pub use lexer::{
         context::Context,
         token::{Token, Type},
-        TokenStream, Tokenizer,
+        token_stream::TokenStream,
+        Tokenizer,
     };
 }
@@ -18,4 +20,4 @@ pub mod error;
 pub mod hash;
 pub mod linker;
 pub mod parser;
-pub mod tokenizer;
+pub mod lexer;


@@ -16,20 +16,22 @@ fn main() -> Result<(), Error> {
     if repl {
         while let Ok(len) = std::io::stdin().read_line(&mut buf) {
             match len {
                 0 => break, // No newline (reached EOF)
-                1 => continue, // Line is empty
+                1 => {
+                    // create a token stream
+                    match Parser::default().parse(&buf) {
+                        Ok(tree) => println!("{tree:x}"),
+                        Err(error) => println!("{error}"),
+                    }
+                    buf.clear(); // Reuse buf's allocation
+                    continue;
+                } // Line is empty
                 _ => (),
             }
-            match Parser::default().parse(&buf) {
-                Ok(line) => println!("{line:x}"),
-                Err(error) => println!("{error}"),
-            }
-            buf.clear(); // Reuse buf's allocation
         }
     } else {
         std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?;
-        let mut tk = Tokenizer::new(&buf);
-        let tree = Parser::default().parse_with(&mut tk);
+        let tree = Parser::default().parse(&buf);
         match &tree {
             Ok(tree) => println!("{tree:x}"),
             Err(error) => eprintln!("{error}"),


@@ -27,15 +27,15 @@ pub mod preamble {
 }
 use preamble::*;
-pub(crate) mod parsable;
-pub(crate) mod comment;
-pub(crate) mod directive;
-pub(crate) mod identifier;
-pub(crate) mod instruction;
-pub(crate) mod label;
-pub(crate) mod line {
+pub mod parsable;
+pub mod comment;
+pub mod directive;
+pub mod identifier;
+pub mod instruction;
+pub mod label;
+pub mod line {
 // © 2023 John Breaux
 use super::*;
@@ -57,21 +57,33 @@ pub(crate) mod line {
 impl Parsable for Line {
     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
     where T: TokenStream<'text> {
-        if let Ok(token) = stream.peek_expect_any_of([Type::Insn, Type::Comment, Type::Directive, Type::Identifier])
-        {
-            return Ok(match token.variant() {
+        Ok(
+            match stream
+                .peek_expect_any_of([
+                    Type::Endl,
+                    Type::Insn,
+                    Type::Comment,
+                    Type::Directive,
+                    Type::Identifier,
+                    Type::EndOfFile,
+                ])?
+                .variant()
+            {
+                Type::Endl => {
+                    stream.next();
+                    Self::Empty
+                }
                 Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
                 Type::Comment => Self::Comment(Comment::parse(p, stream)?),
                 Type::Directive => Self::Directive(Directive::parse(p, stream)?),
                 Type::Identifier => Self::Label(Label::parse(p, stream)?),
-                _ => unreachable!(),
-            });
-        }
-        let token = stream.expect_any_of([Type::EndOfFile])?;
-        Ok(match token.variant() {
-            Type::EndOfFile => Self::EndOfFile,
-            _ => unreachable!(),
-        })
+                Type::EndOfFile => {
+                    stream.next();
+                    Self::EndOfFile
+                }
+                _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
+            },
+        )
     }
 }
 impl Display for Line {
@@ -96,7 +108,7 @@ pub(crate) mod line {
     }
 }
-pub(crate) mod root {
+pub mod root {
 // © 2023 John Breaux
 use super::*;
@@ -162,20 +174,19 @@ pub struct Parser {
 }
 impl Parser {
-    pub fn parse_with<'t, T>(self, stream: &'t mut T) -> Result<Root, Error>
-    where T: TokenStream<'t> {
-        Root::parse(&self, &mut stream.ignore_spaces())
+    pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, Error> {
+        Root::parse(&self, &mut stream.ignore(Type::Space))
     }
     pub fn parse<T>(self, input: &T) -> Result<Root, Error>
     where T: AsRef<str> + ?Sized {
-        Root::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+        Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
     }
     pub fn parse_one<T>(self, input: &T) -> Result<Line, Error>
     where T: AsRef<str> + ?Sized {
-        Line::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+        Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
     }
-    /// Sets the default radix for [Token](crate::tokenizer::token::Token) -> [Number]
+    /// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
     /// conversion
     pub fn radix(mut self, radix: u32) { self.radix = radix; }


@@ -7,7 +7,11 @@ pub struct Label(pub Identifier);
 impl Parsable for Label {
     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
     where T: TokenStream<'text> {
-        Ok(Self(Identifier::parse(p, stream).and_then(|t| stream.require(Type::Label).and(Ok(t)))?))
+        Ok(Self(
+            Identifier::parse(p, stream)
+                .and_then(|t| stream.require(Type::Label).and(Ok(t)))
+                .map_err(|e| e.context(stream.context()))?,
+        ))
     }
 }


@@ -1,193 +0,0 @@
// © 2023 John Breaux
//! Iterates over &[str], producing [Token]s

// Things we need:
// ✔ 1. Lexer/Tokenizer
//    ✔ 1. Instructions
//       ✔ 1. Instruction mnemonics /ad.../
//       ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
//    ✔ 2. Operands
//       ✔ 1. Registers /(r1[0-5]|r[0-9])/
//       ✔ 2. Immediate Values /#/
//       ✔ 3. Absolute addresses /&/
//       ✔ 4. Numbers /[0-9A-Fa-f]+/
//       ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
//    ✔ 3. Label definitions /(^.*):/
//    ✔ 4. Comments (may be useful for debugging)

pub mod context;
pub mod token;

use crate::Error;
use context::Context;
use token::{Token, Type};

/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
    /// Gets this stream's [Context]
    fn context(&self) -> Context;
    /// Creates an iterator that skips [Type::Space] in the input
    fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
    where Self: Sized {
        IgnoreSpaces::new(self)
    }
    /// Returns the next [Token] without advancing
    fn peek(&mut self) -> Self::Item;
    /// Returns the next [Token] if it is of the expected [Type], without advancing
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Consumes and returns a [Token] if it is the expected [Type]
    ///
    /// Otherwise, does not consume a [Token]
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
    /// Ignores a [Token] of the expected [Type], propagating errors.
    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
    /// Ignores a [Token] of the expected [Type], discarding errors.
    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
    /// Runs a function on each expected [Type] in order, returning the first success
    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
    where T: AsRef<[Type]> {
        for &expected in expected.as_ref() {
            match f(self, expected).map_err(|e| e.bare()) {
                Ok(t) => return Ok(t),
                Err(Error::UnexpectedToken { .. }) => continue,
                Err(e) => return Err(e.context(self.context())),
            }
        }
        Err(Error::expected(expected, self.peek()).context(self.context()))
    }
    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::peek_expect, expected)
    }
    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
    ///
    /// Otherwise, does not consume a [Token]
    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::expect, expected)
    }
    /// Ignores a [Token] of any expected [Type], discarding errors.
    fn allow_any_of<T>(&mut self, expected: T)
    where T: AsRef<[Type]> {
        let _ = self.expect_any_of(expected);
    }
    /// Ignores a [Token] of any expected [Type], propagating errors.
    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
    where T: AsRef<[Type]> {
        self.any_of(Self::require, expected)
    }
}

/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
    text: &'t str,
    idx: usize,
    context: Context,
}

impl<'t> Tokenizer<'t> {
    /// Produces a new [Tokenizer] from a [str]ing slice
    pub fn new<T>(text: &'t T) -> Self
    where T: AsRef<str> + ?Sized {
        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
    }
    fn count(&mut self, token: &Token) {
        // update the context
        self.context.count(token);
        // advance the index
        self.idx += token.len();
    }
}

impl<'text> Iterator for Tokenizer<'text> {
    type Item = Token<'text>;
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx >= self.text.len() {
            return None;
        }
        let token = Token::from(&self.text[self.idx..]);
        // Process [Type::Directive]s
        self.count(&token);
        Some(token)
    }
}

impl<'text> TokenStream<'text> for Tokenizer<'text> {
    fn context(&self) -> Context { self.context }
    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
    // registers are valid identifiers.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
        self.count(&token);
        Ok(token)
    }
    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        Token::expect(&self.text[self.idx..], expected)
    }
}

#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    inner: &'t mut T,
}

impl<'t, T> IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
    /// Gets a mutable reference to the inner [Iterator]
    pub fn inner_mut(&mut self) -> &mut T { self.inner }
}

impl<'t, T> Iterator for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;
    fn next(&mut self) -> Option<Self::Item> {
        let next = self.inner.next()?;
        // Space tokens are greedy, so the next token shouldn't be a Space
        match next.variant() {
            Type::Space => self.next(),
            _ => Some(next),
        }
    }
}

impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
    fn context(&self) -> Context { self.inner.context() }
    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.expect(expected)
    }
    fn peek(&mut self) -> Self::Item {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.peek()
    }
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
        self.inner.allow_any_of([Type::Space, Type::Endl]);
        self.inner.peek_expect(expected)
    }
}


@@ -2,6 +2,14 @@
 ; examples of valid assembly
 ;
+; testing defines
+.define asdfgh #1000
+.define qwerty @sp+
+br asdfgh
+mov qwerty, r15
 _register_mode:
 .define numbered r1
 mov r0, r1