diff --git a/src/error.rs b/src/error.rs index e6ca794..03513fe 100644 --- a/src/error.rs +++ b/src/error.rs @@ -16,6 +16,8 @@ pub enum Error { /// Produced by [Parser](crate::parser::Parser::parse()) ParseError(parser::root::Root, Box), Contextual(Context, Box), + /// Produced by [Token] when the input is entirely unexpected. + UnexpectedSymbol(String), /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] UnexpectedToken { expected: Type, @@ -45,7 +47,7 @@ pub enum Error { RegisterTooHigh(u16), /// Produced by /// [SecondaryOperand](parser::instruction::encoding::secondary_operand) - /// when the joke "secondary immediate" form is specified + /// when the joke "secondary immediate" form is out of range 0..=1 FatSecondaryImmediate(isize), /// Produced by [Number](parser::instruction::encoding::number) when the number is too /// wide to fit in 16 bits (outside the range `(-2^15) .. (2^16-1)` ) @@ -93,9 +95,10 @@ impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), - Error::ParseError(_, error) => write!(f, "Error encountered while parsing:\n{error}"), + Error::ParseError(_, error) => write!(f, "{error}"), + Error::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""), Error::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), - Error::AllExpectationsFailed { expected, got } => write!(f, "Expected one of {expected}, got {got}."), + Error::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."), Error::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), Error::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), Error::NotARegister(reg) => write!(f, "{reg} is not a register"), diff --git a/src/lib.rs b/src/lib.rs index a8464c6..96384b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! An assembler for the TI MSP430 +//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption pub mod preamble { use super::*; pub use error::Error; diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs index 50fdadf..4d255d1 100644 --- a/src/parser/parsable.rs +++ b/src/parser/parsable.rs @@ -19,7 +19,7 @@ pub trait Parsable { match Self::parse(p, stream).map_err(|e| e.bare()) { Ok(tt) => Ok(Some(tt)), Err(Error::UnexpectedToken { .. }) | Err(Error::AllExpectationsFailed { .. }) => Ok(None), - Err(e) => Err(e), + Err(e) => Err(e.context(stream.context())), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c842e94..5fad89d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -6,14 +6,14 @@ // ✔ 1. Instructions // ✔ 1. Instruction mnemonics /ad.../ // ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/ -// ✔ 2. Src operands +// ✔ 2. Operands // ✔ 1. Registers /(r1[0-5]|r[0-9])/ // ✔ 2. Immediate Values /#/ // ✔ 3. Absolute addresses /&/ // ✔ 4. Numbers /[0-9A-Fa-f]+ // ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/ -// ✔ 4. Label definitions /(^.*):/ -// ✔ 5. Comments (may be useful for debugging) +// ✔ 3. Label definitions /(^.*):/ +// ✔ 4. Comments (may be useful for debugging) pub mod context; pub mod token; @@ -22,7 +22,7 @@ use crate::Error; use context::Context; use token::{Token, Type}; -/// Backtracking through bifurcated timelines +/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token) pub trait TokenStream<'text>: Iterator> { /// Gets this stream's [Context] fn context(&self) -> Context; @@ -50,7 +50,7 @@ pub trait TokenStream<'text>: Iterator> { /// Ignores a [Token] of the expected [Type], discarding errors. fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } - /// Runs a functor on each + /// Runs a function on each fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result where T: AsRef<[Type]> { for &expected in expected.as_ref() { diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs index 232c4b6..44ec23a 100644 --- a/src/tokenizer/token.rs +++ b/src/tokenizer/token.rs @@ -1,8 +1,7 @@ // © 2023 John Breaux //! Defines the [Token] //! -//! A [Token] represents all valid sequences of characters, -//! sorted by meaning +//! A [Token] is a [semantically tagged](Type) sequence of characters use crate::Error; use regex::Regex; @@ -57,6 +56,7 @@ impl<$t> From<&$t str> for $type { }; } +/// A [Token] is a [semantically tagged](Type) sequence of characters #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token<'text> { /// The type of this token @@ -96,12 +96,13 @@ impl<'text> Debug for Token<'text> { impl<'text> Display for Token<'text> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.variant { - Type::Endl | Type::EndOfFile => write!(f, "{}", self.variant), - v => write!(f, "\"{}\" ({v})", self.lexeme), + Type::Endl | Type::EndOfFile | Type::Invalid => Display::fmt(&self.variant, f), + v => write!(f, "{v} \"{}\"", self.lexeme), } } } +/// A [token Type](Type) is a semantic tag for a sequence of characters #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Type { /// contiguous whitespace, excluding newline @@ -152,6 +153,8 @@ pub enum Type { Separator, /// End of File marker EndOfFile, + /// Invalid token + Invalid, } regex_impl! {<'text> Token<'text> { @@ -201,10 +204,10 @@ regex_impl! {<'text> Token<'text> { pub fn expect_plus(text: &str) -> Option { regex!(Type::Plus = r"^\+") } - pub fn expect_open_idx(text: &str) -> Option { + pub fn expect_l_paren(text: &str) -> Option { regex!(Type::LParen = r"^\(") } - pub fn expect_close_idx(text: &str) -> Option { + pub fn expect_r_paren(text: &str) -> Option { regex!(Type::RParen = r"^\)") } pub fn expect_indrect(text: &str) -> Option { @@ -228,40 +231,44 @@ regex_impl! {<'text> Token<'text> { pub fn expect_end_of_file(text: &str) -> Option { regex!(Type::EndOfFile = r"^$") } + pub fn expect_anything(text: &str) -> Option { + regex!(Type::Invalid = r"^.*") + } }} impl Display for Type { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Space => write!(f, "space"), - Self::Endl => write!(f, "newline"), - Self::Comment => write!(f, "comment"), - Self::Label => write!(f, "label definition"), - Self::Insn => write!(f, "instruction mnemonic"), - Self::ByteWidth => write!(f, "byte-width marker"), - Self::WordWidth => write!(f, "word-width marker"), - Self::Register => write!(f, "register mnemonic"), - Self::RadixMarkerDec => write!(f, "decimal radix marker"), - Self::RadixMarkerHex => write!(f, "hexadecimal radix marker"), - Self::RadixMarkerOct => write!(f, "octal radix marker"), - Self::RadixMarkerBin => write!(f, "binary radix marker"), - Self::Number => write!(f, "number"), - Self::Minus => write!(f, "minus sign"), - Self::Plus => write!(f, "plus sign"), - Self::LParen => write!(f, "left parenthesis"), - Self::RParen => write!(f, "right parenthesis"), - Self::Indirect => write!(f, "indirect mode marker"), - Self::Absolute => write!(f, "absolute mode marker"), - Self::Immediate => write!(f, "immediate mode marker"), - Self::Identifier => write!(f, "identifier"), - Self::Directive => write!(f, "directive"), - Self::Separator => write!(f, "comma"), - Self::EndOfFile => write!(f, "EOF"), + Self::Space => Display::fmt("space", f), + Self::Endl => Display::fmt("newline", f), + Self::Comment => Display::fmt("comment", f), + Self::Label => Display::fmt("label definition", f), + Self::Insn => Display::fmt("opcode", f), + Self::ByteWidth => Display::fmt("byte-width", f), + Self::WordWidth => Display::fmt("word-width", f), + Self::Register => Display::fmt("register", f), + Self::RadixMarkerDec => Display::fmt("decimal marker", f), + Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f), + Self::RadixMarkerOct => Display::fmt("octal marker", f), + Self::RadixMarkerBin => Display::fmt("binary marker", f), + Self::Number => Display::fmt("number", f), + Self::Minus => Display::fmt("minus sign", f), + Self::Plus => Display::fmt("plus sign", f), + Self::LParen => Display::fmt("left parenthesis", f), + Self::RParen => Display::fmt("right parenthesis", f), + Self::Indirect => Display::fmt("indirect", f), + Self::Absolute => Display::fmt("absolute", f), + Self::Immediate => Display::fmt("immediate", f), + Self::Identifier => Display::fmt("identifier", f), + Self::Directive => Display::fmt("directive", f), + Self::Separator => Display::fmt("comma", f), + Self::EndOfFile => Display::fmt("EOF", f), + Self::Invalid => Display::fmt("invalid token", f), } } } -/// Owned version of a token, which can outlive its parent buffer +/// A [Token] which can outlive its parent buffer #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct OwnedToken { /// The type of this token @@ -285,6 +292,7 @@ impl From> for OwnedToken { } } +/// [Types] are an owned array of [types](Type), with a custom [Display] implementation #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Types(Vec); @@ -296,10 +304,10 @@ impl> From for Types { impl Display for Types { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for (idx, t) in self.0.iter().enumerate() { - write!(f, "{t}")?; + Display::fmt(t, f)?; match idx { - i if i < self.0.len() - 2 => write!(f, ", ")?, - i if i < self.0.len() - 1 => write!(f, " or ")?, + i if i < self.0.len() - 2 => Display::fmt(", ", f)?, + i if i < self.0.len() - 1 => Display::fmt(" or ", f)?, _ => (), } }