diff --git a/src/error.rs b/src/error.rs index 80b94c3..a024e8d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -15,6 +15,7 @@ use super::{ pub enum Error { /// Produced by [Parser](crate::parser::Parser::parse()) ParseError(parser::root::Root, Box), + /// Any other error, tagged with [Context]. Created by [`Error::context()`] Contextual(Context, Box), /// Produced by [Token] when the input is entirely unexpected. UnexpectedSymbol(String), diff --git a/src/hash.rs b/src/hash.rs index 0f8a98f..2ca7dce 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! Convenience trait for dealing with hashable data +//! Convenience functions and traits for dealing with hashable data pub type Hash = u64; pub trait FromHash: From { /// Hashes anything that implements [type@Hash] using the [DefaultHasher](std::collections::hash_map::DefaultHasher) diff --git a/src/lexer.rs b/src/lexer.rs index 792bea6..568f272 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! Iterates over &[str], producing [Token]s +//! Iterates over [`&str`](str), producing [`Token`s](Token) // Things we need: // ✔ 1. Lexer/Tokenizer diff --git a/src/lexer/context.rs b/src/lexer/context.rs index 61790b0..9791fcc 100644 --- a/src/lexer/context.rs +++ b/src/lexer/context.rs @@ -1,4 +1,8 @@ -//! Stores contextual information about the current tokenizer state, useful for printing errors +// © 2023 John Breaux +//! A [Context] stores contextual information about the current tokenizer state +//! +//! This data is trivially copyable and can be provided in error messages using the +//! 
[Error::Contextual] specialization use super::*; /// Stores contextual information about the current tokenizer state, useful for printing errors #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] diff --git a/src/lexer/ignore.rs b/src/lexer/ignore.rs index 06586b0..f9b4eb7 100644 --- a/src/lexer/ignore.rs +++ b/src/lexer/ignore.rs @@ -1,3 +1,5 @@ +// © 2023 John Breaux +//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`] use super::*; #[must_use = "iterators are lazy and do nothing unless consumed"] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -11,8 +13,9 @@ where T: TokenStream<'t> impl<'t, T> Ignore<'t, T> where T: TokenStream<'t> { - /// Creates a new + /// Creates a new [Ignore], which ignores the [ignore Type](Type) pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } } + /// Gets a mutable reference to the inner [Iterator] pub fn inner_mut(&mut self) -> &mut T { self.inner } } @@ -21,7 +24,6 @@ impl<'t, T> Iterator for Ignore<'t, T> where T: TokenStream<'t> { type Item = Token<'t>; - fn next(&mut self) -> Option { let next = self.inner.next()?; // Space tokens are greedy, so the next token shouldn't be a Space diff --git a/src/lexer/token_stream.rs b/src/lexer/token_stream.rs index 446dc5d..1039eee 100644 --- a/src/lexer/token_stream.rs +++ b/src/lexer/token_stream.rs @@ -1,3 +1,5 @@ +// © 2023 John Breaux +//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token) use super::*; use super::ignore::Ignore; diff --git a/src/lib.rs b/src/lib.rs index 51d3056..3784dce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,23 +1,73 @@ // © 2023 John Breaux //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption +//! +//! This project aims to assemble any valid msp430 instructions, while being lenient about the +//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it +//! 
would be nice to support that kind of input (or, if it's completely unsalvageable, provide a +//! useful message to the author.) +//! +//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, +//! unless syntactically relevant. It will also discard comma-separators between operands of a +//! two-operand instruction. +//! +//! It returns an AST structured as follows +//! ```text +//! Root +//! ├─ Line +//! │ └─ Empty +//! ├─ Line +//! │ └─ Comment +//! ├─ Line +//! │ └─ Directive // Pre- or Post-processor directive +//! ├─ Line +//! │ └─ Label // Label definition +//! ├─ Line +//! │ └─ Instruction +//! │ ├─ Opcode +//! │ └─ Encoding::Single +//! │ ├─ Width +//! │ └─ PrimaryOperand +//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. +//! │ └─ Number // Index, absolute address or immediate value. +//! ├─ Line +//! │ └─ Instruction +//! │ ├─ Opcode +//! │ └─ Encoding::Double +//! │ ├─ Width +//! │ ├─ PrimaryOperand +//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. +//! │ │ └─ Number // Index, absolute address or immediate value. +//! │ └─ SecondaryOperand +//! │ ├─ Register // Direct or indexed register +//! │ └─ Number // Index or absolute address +//! ├─ Line +//! │ └─ Instruction +//! │ ├─ Opcode +//! │ └─ Encoding::Jump +//! │ └─ JumpTarget +//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) +//! └─ Line +//! └─ EndOfFile +//! ``` + pub mod preamble { //! 
Common imports for msp430-asm use super::*; pub use error::Error; pub use hash::{FromHash, Hash}; - pub use linker::{Linker, Visitor}; - pub use parser::Parser; pub use lexer::{ context::Context, token::{Token, Type}, token_stream::TokenStream, Tokenizer, }; + pub use linker::{Linker, Visitor}; + pub use parser::Parser; } use preamble::*; pub mod error; pub mod hash; +pub mod lexer; pub mod linker; pub mod parser; -pub mod lexer; diff --git a/src/parser.rs b/src/parser.rs index 0c5a576..4fc8e14 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! Parses [Tokens](crate::Token) into an [abstract syntax tree](Root) +//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root) use crate::{Error, Hash, TokenStream, Type}; use std::fmt::{Debug, Display, LowerHex}; @@ -37,13 +37,22 @@ pub mod label; pub mod line { // © 2023 John Breaux + //! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same document line will be treated as if they took up multiple [`Line`s](Line). + //! + //! A line contains one of: + //! - [`Label`] + //! - [`Instruction`] + //! - [`Directive`] + //! - [`Comment`] + //! - [Nothing](Line::Empty) use super::*; - /// A line is one of: + /// A line contains any one of: /// - [`Label`] (definition) /// - [`Instruction`] /// - [`Directive`] /// - [`Comment`] + /// - Nothing at all #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Line { Empty, diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 49c8de1..edefaa8 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [Comment] stores the contents of a line comment, including the preceding `;` or `//` +//! 
A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//` use super::*; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Comment(pub String); diff --git a/src/parser/directive.rs b/src/parser/directive.rs index fd1dc73..9f4d169 100644 --- a/src/parser/directive.rs +++ b/src/parser/directive.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [Directive] issues commands directly to the [Tokenizer](crate::Tokenizer) and +//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and //! [Linker](crate::Linker) use super::*; use crate::hash::FromHash; diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs index c397fa6..dd88815 100644 --- a/src/parser/identifier.rs +++ b/src/parser/identifier.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! An [Identifier] stores the hash of a named identifier +//! An [Identifier] stores the name of an identifier use super::*; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Identifier { diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs index 999340c..a61828b 100644 --- a/src/parser/instruction.rs +++ b/src/parser/instruction.rs @@ -1,13 +1,13 @@ // © 2023 John Breaux -//! An [Instruction] contains the [Opcode] and [Encoding] information for a single msp430 +//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430 //! instruction //! //! -//! Note: [Opcode] and [Encoding] are very tightly coupled, because they represent interdependent -//! parts of the same instruction. This is why [Opcode]::resolve() returns an [EncodingParser] -- -//! otherwise, there's an explosion of states that I can't really cope with on my own. Really, -//! there's about 9 valid classes of instruction, some of which are only used for one or two of the -//! MSP430's instructions. +//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent +//! 
interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an +//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on +//! my own. Really, there are about 9 valid classes of instruction, some of which are only used for +//! one or two of the MSP430's instructions. use super::*; diff --git a/src/parser/instruction/encoding.rs b/src/parser/instruction/encoding.rs index c3683c3..1b65f95 100644 --- a/src/parser/instruction/encoding.rs +++ b/src/parser/instruction/encoding.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! An [Encoding] represents the set of arguments for the [msp430's instructions](Opcode) +//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode) use super::*; pub mod number; diff --git a/src/parser/instruction/encoding/builder.rs b/src/parser/instruction/encoding/builder.rs index c70a843..4c62910 100644 --- a/src/parser/instruction/encoding/builder.rs +++ b/src/parser/instruction/encoding/builder.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! Builder API for [EncodingParser] +//! Builder API for [`EncodingParser`] use super::*; #[derive(Debug, Default)] pub struct SingleBuilder { diff --git a/src/parser/instruction/encoding/encoding_parser.rs b/src/parser/instruction/encoding/encoding_parser.rs index 118938c..71b57e1 100644 --- a/src/parser/instruction/encoding/encoding_parser.rs +++ b/src/parser/instruction/encoding/encoding_parser.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! An [EncodingParser] builds an [Encoding] from a [TokenStream] +//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`] use super::*; #[derive(Debug)] diff --git a/src/parser/instruction/encoding/jump_target.rs b/src/parser/instruction/encoding/jump_target.rs index 669caf5..dcb729d 100644 --- a/src/parser/instruction/encoding/jump_target.rs +++ b/src/parser/instruction/encoding/jump_target.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! 
A [JumpTarget] contains the [pc-relative offset](Number) or [label](Identifier) +//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier) //! for a [Jump](Encoding::Jump) [instruction] use super::*; diff --git a/src/parser/instruction/encoding/number.rs b/src/parser/instruction/encoding/number.rs index 3f3671f..ed4c3f3 100644 --- a/src/parser/instruction/encoding/number.rs +++ b/src/parser/instruction/encoding/number.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [Number] represents a 16-bit signed or unsigned word +//! A [`Number`] represents a 16-bit signed or unsigned word use super::*; // TODO: Allow identifiers/expressions in place of numbers diff --git a/src/parser/instruction/encoding/primary_operand.rs b/src/parser/instruction/encoding/primary_operand.rs index 8e72984..02009a0 100644 --- a/src/parser/instruction/encoding/primary_operand.rs +++ b/src/parser/instruction/encoding/primary_operand.rs @@ -1,6 +1,6 @@ // © 2023 John Breaux -//! A [PrimaryOperand] contains the first [Register], addressing mode, and Extension -//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction] +//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension +//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`] use super::*; /// Contains the first [Register], addressing mode, and Extension Word for a diff --git a/src/parser/instruction/encoding/register.rs b/src/parser/instruction/encoding/register.rs index 387f504..e2e1715 100644 --- a/src/parser/instruction/encoding/register.rs +++ b/src/parser/instruction/encoding/register.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [Register] epresents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) +//! 
A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) use super::*; use std::str::FromStr; diff --git a/src/parser/instruction/encoding/secondary_operand.rs b/src/parser/instruction/encoding/secondary_operand.rs index b393983..f0aab77 100644 --- a/src/parser/instruction/encoding/secondary_operand.rs +++ b/src/parser/instruction/encoding/secondary_operand.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [SecondaryOperand] contains the second [Register], addressing mode, and Extension +//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension //! Word for a [two-operand](Encoding::Double) [instruction] use super::*; diff --git a/src/parser/instruction/encoding/width.rs b/src/parser/instruction/encoding/width.rs index 7e3c155..0fd5974 100644 --- a/src/parser/instruction/encoding/width.rs +++ b/src/parser/instruction/encoding/width.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! A [Width] represents whether an instruction operates on whole words or bytes +//! A [`Width`] represents whether an instruction operates on whole words or bytes use super::*; /// Represents an instruction's operand width. diff --git a/src/parser/instruction/opcode.rs b/src/parser/instruction/opcode.rs index bd38c5f..4790db7 100644 --- a/src/parser/instruction/opcode.rs +++ b/src/parser/instruction/opcode.rs @@ -1,5 +1,5 @@ // © 2023 John Breaux -//! An [Opcode] encodes an msp430 operation +//! An [`Opcode`] encodes an msp430 operation use super::*; use std::str::FromStr; diff --git a/src/parser/label.rs b/src/parser/label.rs index f213ed8..f7e50e4 100644 --- a/src/parser/label.rs +++ b/src/parser/label.rs @@ -1,6 +1,8 @@ // © 2023 John Breaux +//! 
The definition of a label use super::*; +/// The definition of a label #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Label(pub Identifier); diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs index 4d255d1..9aaa434 100644 --- a/src/parser/parsable.rs +++ b/src/parser/parsable.rs @@ -1,4 +1,5 @@ // © 2023 John Breaux +//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html) use super::*; /// Parses tokens from [stream](TokenStream) into Self node pub trait Parsable {