2024-02-01 20:11:02 +00:00
3 changed files with 79 additions and 50 deletions
--- a/src/assembler.rs
+++ b/src/assembler.rs
@@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> {
    }
 }
 impl<'t> Assemble<'t> for OneArg<'t> {
-    /// [ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]
-    /// [  0  0  0  1  0  0 [op:3  ] bw [Ad ] [dst_reg:4] ]
+    /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]`
+    /// `[  0  0  0  1  0  0 [op:3  ] bw [Ad ] [dst_reg:4] ]`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let Self { opcode, width, src } = self;
        let (src_reg, src_mode, src_ext) = source(src);
@@ -199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> {
    }
 }
 impl<'t> Assemble<'t> for TwoArg<'t> {
-    /// [ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]
-    /// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
+    /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]`
+    /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let Self { opcode, width, src, dst } = self;
        let (src_reg, src_mode, src_ext) = source(src);
@@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> {
    }
 }
 impl<'t> Assemble<'t> for Jump<'t> {
-    /// [ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]
-    /// [  0  0  1 [cond:3] +- [word_offset:10          ] ]
+    /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]`
+    /// `[  0  0  1 [cond:3] +- [word_offset:10          ] ]`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let Self { opcode, dst } = self;
        let word = 1 << 13
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,58 +1,87 @@
 // © 2023 John Breaux
 //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
 //!
-//! This project aims to assemble any valid msp430 instructions, while being lenient about the
-//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it
-//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a
-//! useful message to the author.)
+//! This project aims to assemble any valid MSP430 instruction, while including important
+//! quality-of-life features such as constant expression evaluation.
 //!
-//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines,
-//! unless syntactically relevant. It will also discard comma-separators between operands of a
-//! two-operand instruction.
+//! ## Tokenization
+//! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer,
+//! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind).
 //!
-//! It returns an AST structured as follows
+//! ## Preprocessing
+//! The [`Preprocessor`](preprocessor::Preprocessor) will filter out
+//! [newlines](lexer::token::TokenKind::Newline), unless they terminate a `.define` directive.
+//!
+//! ## Parsing
+//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer)
+//! and returns an [AST](parser::ast) structured roughly as follows:
 //! ```text
-//! Root
-//! ├─ Line
-//! │  └─ Empty
-//! ├─ Line
+//! Statements
+//! ├─ Stmt
 //! │  └─ Comment
-//! ├─ Line
+//! ├─ Stmt
 //! │  └─ Directive                 // Pre- or Post-processor directive
-//! ├─ Linel
+//! ├─ Stmt
 //! │  └─ Label                     // Label definition
-//! ├─ Line
-//! │  └─ Instruction
+//! ├─ Stmt
+//! │  └─ Insn
+//! │     └─ NoEm                   // A zero-operand "emulated" instruction
+//! ├─ Stmt
+//! │  └─ Insn
+//! │     └─ OneEm                  // A one-operand "emulated" instruction
 //! │        ├─ Opcode
-//! │     └─ Encoding::Single
 //! │        ├─ Width
-//! │        └─ PrimaryOperand
-//! │           ├─ Identifier       // Label, for relative-addressed data/code
-//! │           ├─ Register         // Direct, indexed, indirect or indirect-post-increment register.
-//! │           └─ Number           // Index, absolute address or immediate value.
-//! ├─ Line
-//! │  └─ Instruction
+//! │        └─ Dst                 // A destination register has several addressing modes:
+//! │           └─ Direct           // - The contents of a register
+//! │           ╶─ Indexed          // - The register, as a pointer, plus a byte index
+//! │           ╶─ Absolute         // - An immediate absolute address
+//! │           ╶─ Special          // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
+//! ├─ Stmt
+//! │  └─ Insn
+//! │     └─ OneArg                 // A one-operand instruction
 //! │        ├─ Opcode
-//! │     └─ Encoding::Double
 //! │        ├─ Width
-//! │        ├─ PrimaryOperand
-//! │           ├─ Identifier       // Label, for relative-addressed data/code
-//! │        │  ├─ Register         // Direct, indexed, indirect or indirect-post-increment register.
-//! │        │  └─ Number           // Index, absolute address or immediate value.
-//! │        └─ SecondaryOperand
-//! │           ├─ Identifier       // Label, for relative-addressed data/code
-//! │           ├─ Register         // Direct or indexed register
-//! │           └─ Number           // Index or absolute address
-//! ├─ Line
-//! │  └─ Instruction
+//! │        └─ Src                 // A source register has even more addressing modes:
+//! │           └─ Direct           // - The contents of a register
+//! │           ╶─ Indexed          // - The register, as a pointer, plus a byte index
+//! │           ╶─ Indirect         // - The word at the address stored in the register
+//! │                               //   (like Indexed, but without an extension word.)
+//! │           ╶─ PostIncrement    // - Indirect, but the register is post-incremented by 1
+//! │                               //   (or by 2 for word operations, and always by 2 for the PC or SP)
+//! │           ╶─ Absolute         // - An immediate absolute address
+//! │           ╶─ Immediate        // - An immediate 16-bit number
+//! │           ╶─ Special          // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
+//! ├─ Stmt
+//! │  └─ Insn
+//! │     └─ TwoArg                 // A two-operand instruction
 //! │        ├─ Opcode
-//! │     └─ Encoding::Jump
-//! │        └─ JumpTarget
-//! │           ├─ Identifier       // Label
-//! │           └─ Number           // Even, PC-relative offset in range (-1024..=1022)
-//! └─ Line
-//!    └─ EndOfFile
+//! │        ├─ Width
+//! │        ├─ Src
+//! │        └─ Dst
+//! └─ Stmt
+//!    └─ Insn
+//!       └─ Jump                   // A relative jump instruction
+//!          ├─ Opcode              // The jump condition
+//!          └─ JumpDst             // A jump instruction's destination can be either:
+//!             └─ Rel              // - An even, signed 11-bit offset
+//!             ╶─ Label            // - A label to jump to
 //! ```
+//!
+//! ## Canonicalization
+//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize):
+//! - Expressions which act exclusively on numbers are eagerly evaluated
+//!   - Expressions which begin with a numeric part are repacked for late evaluation
+//! - "Emulated" instructions are desugared into their canonical counterparts
+//!
+//! ## Assembly
+//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and
+//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words
+//! 2. Records all jump labels, for backpatching
+//! 3. Records all expressions, for late evaluation
+//! 4. Performs late evaluation and backpatching
+//!
+//! If a non-canonical instruction is found, the assembler will print a warning,
+//! and canonicalize it.

 pub mod span;

--- a/src/parser.rs
+++ b/src/parser.rs
@@ -511,7 +511,7 @@ pub mod error {
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub enum ErrorKind {
        LexError,
-        /// Returned when [Parsing::ConstExpr] fails without consuming
+        /// Returned when [Parsing::Expr] fails without consuming any input
        NotExpr,
        DivZero,
        NonNumeric(Kind),