diff --git a/src/assembler.rs b/src/assembler.rs index 28e3170..a6e342d 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> { } } impl<'t> Assemble<'t> for OneArg<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, width, src } = self; let (src_reg, src_mode, src_ext) = source(src); @@ -199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> { } } impl<'t> Assemble<'t> for TwoArg<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, width, src, dst } = self; let (src_reg, src_mode, src_ext) = source(src); @@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> { } } impl<'t> Assemble<'t> for Jump<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ 0 0 1 [cond:3] +- [word_offset:10 ] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, dst } = self; let word = 1 << 13 diff --git a/src/lib.rs b/src/lib.rs index 615d18f..88338f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,58 +1,87 @@ // © 2023 John Breaux //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption //! -//! This project aims to assemble any valid msp430 instructions, while being lenient about the -//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it -//! 
would be nice to support that kind of input (or, if it's completely unsalvageable, provide a -//! useful message to the author.) +//! This project aims to assemble any valid msp430 instructions, while including important quality +//! of life features such as constant expression evaluation. //! -//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, -//! unless syntactically relevant. It will also discard comma-separators between operands of a -//! two-operand instruction. +//! ## Tokenization +//! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer, +//! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind). //! -//! It returns an AST structured as follows +//! ## Preprocessing +//! The [`Preprocessor`](preprocessor::Preprocessor) will filter +//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive. +//! +//! ## Parsing +//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer) +//! and returns an [AST](parser::ast) structured roughly as follows: //! ```text -//! Root -//! ├─ Line -//! │ └─ Empty -//! ├─ Line +//! Statements +//! ├─ Stmt //! │ └─ Comment -//! ├─ Line +//! ├─ Stmt //! │ └─ Directive // Pre- or Post-processor directive -//! ├─ Linel +//! ├─ Stmt //! │ └─ Label // Label definition -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Single +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ NoEm // A zero-operand "emulated" instruction +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ OneEm // A one-operand "emulated" instruction +//! │ ├─ Opcode //! │ ├─ Width -//! │ └─ PrimaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. -//! │ └─ Number // Index, absolute address or immediate value. -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Double +//! 
│ └─ Dst // A destination register has several addressing modes: +//! │ └─ Direct // - The contents of a register +//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index +//! │ ╶─ Absolute // - An immediate absolute address +//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings. +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ OneArg // A one-operand instruction +//! │ ├─ Opcode //! │ ├─ Width -//! │ ├─ PrimaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. -//! │ │ └─ Number // Index, absolute address or immediate value. -//! │ └─ SecondaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ ├─ Register // Direct or indexed register -//! │ └─ Number // Index or absolute address -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Jump -//! │ └─ JumpTarget -//! │ ├─ Identifier // Label -//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) -//! └─ Line -//! └─ EndOfFile +//! │ └─ Src // A source register has even more addressing modes: +//! │ └─ Direct // - The contents of a register +//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index +//! │ ╶─ Indirect // - The word at the address stored in the register +//! │ // (like Indexed, but without an extension word.) +//! │ ╶─ PostIncrement // - Indirect, but the register is post-incremented by 1 +//! │ // for byte ops, or by 2 for word ops (always 2 for PC or SP) +//! │ ╶─ Absolute // - An immediate absolute address +//! │ ╶─ Immediate // - An immediate 16-bit number +//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings. +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ TwoArg // A two-operand instruction +//! │ ├─ Opcode +//! │ ├─ Width +//! │ ├─ Src +//! │ └─ Dst +//! └─ Stmt +//! └─ Insn +//! └─ Jump // A relative jump instruction +//! ├─ Opcode // The jump condition +//! 
└─ JumpDst // A jump instruction's destination can be either: +//! └─ Rel // - An even, signed 11-bit offset +//! ╶─ Label // - A label to jump to //! ``` +//! +//! ## Canonicalization +//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize): +//! - Expressions which act exclusively on numbers are eagerly evaluated +//! - Expressions which begin with a numeric part are repacked for late evaluation +//! - "Emulated" instructions are desugared into their canonical counterparts +//! +//! ## Assembly +//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and +//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words +//! 2. Records all jump labels, for backpatching +//! 3. Records all expressions, for late evaluation +//! 4. Performs late evaluation and backpatching +//! +//! If a non-canonical instruction is found, the assembler will print a warning, +//! and canonicalize it. pub mod span; diff --git a/src/parser.rs b/src/parser.rs index 451487e..32a0744 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -511,7 +511,7 @@ pub mod error { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ErrorKind { LexError, - /// Returned when [Parsing::ConstExpr] fails without consuming + /// Returned when [Parsing::Expr] fails without consuming NotExpr, DivZero, NonNumeric(Kind),