v0.3.0 #1

Merged
j merged 12 commits from v0.3.0 into main 2024-02-01 20:11:02 +00:00
3 changed files with 79 additions and 50 deletions
Showing only changes of commit f6c1914720 - Show all commits

View File

@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> {
} }
} }
impl<'t> Assemble<'t> for OneArg<'t> { impl<'t> Assemble<'t> for OneArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ] /// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src } = self; let Self { opcode, width, src } = self;
let (src_reg, src_mode, src_ext) = source(src); let (src_reg, src_mode, src_ext) = source(src);
@ -199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> {
} }
} }
impl<'t> Assemble<'t> for TwoArg<'t> { impl<'t> Assemble<'t> for TwoArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ] /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src, dst } = self; let Self { opcode, width, src, dst } = self;
let (src_reg, src_mode, src_ext) = source(src); let (src_reg, src_mode, src_ext) = source(src);
@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> {
} }
} }
impl<'t> Assemble<'t> for Jump<'t> { impl<'t> Assemble<'t> for Jump<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ] /// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, dst } = self; let Self { opcode, dst } = self;
let word = 1 << 13 let word = 1 << 13

View File

@ -1,58 +1,87 @@
// © 2023 John Breaux // © 2023 John Breaux
//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
//! //!
//! This project aims to assemble any valid msp430 instructions, while being lenient about the //! This project aims to assemble any valid MSP430 instruction, while including important
//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it //! quality-of-life features such as constant expression evaluation.
//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a
//! useful message to the author.)
//! //!
//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, //! ## Tokenization
//! unless syntactically relevant. It will also discard comma-separators between operands of a //! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer,
//! two-operand instruction. //! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind).
//! //!
//! It returns an AST structured as follows //! ## Preprocessing
//! The [`Preprocessor`](preprocessor::Preprocessor) will filter
//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive.
//!
//! ## Parsing
//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer)
//! and returns an [AST](parser::ast) structured roughly as follows:
//! ```text //! ```text
//! Root //! Statements
//! ├─ Line //! ├─ Stmt
//! │ └─ Empty
//! ├─ Line
//! │ └─ Comment //! │ └─ Comment
//! ├─ Line //! ├─ Stmt
//! │ └─ Directive // Pre- or Post-processor directive //! │ └─ Directive // Pre- or Post-processor directive
//! ├─ Linel //! ├─ Stmt
//! │ └─ Label // Label definition //! │ └─ Label // Label definition
//! ├─ Line //! ├─ Stmt
//! │ └─ Instruction //! │ └─ Insn
//! │ ├─ Opcode //! │ └─ NoEm // A zero-operand "emulated" instruction
//! │ └─ Encoding::Single //! ├─ Stmt
//! │ └─ Insn
//! │ └─ OneEm // A one-operand "emulated" instruction
//! │ ├─ Opcode
//! │ ├─ Width //! │ ├─ Width
//! │ └─ PrimaryOperand //! │ └─ Dst // A destination register has several addressing modes:
//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ └─ Direct // - The contents of a register
//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
//! │ └─ Number // Index, absolute address or immediate value. //! │ ╶─ Absolute // - An immediate absolute address
//! ├─ Line //! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! │ └─ Instruction //! ├─ Stmt
//! │ ├─ Opcode //! │ └─ Insn
//! │ └─ Encoding::Double //! │ └─ OneArg // A one-operand instruction
//! │ ├─ Opcode
//! │ ├─ Width //! │ ├─ Width
//! │ ├─ PrimaryOperand //! │ └─ Src // A source register has even more addressing modes:
//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ └─ Direct // - The contents of a register
//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
//! │ │ └─ Number // Index, absolute address or immediate value. //! │ ╶─ Indirect // - The word at the address stored in the register
//! │ └─ SecondaryOperand //! │ // (like Indexed, but without an extension word.)
//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ ╶─ PostIncrement // - Indirect, but the register is post-incremented by the operand size:
//! │ ├─ Register // Direct or indexed register //! │ // 1 for byte operations, 2 for word operations (always 2 for PC and SP)
//! │ └─ Number // Index or absolute address //! │ ╶─ Absolute // - An immediate absolute address
//! ├─ Line //! │ ╶─ Immediate // - An immediate 16-bit number
//! │ └─ Instruction //! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! │ ├─ Opcode //! ├─ Stmt
//! │ └─ Encoding::Jump //! │ └─ Insn
//! │ └─ JumpTarget //! │ └─ TwoArg // A two-operand instruction
//! │ ├─ Identifier // Label //! │ ├─ Opcode
//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) //! │ ├─ Width
//! └─ Line //! │ ├─ Src
//! └─ EndOfFile //! │ └─ Dst
//! └─ Stmt
//! └─ Insn
//! └─ Jump // A relative jump instruction
//! ├─ Opcode // The jump condition
//! └─ JumpDst // A jump instruction's destination can be either:
//! └─ Rel // - An even, signed 11-bit offset
//! ╶─ Label // - A label to jump to
//! ``` //! ```
//!
//! ## Canonicalization
//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize):
//! - Expressions which act exclusively on numbers are eagerly evaluated
//! - Expressions which begin with a numeric part are repacked for late evaluation
//! - "Emulated" instructions are desugared into their canonical counterparts
//!
//! ## Assembly
//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and
//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words
//! 2. Records all jump labels, for backpatching
//! 3. Records all expressions, for late evaluation
//! 4. Performs late evaluation and backpatching
//!
//! If a non-canonical instruction is found, the assembler will print a warning,
//! and canonicalize it.
pub mod span; pub mod span;

View File

@ -511,7 +511,7 @@ pub mod error {
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind { pub enum ErrorKind {
LexError, LexError,
/// Returned when [Parsing::ConstExpr] fails without consuming /// Returned when [Parsing::Expr] fails without consuming any tokens
NotExpr, NotExpr,
DivZero, DivZero,
NonNumeric(Kind), NonNumeric(Kind),