v0.3.0 #1

Merged
j merged 12 commits from v0.3.0 into main 2024-02-01 20:11:02 +00:00
3 changed files with 79 additions and 50 deletions
Showing only changes of commit f6c1914720 - Show all commits

View File

@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> {
}
}
impl<'t> Assemble<'t> for OneArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src } = self;
let (src_reg, src_mode, src_ext) = source(src);
@ -199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> {
}
}
impl<'t> Assemble<'t> for TwoArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src, dst } = self;
let (src_reg, src_mode, src_ext) = source(src);
@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> {
}
}
impl<'t> Assemble<'t> for Jump<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]`
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, dst } = self;
let word = 1 << 13

View File

@ -1,58 +1,87 @@
// © 2023 John Breaux
//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
//!
//! This project aims to assemble any valid msp430 instructions, while being lenient about the
//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it
//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a
//! useful message to the author.)
//! This project aims to assemble any valid msp430 instructions, while including important quality
//! of life features such as constant expression evaluation.
//!
//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines,
//! unless syntactically relevant. It will also discard comma-separators between operands of a
//! two-operand instruction.
//! ## Tokenization
//! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer,
//! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind).
//!
//! It returns an AST structured as follows
//! ## Preprocessing
//! The [`Preprocessor`](preprocessor::Preprocessor) will filter
//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive.
//!
//! ## Parsing
//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer)
//! and returns an [AST](parser::ast) structured roughly as follows:
//! ```text
//! Root
//! ├─ Line
//! │ └─ Empty
//! ├─ Line
//! Statements
//! ├─ Stmt
//! │ └─ Comment
//! ├─ Line
//! ├─ Stmt
//! │ └─ Directive // Pre- or Post-processor directive
//! ├─ Line
//! ├─ Stmt
//! │ └─ Label // Label definition
//! ├─ Line
//! │ └─ Instruction
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ NoEm // A zero-operand "emulated" instruction
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ OneEm // A one-operand "emulated" instruction
//! │ ├─ Opcode
//! │ └─ Encoding::Single
//! │ ├─ Width
//! │ └─ PrimaryOperand
//! │ ├─ Identifier // Label, for relative-addressed data/code
//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register.
//! │ └─ Number // Index, absolute address or immediate value.
//! ├─ Line
//! │ └─ Instruction
//! │ └─ Dst // A destination register has several addressing modes:
//! │ └─ Direct // - The contents of a register
//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
//! │ ╶─ Absolute // - An immediate absolute address
//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ OneArg // A one-operand instruction
//! │ ├─ Opcode
//! │ └─ Encoding::Double
//! │ ├─ Width
//! │ ├─ PrimaryOperand
//! │ ├─ Identifier // Label, for relative-addressed data/code
//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register.
//! │ │ └─ Number // Index, absolute address or immediate value.
//! │ └─ SecondaryOperand
//! │ ├─ Identifier // Label, for relative-addressed data/code
//! │ ├─ Register // Direct or indexed register
//! │ └─ Number // Index or absolute address
//! ├─ Line
//! │ └─ Instruction
//! │ └─ Src // A source register has even more addressing modes:
//! │ └─ Direct // - The contents of a register
//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
//! │ ╶─ Indirect // - The word at the address stored in the register
//! │ // (like Indexed, but without an extension word.)
//! │ ╶─ PostIncrement // - Indirect, but the register is post-incremented by 1
//! │ // (or, if it's the PC or SP, by 2)
//! │ ╶─ Absolute // - An immediate absolute address
//! │ ╶─ Immediate // - An immediate 16-bit number
//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ TwoArg // A two-operand instruction
//! │ ├─ Opcode
//! │ └─ Encoding::Jump
//! │ └─ JumpTarget
//! │ ├─ Identifier // Label
//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022)
//! └─ Line
//! └─ EndOfFile
//! │ ├─ Width
//! │ ├─ Src
//! │ └─ Dst
//! └─ Stmt
//! └─ Insn
//! └─ Jump // A relative jump instruction
//! ├─ Opcode // The jump condition
//! └─ JumpDst // A jump instruction's destination can be either:
//! └─ Rel // - An even, signed 11-bit offset
//! ╶─ Label // - A label to jump to
//! ```
//!
//! ## Canonicalization
//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize):
//! - Expressions which act exclusively on numbers are eagerly evaluated
//! - Expressions which begin with a numeric part are repacked for late evaluation
//! - "Emulated" instructions are desugared into their canonical counterparts
//!
//! ## Assembly
//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and
//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words
//! 2. Records all jump labels, for backpatching
//! 3. Records all expressions, for late evaluation
//! 4. Performs late evaluation and backpatching
//!
//! If a non-canonical instruction is found, the assembler will print a warning,
//! and canonicalize it.
pub mod span;

View File

@ -511,7 +511,7 @@ pub mod error {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
LexError,
/// Returned when [Parsing::ConstExpr] fails without consuming
/// Returned when [Parsing::Expr] fails without consuming any input
NotExpr,
DivZero,
NonNumeric(Kind),