v0.3.0 #1
@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<'t> Assemble<'t> for OneArg<'t> {
|
impl<'t> Assemble<'t> for OneArg<'t> {
|
||||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
|
||||||
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
|
/// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]`
|
||||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||||
let Self { opcode, width, src } = self;
|
let Self { opcode, width, src } = self;
|
||||||
let (src_reg, src_mode, src_ext) = source(src);
|
let (src_reg, src_mode, src_ext) = source(src);
|
||||||
@ -199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<'t> Assemble<'t> for TwoArg<'t> {
|
impl<'t> Assemble<'t> for TwoArg<'t> {
|
||||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
|
||||||
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
|
/// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]`
|
||||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||||
let Self { opcode, width, src, dst } = self;
|
let Self { opcode, width, src, dst } = self;
|
||||||
let (src_reg, src_mode, src_ext) = source(src);
|
let (src_reg, src_mode, src_ext) = source(src);
|
||||||
@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<'t> Assemble<'t> for Jump<'t> {
|
impl<'t> Assemble<'t> for Jump<'t> {
|
||||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
|
||||||
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
|
/// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]`
|
||||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||||
let Self { opcode, dst } = self;
|
let Self { opcode, dst } = self;
|
||||||
let word = 1 << 13
|
let word = 1 << 13
|
||||||
|
115
src/lib.rs
115
src/lib.rs
@ -1,58 +1,87 @@
|
|||||||
// © 2023 John Breaux
|
// © 2023 John Breaux
|
||||||
//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
|
//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
|
||||||
//!
|
//!
|
||||||
//! This project aims to assemble any valid msp430 instructions, while being lenient about the
|
//! This project aims to assemble any valid MSP430 instruction, while including important quality
|
||||||
//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it
|
//! of life features such as constant expression evaluation.
|
||||||
//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a
|
|
||||||
//! useful message to the author.)
|
|
||||||
//!
|
//!
|
||||||
//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines,
|
//! ## Tokenization
|
||||||
//! unless syntactically relevant. It will also discard comma-separators between operands of a
|
//! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer,
|
||||||
//! two-operand instruction.
|
//! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind).
|
||||||
//!
|
//!
|
||||||
//! It returns an AST structured as follows
|
//! ## Preprocessing
|
||||||
|
//! The [`Preprocessor`](preprocessor::Preprocessor) will filter out
|
||||||
|
//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive.
|
||||||
|
//!
|
||||||
|
//! ## Parsing
|
||||||
|
//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer)
|
||||||
|
//! and returns an [AST](parser::ast) structured roughly as follows:
|
||||||
//! ```text
|
//! ```text
|
||||||
//! Root
|
//! Statements
|
||||||
//! ├─ Line
|
//! ├─ Stmt
|
||||||
//! │ └─ Empty
|
|
||||||
//! ├─ Line
|
|
||||||
//! │ └─ Comment
|
//! │ └─ Comment
|
||||||
//! ├─ Line
|
//! ├─ Stmt
|
||||||
//! │ └─ Directive // Pre- or Post-processor directive
|
//! │ └─ Directive // Pre- or Post-processor directive
|
||||||
//! ├─ Linel
|
//! ├─ Stmt
|
||||||
//! │ └─ Label // Label definition
|
//! │ └─ Label // Label definition
|
||||||
//! ├─ Line
|
//! ├─ Stmt
|
||||||
//! │ └─ Instruction
|
//! │ └─ Insn
|
||||||
//! │ ├─ Opcode
|
//! │ └─ NoEm // A zero-operand "emulated" instruction
|
||||||
//! │ └─ Encoding::Single
|
//! ├─ Stmt
|
||||||
|
//! │ └─ Insn
|
||||||
|
//! │ └─ OneEm // A one-operand "emulated" instruction
|
||||||
|
//! │ ├─ Opcode
|
||||||
//! │ ├─ Width
|
//! │ ├─ Width
|
||||||
//! │ └─ PrimaryOperand
|
//! │ └─ Dst // A destination register has several addressing modes:
|
||||||
//! │ ├─ Identifier // Label, for relative-addressed data/code
|
//! │ └─ Direct // - The contents of a register
|
||||||
//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register.
|
//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
|
||||||
//! │ └─ Number // Index, absolute address or immediate value.
|
//! │ ╶─ Absolute // - An immediate absolute address
|
||||||
//! ├─ Line
|
//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
|
||||||
//! │ └─ Instruction
|
//! ├─ Stmt
|
||||||
//! │ ├─ Opcode
|
//! │ └─ Insn
|
||||||
//! │ └─ Encoding::Double
|
//! │ └─ OneArg // A one-operand instruction
|
||||||
|
//! │ ├─ Opcode
|
||||||
//! │ ├─ Width
|
//! │ ├─ Width
|
||||||
//! │ ├─ PrimaryOperand
|
//! │ └─ Src // A source register has even more addressing modes:
|
||||||
//! │ ├─ Identifier // Label, for relative-addressed data/code
|
//! │ └─ Direct // - The contents of a register
|
||||||
//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register.
|
//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
|
||||||
//! │ │ └─ Number // Index, absolute address or immediate value.
|
//! │ ╶─ Indirect // - The word at the address stored in the register
|
||||||
//! │ └─ SecondaryOperand
|
//! │ // (like Indexed, but without an extension word.)
|
||||||
//! │ ├─ Identifier // Label, for relative-addressed data/code
|
//! │ ╶─ PostIncrement // - Indirect, but the register is post-incremented by 1
|
||||||
//! │ ├─ Register // Direct or indexed register
|
//! │ // (or, if it's the PC or SP, by 2)
|
||||||
//! │ └─ Number // Index or absolute address
|
//! │ ╶─ Absolute // - An immediate absolute address
|
||||||
//! ├─ Line
|
//! │ ╶─ Immediate // - An immediate 16-bit number
|
||||||
//! │ └─ Instruction
|
//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
|
||||||
//! │ ├─ Opcode
|
//! ├─ Stmt
|
||||||
//! │ └─ Encoding::Jump
|
//! │ └─ Insn
|
||||||
//! │ └─ JumpTarget
|
//! │ └─ TwoArg // A two-operand instruction
|
||||||
//! │ ├─ Identifier // Label
|
//! │ ├─ Opcode
|
||||||
//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022)
|
//! │ ├─ Width
|
||||||
//! └─ Line
|
//! │ ├─ Src
|
||||||
//! └─ EndOfFile
|
//! │ └─ Dst
|
||||||
|
//! └─ Stmt
|
||||||
|
//! └─ Insn
|
||||||
|
//! └─ Jump // A relative jump instruction
|
||||||
|
//! ├─ Opcode // The jump condition
|
||||||
|
//! └─ JumpDst // A jump instruction's destination can be either:
|
||||||
|
//! └─ Rel // - An even, signed 11-bit offset
|
||||||
|
//! ╶─ Label // - A label to jump to
|
||||||
//! ```
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## Canonicalization
|
||||||
|
//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize):
|
||||||
|
//! - Expressions which act exclusively on numbers are eagerly evaluated
|
||||||
|
//! - Expressions which begin with a numeric part are repacked for late evaluation
|
||||||
|
//! - "Emulated" instructions are desugared into their canonical counterparts
|
||||||
|
//!
|
||||||
|
//! ## Assembly
|
||||||
|
//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and
|
||||||
|
//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words
|
||||||
|
//! 2. Records all jump labels, for backpatching
|
||||||
|
//! 3. Records all expressions, for late evaluation
|
||||||
|
//! 4. Performs late evaluation and backpatching
|
||||||
|
//!
|
||||||
|
//! If a non-canonical instruction is found, the assembler will print a warning,
|
||||||
|
//! and canonicalize it.
|
||||||
|
|
||||||
pub mod span;
|
pub mod span;
|
||||||
|
|
||||||
|
@ -511,7 +511,7 @@ pub mod error {
|
|||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
pub enum ErrorKind {
|
pub enum ErrorKind {
|
||||||
LexError,
|
LexError,
|
||||||
/// Returned when [Parsing::ConstExpr] fails without consuming
|
/// Returned when [Parsing::Expr] fails without consuming any input
|
||||||
NotExpr,
|
NotExpr,
|
||||||
DivZero,
|
DivZero,
|
||||||
NonNumeric(Kind),
|
NonNumeric(Kind),
|
||||||
|
Loading…
Reference in New Issue
Block a user