From a9ee7d3bc94a11023e31fa14defc99ef5d442349 Mon Sep 17 00:00:00 2001
From: John Breaux <j@soft.fish>
Date: Sat, 19 Aug 2023 23:02:24 -0500
Subject: [PATCH] msp430-asm: init repo with proof-of-concept code

---
 .gitignore                                    |   1 +
 .rustfmt.toml                                 |  14 +
 Cargo.toml                                    |  12 +
 src/error.rs                                  | 107 ++++++
 src/hash.rs                                   |  17 +
 src/lib.rs                                    |  21 ++
 src/linker.rs                                 |  20 ++
 src/main.rs                                   | 104 ++++++
 src/parser.rs                                 | 212 ++++++++++++
 src/parser/comment.rs                         |  15 +
 src/parser/directive.rs                       |  32 ++
 src/parser/identifier.rs                      |  34 ++
 src/parser/instruction.rs                     |  67 ++++
 src/parser/instruction/encoding.rs            |  81 +++++
 src/parser/instruction/encoding/builder.rs    |  76 +++++
 .../instruction/encoding/encoding_parser.rs   |  39 +++
 .../instruction/encoding/jump_target.rs       |  39 +++
 src/parser/instruction/encoding/number.rs     |  75 +++++
 .../instruction/encoding/primary_operand.rs   | 141 ++++++++
 src/parser/instruction/encoding/register.rs   | 111 +++++++
 .../instruction/encoding/secondary_operand.rs |  95 ++++++
 src/parser/instruction/encoding/width.rs      |  31 ++
 src/parser/instruction/opcode.rs              | 258 +++++++++++++++
 src/parser/label.rs                           |  16 +
 src/parser/parsable.rs                        |  44 +++
 src/tokenizer.rs                              | 193 +++++++++++
 src/tokenizer/context.rs                      |  36 ++
 src/tokenizer/token.rs                        | 309 ++++++++++++++++++
 valid.asm                                     | 260 +++++++++++++++
 29 files changed, 2460 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .rustfmt.toml
 create mode 100644 Cargo.toml
 create mode 100644 src/error.rs
 create mode 100644 src/hash.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/linker.rs
 create mode 100644 src/main.rs
 create mode 100644 src/parser.rs
 create mode 100644 src/parser/comment.rs
 create mode 100644 src/parser/directive.rs
 create mode 100644 src/parser/identifier.rs
 create mode 100644 src/parser/instruction.rs
 create mode 100644 src/parser/instruction/encoding.rs
 create mode 100644 src/parser/instruction/encoding/builder.rs
 create mode 100644 src/parser/instruction/encoding/encoding_parser.rs
 create mode 100644 src/parser/instruction/encoding/jump_target.rs
 create mode 100644 src/parser/instruction/encoding/number.rs
 create mode 100644 src/parser/instruction/encoding/primary_operand.rs
 create mode 100644 src/parser/instruction/encoding/register.rs
 create mode 100644 src/parser/instruction/encoding/secondary_operand.rs
 create mode 100644 src/parser/instruction/encoding/width.rs
 create mode 100644 src/parser/instruction/opcode.rs
 create mode 100644 src/parser/label.rs
 create mode 100644 src/parser/parsable.rs
 create mode 100644 src/tokenizer.rs
 create mode 100644 src/tokenizer/context.rs
 create mode 100644 src/tokenizer/token.rs
 create mode 100755 valid.asm

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/.rustfmt.toml b/.rustfmt.toml
new file mode 100644
index 0000000..6d174c4
--- /dev/null
+++ b/.rustfmt.toml
@@ -0,0 +1,14 @@
+unstable_features = true
+max_width = 120
+wrap_comments = true
+comment_width = 100
+
+# Allow structs to fill an entire line
+use_small_heuristics = "Max"
+# Allow small functions on single line
+fn_single_line = true
+
+# Alignment
+enum_discrim_align_threshold = 12
+#struct_field_align_threshold = 12
+where_single_line = true
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..81a52c6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "msp430-asm"
+version = "0.1.0"
+edition = "2021"
+authors = ["John Breaux"]
+publish = false
+
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+regex = "1.9.3"
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..1a9c96c
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,107 @@
+// © 2023 John Breaux
+// TODO: Be incredibly specific about the source of the errors
+
+use std::fmt::Display;
+
+use super::{
+    tokenizer::token::{OwnedToken, Types},
+    *,
+};
+
+// TODO: Store error context in error. for example:
+// Error {ExpectationFailed{...}, WhileParsing(Register)}
+
+#[derive(Debug)]
+pub enum Error {
+    /// Produced by [Parser](crate::parser::Parser::parse<T>())
+    ParseError(parser::root::Root, Box<dyn std::error::Error + 'static>),
+    Contextual(Context, Box<Self>), // another Error, tagged with a Context (see `Error::context`)
+    /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
+    UnexpectedToken {
+        expected: Type,
+        got: OwnedToken,
+    },
+    /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the expected
+    /// [Types](Type)
+    AllExpectationsFailed {
+        expected: Types,
+        got: OwnedToken,
+    },
+    /// Produced by
+    /// [Number](parser::instruction::encoding::number::Number)[::parse()](parser::parsable::Parsable::parse())
+    /// when the parsed number contains digits too high for the specified radix
+    UnexpectedDigits(String, u32),
+    /// Produced by
+    /// [Opcode](parser::instruction::opcode::Opcode)[::parse()](parser::parsable::Parsable::parse())
+    /// when the opcode passed lexing but did not match recognized opcodes.
+    ///
+    /// This should be interpreted as a failure in lexing.
+    UnrecognizedOpcode(String),
+    NotARegister(String),         // text that is not a register; displayed as "{0} is not a register"
+    RegisterTooHigh(u16),         // register number; displayed as "r{0} is not a register"
+    FatSecondaryImmediate(isize), // secondary immediate other than #0 or #1
+    NumberTooWide(isize),         // number that does not fit in 16 bits
+    JumpedTooFar(isize),          // jump target outside the range named in the Display message
+    JumpedOdd(isize),             // odd jump target
+    EndOfFile,                    // displayed as "Unexpected end of file"
+}
+
+impl Error {
+    /// Tags this error with `c`; an error that is already [Contextual](Error::Contextual) is kept as-is
+    pub fn context(self, c: Context) -> Self {
+        if let Self::Contextual(..) = self { self } else { Self::Contextual(c, Box::new(self)) }
+    }
+    /// Extracts the root of the error tree
+    pub fn bare(self) -> Self {
+        let mut root = self;
+        // Peel off context layers until a non-contextual error is left
+        while let Self::Contextual(_, inner) = root {
+            root = *inner;
+        }
+        root
+    }
+    /// Replaces the error inside a [Contextual](Error::Contextual) with `other`, keeping its context;
+    /// any other error is returned unchanged and `other` is dropped
+    pub fn swap(mut self, other: Self) -> Self {
+        if let Self::Contextual(_, inner) = &mut self {
+            **inner = other;
+        }
+        self
+    }
+    /// Builds [UnexpectedToken](Error::UnexpectedToken) for exactly one expected [Type], and
+    /// [AllExpectationsFailed](Error::AllExpectationsFailed) for any other number of them
+    pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
+        match expected.as_ref() {
+            [only] => Self::UnexpectedToken { expected: *only, got: got.into() },
+            many => Self::AllExpectationsFailed { expected: many.into(), got: got.into() },
+        }
+    }
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self { // one user-facing message per variant
+            Error::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
+            Error::ParseError(_, error) => write!(f, "Error encountered while parsing:\n{error}"),
+            Error::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
+            Error::AllExpectationsFailed { expected, got } => write!(f, "Expected one of {expected}, got {got}."),
+            Error::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
+            Error::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
+            Error::NotARegister(reg) => write!(f, "{reg} is not a register"),
+            Error::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
+            Error::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
+            Error::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
+            Error::JumpedTooFar(num) => write!(f, "{num} is too far away (jump targets must be in range -3fc..=3fe)"),
+            Error::JumpedOdd(num) => write!(f, "Jump target {num} should not be odd."),
+            Error::EndOfFile => write!(f, "Unexpected end of file"),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    /// Returns the boxed cause carried by [ParseError](Error::ParseError); every other variant
+    /// reports no source
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        // Contextual also wraps an error, but it is shown through Display rather than reported here
+        if let Error::ParseError(_, cause) = self { Some(cause.as_ref()) } else { None }
+    }
+}
diff --git a/src/hash.rs b/src/hash.rs
new file mode 100644
index 0000000..0f8a98f
--- /dev/null
+++ b/src/hash.rs
@@ -0,0 +1,17 @@
+// © 2023 John Breaux
+//! Convenience trait for dealing with hashable data
+pub type Hash = u64;
+pub trait FromHash: From<Hash> {
+    /// Feeds any [std::hash::Hash] value to a new [DefaultHasher](std::collections::hash_map::DefaultHasher)
+    fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
+        let mut state = std::collections::hash_map::DefaultHasher::new();
+        std::hash::Hash::hash(&hashable, &mut state);
+        std::hash::Hasher::finish(&state)
+    }
+    /// Hashes `hashable` with [FromHash::hash], then converts the resulting [type@Hash] into `Self`
+    fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
+    where Self: Sized {
+        Self::hash(hashable).into()
+    }
+}
+impl<T: From<Hash>> FromHash for T {}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..a8464c6
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,21 @@
+// © 2023 John Breaux
+//! An assembler for the TI MSP430
+pub mod preamble {
+    use super::*;
+    pub use error::Error;
+    pub use hash::{FromHash, Hash};
+    pub use linker::{Linker, Visitor};
+    pub use parser::Parser;
+    pub use tokenizer::{
+        context::Context,
+        token::{Token, Type},
+        TokenStream, Tokenizer,
+    };
+}
+
+use preamble::*;
+pub mod error;
+pub mod hash;
+pub mod linker;
+pub mod parser;
+pub mod tokenizer;
diff --git a/src/linker.rs b/src/linker.rs
new file mode 100644
index 0000000..8dab54c
--- /dev/null
+++ b/src/linker.rs
@@ -0,0 +1,20 @@
+// © 2023 John Breaux
+// TODO: tree traversal and label resolution
+use crate::parser::preamble::*;
+pub trait Visitor<T> {
+    // One visit method per AST node type; each borrows the node and returns a `T`
+    fn visit_register(&mut self, r: &Register) -> T;
+    fn visit_number(&mut self, n: &Number) -> T;
+    fn visit_width(&mut self, w: &Width) -> T;
+    fn visit_primary_operand(&mut self, p: &PrimaryOperand) -> T;
+    fn visit_secondary_operand(&mut self, d: &SecondaryOperand) -> T;
+    fn visit_jump_target(&mut self, t: &JumpTarget) -> T;
+    fn visit_encoding(&mut self, e: &Encoding) -> T;
+    fn visit_opcode(&mut self, o: &Opcode) -> T;
+    fn visit_instruction(&mut self, i: &Instruction) -> T;
+    fn visit_directive(&mut self, d: &Directive) -> T;
+    // the most important one: resolve identifiers
+    fn visit_identifier(&mut self, i: &Identifier) -> T;
+}
+/// TODO: [Linker]
+pub struct Linker;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..37ba9f3
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,104 @@
+//! Simple frontend for the assembler
+
+use std::io::Read;
+
+use msp430_asm::preamble::*;
+
+// const ASM: &str = r"
+// //.org 8000
+// //.define INT #2400
+// //entry:
+//     mov.b 8000(sp), r15 ; pop into sp
+//     rrc @pc+
+//     add #64, r8
+//     call #10        // call INT
+// ";
+
+fn main() -> Result<(), Error> {
+    // Any of `-`, `-f` or `--file` selects whole-input mode; otherwise run as a line-by-line repl
+    let mut repl = true;
+    for arg in std::env::args() {
+        match arg.as_str() {
+            "-" | "-f" | "--file" => repl = false,
+            _ => (),
+        }
+    }
+
+    // Input buffer: holds one line at a time in repl mode, or all of stdin otherwise
+    let mut buf = String::new();
+
+    if repl {
+        // print!("> ");
+        // let _ = std::io::stdout().flush();
+        while let Ok(len) = std::io::stdin().read_line(&mut buf) {
+            match len {
+                0 => break,
+                1 => {
+                    buf.clear(); // read_line appends, so a skipped line must not linger in the buffer
+                    continue;
+                }
+                _ => (),
+            }
+            // print!("\nLexer:  ");
+            // tokenizer_dump(&mut Tokenizer::new(&buf));
+            //print!("Parser: ");
+            match Parser::default().parse(&buf) {
+                Ok(line) => println!("{line:x}"),
+                //Ok(tree) => println!("=> {tree}\n  => {tree:x}"),
+                Err(error) => println!("{error}"),
+            }
+            buf.clear();
+            // print!("> ");
+            // let _ = std::io::stdout().flush();
+        }
+    } else {
+        std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?;
+        let mut tk = Tokenizer::new(&buf);
+
+        // println!("Lexer:  ");
+        // tokenizer_dump(&mut Tokenizer::new(&buf));
+        let tree = Parser::default().parse_with(&mut tk);
+        match &tree {
+            Ok(tree) => println!("{tree:x}"),
+            Err(error) => eprintln!("{error}"),
+        }
+    }
+
+    Ok(())
+}
+
+/// Debug helper: prints the `Debug` form of each listed token type followed by a space, and
+/// starts a new output line at every [Type::Endl]
+#[allow(dead_code)]
+fn tokenizer_dump<'text, T: TokenStream<'text>>(t: &mut T) {
+    for token in t {
+        match token.variant() {
+            //Token::Space => (),
+            // A line-end token becomes a line break in the output and is not printed itself
+            Type::Endl => println!(),
+            Type::Comment
+            | Type::Label
+            | Type::Insn
+            | Type::ByteWidth
+            | Type::WordWidth
+            | Type::Register
+            | Type::RadixMarkerHex
+            | Type::RadixMarkerOct
+            | Type::RadixMarkerBin
+            | Type::Number
+            | Type::Minus
+            | Type::LParen
+            | Type::RParen
+            | Type::Indirect
+            | Type::Plus
+            | Type::Absolute
+            | Type::Immediate
+            | Type::Identifier
+            | Type::Directive
+            | Type::Separator
+            | Type::EndOfFile => print!("{token:?} "),
+            // Every other token type is skipped without printing
+            _ => (),
+        }
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..de352e9
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,212 @@
+// © 2023 John Breaux
+//! Parses [Tokens](crate::Token) into an [abstract syntax tree](Root)
+
+use crate::{Error, Hash, TokenStream, Type};
+use std::fmt::{Debug, Display, LowerHex};
+
+pub mod preamble {
+    //! All the different AST node types
+    use super::*;
+    // Traits
+    pub use parsable::Parsable;
+
+    pub use comment::Comment;
+    pub use directive::Directive;
+    pub use identifier::Identifier;
+    pub use instruction::{
+        encoding::{
+            encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
+            register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
+        },
+        opcode::Opcode,
+        Instruction,
+    };
+    pub use label::Label;
+    pub use line::Line;
+    pub use root::Root;
+}
+use preamble::*;
+
+pub(crate) mod parsable;
+
+pub(crate) mod comment;
+pub(crate) mod directive;
+pub(crate) mod identifier;
+pub(crate) mod instruction;
+pub(crate) mod label;
+
+pub(crate) mod line {
+    // © 2023 John Breaux
+    use super::*;
+
+    /// A line is one of:
+    /// - [`Label`] (definition)
+    /// - [`Instruction`]
+    /// - [`Directive`]
+    /// - [`Comment`]
+    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    pub enum Line {
+        Empty,
+        Label(Label), // TODO: Label resolution
+        Insn(Instruction),
+        Directive(Directive),
+        Comment(Comment),
+        EndOfFile, // Expected end of file
+    }
+
+    impl Parsable for Line {
+        fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+        where T: TokenStream<'text> {
+            if let Ok(token) = stream.peek_expect_any_of([Type::Comment, Type::Directive, Type::Insn, Type::Identifier])
+            { // the peeked token's type picks which node parser is handed the stream
+                return Ok(match token.variant() {
+                    Type::Comment => Self::Comment(Comment::parse(p, stream)?),
+                    Type::Directive => Self::Directive(Directive::parse(p, stream)?),
+                    Type::Identifier => Self::Label(Label::parse(p, stream)?), // a leading identifier is a label
+                    Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
+                    _ => unreachable!(), // assumes the peek only succeeds for the four types listed above
+                });
+            }
+            // TODO: preserve comments
+            let token = stream.expect_any_of([Type::EndOfFile])?; // otherwise only end of file is accepted
+            Ok(match token.variant() {
+                Type::EndOfFile => Self::EndOfFile,
+                _ => unreachable!(),
+            })
+        }
+    }
+    impl Display for Line {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            match self {
+                Self::Empty => writeln!(f, "\n"),
+                Self::Label(arg0) => Display::fmt(arg0, f),
+                Self::Insn(arg0) => Display::fmt(arg0, f),
+                Self::Directive(arg0) => Display::fmt(arg0, f),
+                Self::Comment(arg0) => Display::fmt(arg0, f),
+                Self::EndOfFile => write!(f, "; End of file."),
+            }
+        }
+    }
+    impl LowerHex for Line {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            match self {
+                Line::Insn(arg0) => LowerHex::fmt(arg0, f),
+                _ => Ok(()),
+            }
+        }
+    }
+}
+
+pub(crate) mod root {
+    // © 2023 John Breaux
+    use super::*;
+
+    /// Contains the entire AST
+    #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+    pub struct Root(pub Vec<Line>);
+
+    // TODO: Get data out of ParseTree
+    // TODO: Maybe implement some sort of follower
+    impl Parsable for Root {
+        fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+        where T: TokenStream<'text> {
+            let mut lines = vec![];
+            loop {
+                match Line::parse(p, stream) {
+                    Ok(Line::EndOfFile) => break, // the end-of-file marker itself is not stored
+                    Ok(line) => lines.push(line),
+                    Err(e) => {
+                        let ret = Self(lines); // what parsed so far; it travels inside the returned error
+                        eprintln!("{ret}");
+                        eprintln!("Error:{e}\n");
+                        eprint!("Remaining:");
+                        stream.for_each(|t| eprint!("{t}")); // prints, and thereby uses up, the unread tokens
+                        eprintln!();
+                        return Err(Error::ParseError(ret, Box::new(e)));
+                    }
+                }
+            }
+            Ok(Root(lines))
+        }
+    }
+
+    impl Display for Root {
+        /// Pads out every [Line] in order, each one followed by a single space
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            let Root(lines) = self;
+            // try_for_each stops at the first formatter error, as `?` inside a loop would
+            lines.iter().try_for_each(|line| f.pad(&format!("{line} ")))
+        }
+    }
+    impl LowerHex for Root {
+        /// Writes the hex form of every [Line] back to back, with nothing added between them
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            let Root(lines) = self;
+            // The same formatter is handed to each line in turn; the first error ends the walk
+            lines.iter().try_for_each(|line| LowerHex::fmt(line, f))
+        }
+    }
+    impl Debug for Root {
+        /// Writes each [Line] twice in a row: its Display form, then its Debug form
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            let Root(lines) = self;
+            // Display runs first; Debug only runs when Display succeeded
+            let both = |line: &Line, f: &mut std::fmt::Formatter<'_>| Display::fmt(line, f).and_then(|()| Debug::fmt(line, f));
+            lines.iter().try_for_each(|line| both(line, f))
+        }
+    }
+}
+
+/// The type for [Parser] callbacks
+pub type EmitComment = Box<dyn FnMut(&str)>;
+pub type DefineLabel = Box<dyn FnMut(&Identifier) -> Result<(), Error>>;
+
+pub struct Parser {
+    radix: u32,
+    // TODO: callbacks for emitted token sequences?!
+    on_label: Option<DefineLabel>,
+    on_comment: Option<EmitComment>,
+}
+
+impl Parser {
+    /// Parses a whole [Root] from an existing [TokenStream], wrapped with [TokenStream::ignore_spaces]
+    pub fn parse_with<'t, T>(self, stream: &'t mut T) -> Result<Root, Error>
+    where T: TokenStream<'t> {
+        Root::parse(&self, &mut stream.ignore_spaces())
+    }
+    /// Tokenizes `input` and parses a whole [Root] from it
+    pub fn parse<T>(self, input: &T) -> Result<Root, Error>
+    where T: AsRef<str> + ?Sized {
+        Root::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+    }
+    /// Tokenizes `input` and parses a single [Line] from it
+    pub fn parse_one<T>(self, input: &T) -> Result<Line, Error>
+    where T: AsRef<str> + ?Sized {
+        Line::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
+    }
+
+    /// Sets the default radix for [Number] conversion and returns the updated [Parser]
+    pub fn radix(self, radix: u32) -> Self { Self { radix, ..self } }
+
+    /// Inform the caller of a new identifier definition
+    pub fn define_label(&mut self, l: &Identifier) -> Result<(), Error> {
+        match self.on_label.as_mut() {
+            Some(f) => f(l),
+            _ => Ok(()),
+        }
+    }
+    /// Inform the caller of a comment; does nothing when no comment callback is set
+    pub fn emit_comment(&mut self, d: &str) {
+        if let Some(f) = self.on_comment.as_mut() {
+            f(d)
+        }
+    }
+}
+
+impl Default for Parser {
+    fn default() -> Self { Self { radix: 16, on_label: None, on_comment: None } }
+}
+
+impl Debug for Parser {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
+    }
+}
diff --git a/src/parser/comment.rs b/src/parser/comment.rs
new file mode 100644
index 0000000..2cb97ab
--- /dev/null
+++ b/src/parser/comment.rs
@@ -0,0 +1,15 @@
+// © 2023 John Breaux
+use super::*;
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Comment(pub String);
+
+impl Parsable for Comment {
+    /// Takes one [Type::Comment] token from the stream and stores a copy of its lexeme
+    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        stream.expect(Type::Comment).map(|token| Self(token.lexeme().to_string()))
+    }
+}
+impl Display for Comment {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
+}
diff --git a/src/parser/directive.rs b/src/parser/directive.rs
new file mode 100644
index 0000000..fd1dc73
--- /dev/null
+++ b/src/parser/directive.rs
@@ -0,0 +1,32 @@
+// © 2023 John Breaux
+//! A [Directive] issues commands directly to the [Tokenizer](crate::Tokenizer) and
+//! [Linker](crate::Linker)
+use super::*;
+use crate::hash::FromHash;
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Directive(pub Hash, pub String);
+
+impl Directive {
+    fn str<S: ToString>(mut self, s: S) -> Self {
+        self.1 = s.to_string();
+        self
+    }
+}
+
+impl From<Hash> for Directive {
+    fn from(value: Hash) -> Self { Self(value, String::new()) }
+}
+
+impl Parsable for Directive {
+    fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        // expect a directive
+        let d = stream.expect(Type::Directive)?;
+        // store the hash of the lexeme together with the lexeme text (no listener is notified yet)
+        Ok(Self::from_hash(d.lexeme()).str(d.lexeme()))
+    }
+}
+impl Display for Directive {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.1) }
+}
diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs
new file mode 100644
index 0000000..bd25609
--- /dev/null
+++ b/src/parser/identifier.rs
@@ -0,0 +1,34 @@
+// © 2023 John Breaux
+use super::*;
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Identifier {
+    Hash(Hash),
+    Str(String),
+}
+
+impl Identifier {
+    fn str<T: AsRef<str>>(s: T) -> Self { Self::Str(s.as_ref().into()) }
+}
+
+impl From<Hash> for Identifier {
+    fn from(value: Hash) -> Self { Self::Hash(value) }
+}
+
+impl Parsable for Identifier {
+    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        let token = stream.expect(Type::Identifier)?;
+        match token.variant() {
+            Type::Identifier => Ok(Self::str(token.lexeme())),
+            _ => unreachable!("Expected Identifier, got {token:?}"),
+        }
+    }
+}
+impl Display for Identifier {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Identifier::Hash(_) => Display::fmt("Unresolved", f),
+            Identifier::Str(s) => Display::fmt(s, f),
+        }
+    }
+}
diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs
new file mode 100644
index 0000000..a64ab6f
--- /dev/null
+++ b/src/parser/instruction.rs
@@ -0,0 +1,67 @@
+// © 2023 John Breaux
+//! An [Instruction] contains the [Opcode] and [Encoding] information for a single msp430
+//! instruction
+//! 
+//! 
+//! Note: [Opcode] and [Encoding] are very tightly coupled, because they represent interdependent parts
+//! of the same instruction. This is why [Opcode]::resolve() returns an [EncodingParser] -- otherwise,
+//! there's an explosion of states that I can't really cope with on my own. Really, there's about 9
+//! valid classes of instruction, some of which are only used for one or two of the MSP430's
+//! instructions.
+
+use super::*;
+
+pub mod encoding;
+pub mod opcode;
+
+/// Represents an entire MSP430 instruction
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Instruction(Opcode, Encoding);
+
+impl Instruction {
+    pub fn opcode(&self) -> &Opcode { &self.0 }
+    pub fn encoding(&self) -> &Encoding { &self.1 }
+    /// Gets the Instruction as a [u16]
+    pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() }
+    /// Gets the extension words, exactly as returned by [Encoding::extwords]
+    pub fn ext_words(&self) -> (Option<u16>, Option<u16>) { self.1.extwords() }
+}
+
+impl Parsable for Instruction {
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where
+        Self: Sized,
+        T: crate::TokenStream<'text>,
+    {
+        // parse an opcode
+        let insn = stream.expect(Type::Insn)?;
+        let opcode: Opcode = insn.parse()?;
+        // resolve the opcode to a final opcode and an encoding
+        let (opcode, encoding) = opcode.resolve();
+        // parse the encoding
+        let encoding = encoding.parse(p, stream)?;
+        Ok(Self(opcode, encoding))
+    }
+}
+
+impl From<Instruction> for u16 {
+    fn from(value: Instruction) -> Self { value.word() }
+}
+
+impl Display for Instruction {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
+}
+
+impl LowerHex for Instruction {
+    /// Writes the byte-swapped instruction word, then each extension word that is present, every one
+    /// formatted as `{:04x}` followed by a space
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let word = self.word();
+        let (ext_src, ext_dst) = self.ext_words();
+        write!(f, "{:04x} ", word.swap_bytes())?;
+        // Missing extension words are skipped; the first of the pair is always written first
+        for ext in [ext_src, ext_dst].into_iter().flatten() {
+            write!(f, "{:04x} ", ext.swap_bytes())?;
+        }
+        Ok(())
+    }
+}
diff --git a/src/parser/instruction/encoding.rs b/src/parser/instruction/encoding.rs
new file mode 100644
index 0000000..b1bef8f
--- /dev/null
+++ b/src/parser/instruction/encoding.rs
@@ -0,0 +1,81 @@
+// © 2023 John Breaux
+//! An [Encoding] represents the set of arguments for the [msp430's instructions](Opcode)
+use super::*;
+
+pub mod number;
+pub mod register;
+pub mod width;
+
+pub mod jump_target;
+pub mod primary_operand;
+pub mod secondary_operand;
+
+mod builder;
+pub mod encoding_parser;
+
+use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
+use encoding_parser::EncodingParser;
+
+/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
+///
+/// # Examples
+/// ```rust
+/// use msp430_asm::{preamble::*, parser::preamble::*};
+/// // Create a token sequence
+/// let asm_file = r".b 8000(r15)";
+/// // Create a single-operand encoding parser
+/// let single: EncodingParser = Encoding::single().end();
+/// // Parse an Encoding from it
+/// let encoding: Encoding = single
+///     .parse(&Parser::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
+///     .unwrap();
+/// // Print the Encoding
+/// println!("{encoding}");
+/// ```
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Encoding {
+    Single { width: Width, dst: PrimaryOperand },
+    Jump { target: JumpTarget },
+    Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
+}
+impl Encoding {
+    /// Returns a builder for [Encoding::Single]
+    pub fn single() -> SingleBuilder { Default::default() }
+    /// Returns a builder for [Encoding::Jump]
+    pub fn jump() -> JumpBuilder { Default::default() }
+    /// Returns a builder for [Encoding::Double]
+    pub fn double() -> DoubleBuilder { Default::default() }
+    /// Returns a builder for [Encoding::Double]
+    ///
+    /// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
+    /// dst are the same
+    pub fn reflexive() -> ReflexiveBuilder { Default::default() }
+    /// Returns this encoding's bits of the instruction word: width, operand modes and registers ORed together
+    pub fn word(&self) -> u16 {
+        match *self {
+            Encoding::Single { width, dst } => u16::from(width) | dst.mode() | dst.register() as u16,
+            Encoding::Jump { target } => target.word(),
+            Encoding::Double { width, src, dst } => {
+                u16::from(width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
+            }
+        }
+    }
+    /// Returns extwords for instruction
+    pub fn extwords(&self) -> (Option<u16>, Option<u16>) {
+        match self {
+            Encoding::Double { src, dst, .. } => (src.ext_word(), dst.ext_word()),
+            Encoding::Single { dst, .. } => (dst.ext_word(), None),
+            Encoding::Jump { .. } => (None, None),
+        }
+    }
+}
+
+impl Display for Encoding {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
+            Encoding::Jump { target } => write!(f, " {target}"),
+            Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
+        }
+    }
+}
diff --git a/src/parser/instruction/encoding/builder.rs b/src/parser/instruction/encoding/builder.rs
new file mode 100644
index 0000000..c70a843
--- /dev/null
+++ b/src/parser/instruction/encoding/builder.rs
@@ -0,0 +1,76 @@
+// © 2023 John Breaux
+//! Builder API for [EncodingParser]
+use super::*;
+#[derive(Debug, Default)]
+pub struct SingleBuilder {
+    width: Option<Width>,
+    dst: Option<PrimaryOperand>,
+}
+impl SingleBuilder {
+    pub fn width(mut self, width: bool) -> Self {
+        self.width = Some(width.into());
+        self
+    }
+    /// Sets the [PrimaryOperand] field
+    pub fn operand(mut self, dst: PrimaryOperand) -> Self {
+        self.dst = Some(dst);
+        self
+    }
+    /// Build
+    pub fn end(&self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
+}
+
+#[derive(Debug, Default)]
+pub struct JumpBuilder {
+    target: Option<JumpTarget>,
+}
+impl JumpBuilder {
+    pub fn target(mut self, target: JumpTarget) -> Self {
+        self.target = Some(target);
+        self
+    }
+    pub fn end(&self) -> EncodingParser { EncodingParser::Jump { target: self.target } }
+}
+
+#[derive(Debug, Default)]
+pub struct DoubleBuilder {
+    width: Option<Width>,
+    src: Option<PrimaryOperand>,
+    dst: Option<SecondaryOperand>,
+}
+impl DoubleBuilder {
+    /// Sets the [Width] field
+    pub fn width(mut self, width: bool) -> Self {
+        self.width = Some(width.into());
+        self
+    }
+    /// Sets the [PrimaryOperand] field
+    pub fn src(mut self, src: PrimaryOperand) -> Self {
+        self.src = Some(src);
+        self
+    }
+    /// Sets the [SecondaryOperand] field
+    pub fn dst(mut self, dst: SecondaryOperand) -> Self {
+        self.dst = Some(dst);
+        self
+    }
+    pub fn end(&self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
+}
+
+#[derive(Debug, Default)]
+pub struct ReflexiveBuilder {
+    width: Option<Width>,
+    reg: Option<SecondaryOperand>,
+}
+impl ReflexiveBuilder {
+    /// Sets the [Width] field
+    pub fn width(mut self, width: bool) -> Self {
+        self.width = Some(width.into());
+        self
+    }
+    pub fn reg(mut self, reg: SecondaryOperand) -> Self {
+        self.reg = Some(reg);
+        self
+    }
+    pub fn end(&self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
+}
diff --git a/src/parser/instruction/encoding/encoding_parser.rs b/src/parser/instruction/encoding/encoding_parser.rs
new file mode 100644
index 0000000..118938c
--- /dev/null
+++ b/src/parser/instruction/encoding/encoding_parser.rs
@@ -0,0 +1,39 @@
+// © 2023 John Breaux
+//! An [EncodingParser] builds an [Encoding] from a [TokenStream]
+use super::*;
+
+#[derive(Debug)]
+/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
+pub enum EncodingParser {
+    Single { width: Option<Width>, dst: Option<PrimaryOperand> },
+    Jump { target: Option<JumpTarget> },
+    Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
+    Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
+}
+impl EncodingParser {
+    /// Constructs an [Encoding] from this [EncodingParser], filling holes with the
+    /// tokenstream. Fields pre-set by a builder are used as-is and consume no tokens.
+    pub fn parse<'text, T>(&self, p: &Parser, stream: &mut T) -> Result<Encoding, Error>
+    where T: crate::TokenStream<'text> {
+        Ok(match self {
+            Self::Single { width, dst } => {
+                let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream));
+                let dst = if let Some(dst) = dst { *dst } else { PrimaryOperand::parse(p, stream)? };
+                Encoding::Single { width, dst }
+            }
+            Self::Jump { target: Some(target) } => Encoding::Jump { target: *target },
+            Self::Jump { target: None } => Encoding::Jump { target: JumpTarget::parse(p, stream)? },
+            Self::Double { width, src, dst } => {
+                let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream));
+                let src = if let Some(src) = src { *src } else { PrimaryOperand::parse(p, stream)? };
+                let dst = if let Some(dst) = dst { *dst } else { SecondaryOperand::parse(p, stream)? };
+                Encoding::Double { width, src, dst }
+            }
+            Self::Reflexive { width, reg } => {
+                let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream));
+                let reg = if let Some(reg) = reg { *reg } else { SecondaryOperand::parse(p, stream)? };
+                Encoding::Double { width, src: reg.into(), dst: reg }
+            }
+        })
+    }
+}
diff --git a/src/parser/instruction/encoding/jump_target.rs b/src/parser/instruction/encoding/jump_target.rs
new file mode 100644
index 0000000..347c42b
--- /dev/null
+++ b/src/parser/instruction/encoding/jump_target.rs
@@ -0,0 +1,39 @@
+// © 2023 John Breaux
+//! A [JumpTarget] contains the [pc-relative offset](Number) or [Identifier]
+//! for a [Jump instruction encoding](Encoding::Jump)
+use super::*;
+
+/// The target of a [Jump](Encoding::Jump)
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct JumpTarget(Number);
+
+impl JumpTarget {
+    pub fn word(&self) -> u16 { u16::from(self.0) & 0x3ff }
+}
+
+impl Parsable for JumpTarget {
+    /// - Identifier
+    /// - Number
+    /// - Negative
+    ///   - Number
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: crate::TokenStream<'text> {
+        // Try to parse a number
+        let target = Number::parse(p, stream)?;
+        match target.into() {
+            i if i % 2 != 0 => Err(Error::JumpedOdd(i).context(stream.context()))?,
+            i if (-1024..=1022).contains(&(i - 2)) => Ok(Self((target - 2) >> 1)),
+            i => Err(Error::JumpedTooFar(i).context(stream.context()))?,
+        }
+    }
+}
+
+impl From<JumpTarget> for u16 {
+    fn from(value: JumpTarget) -> Self { value.0.into() }
+}
+
+impl Display for JumpTarget {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", (1 + isize::from(self.0)) << 1)
+    }
+}
diff --git a/src/parser/instruction/encoding/number.rs b/src/parser/instruction/encoding/number.rs
new file mode 100644
index 0000000..18943b8
--- /dev/null
+++ b/src/parser/instruction/encoding/number.rs
@@ -0,0 +1,75 @@
+// © 2023 John Breaux
+//! A [Number] represents a 16-bit signed or unsigned word
+use super::*;
+
+// TODO: Allow identifiers/expressions  in place of numbers
+//       - Dependency inversion in TokenStream to allow swapping the parser mid-parse?
+//       - Oh my god, not relying on std::iter::Iterator allows for so many more parsing options
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Number(isize, u32); // (value, radix)
+
+impl Parsable for Number {
+    // A number is:
+    // Minus? RadixMarker[Hex|Dec|Oct|Bin]?
+    // - Number
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        use Type::*;
+        let negative = stream.expect(Minus).is_ok();
+        let radix = match stream
+            .expect_any_of([RadixMarkerHex, RadixMarkerDec, RadixMarkerOct, RadixMarkerBin])
+            .ok()
+            .map(|t| t.variant())
+        {
+            Some(RadixMarkerHex) => 16,
+            Some(RadixMarkerDec) => 10,
+            Some(RadixMarkerOct) => 8,
+            Some(RadixMarkerBin) => 2,
+            _ => p.radix,
+        };
+        let number = stream.expect(Number)?;
+        let number = isize::from_str_radix(number.lexeme(), radix)
+            .map_err(|_| Error::UnexpectedDigits(number.lexeme().into(), radix).context(stream.context()))?
+            * if negative { -1 } else { 1 };
+        // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
+        Ok(Self(
+            if (-0x8000..0x10000).contains(&number) {
+                number
+            } else {
+                Err(Error::NumberTooWide(number).context(stream.context()))?
+            },
+            radix,
+        ))
+    }
+}
+impl From<Number> for isize {
+    fn from(value: Number) -> Self { value.0 as Self }
+}
+impl From<Number> for i32 {
+    fn from(value: Number) -> Self { value.0 as Self }
+}
+impl From<Number> for u16 {
+    /// Converts this type from the input type.
+    fn from(value: Number) -> Self { value.0 as Self }
+}
+
+impl std::ops::Sub<isize> for Number {
+    type Output = Self;
+    fn sub(mut self, rhs: isize) -> Self::Output {
+        self.0 -= rhs;
+        self
+    }
+}
+
+impl std::ops::Shr<usize> for Number {
+    type Output = Self;
+    fn shr(mut self, rhs: usize) -> Self::Output {
+        self.0 >>= rhs;
+        self
+    }
+}
+
+impl std::fmt::Display for Number {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:x}", self.0) }
+}
diff --git a/src/parser/instruction/encoding/primary_operand.rs b/src/parser/instruction/encoding/primary_operand.rs
new file mode 100644
index 0000000..651f595
--- /dev/null
+++ b/src/parser/instruction/encoding/primary_operand.rs
@@ -0,0 +1,141 @@
+// © 2023 John Breaux
+//! A [PrimaryOperand] contains the first [Register], addressing mode, and Extension
+//! Word for an [instruction](Instruction)
+use super::*;
+
+/// The Source of a [Double](Encoding::Double) or Destination of a
+/// [Single](Encoding::Single)
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum PrimaryOperand {
+    Direct(Register),
+    Indirect(Register),
+    PostInc(Register),
+    Indexed(Register, Number),
+    Absolute(Number),
+    Immediate(Number),
+    Four,
+    Eight,
+    Zero,
+    One,
+    Two,
+    MinusOne,
+}
+
+impl PrimaryOperand {
+    /// Returns the mode bits
+    pub fn mode(&self) -> u16 {
+        use PrimaryOperand::*;
+        match self {
+            Direct(_) | Zero => 0,
+            Indexed(_, _) | Absolute(_) | One => 1 << 4,
+            Indirect(_) | Two | Four => 2 << 4,
+            PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
+        }
+    }
+    /// Gets the register
+    pub fn register(&self) -> Register {
+        use PrimaryOperand::*;
+        match self {
+            Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
+            Immediate(_) => Register::pc,
+            Absolute(_) | Four | Eight => Register::sr,
+            Zero | One | Two | MinusOne => Register::cg,
+        }
+    }
+    /// Gets the extension word, if present
+    pub fn ext_word(&self) -> Option<u16> {
+        use PrimaryOperand::*;
+        match self {
+            Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
+            _ => None,
+        }
+    }
+}
+
+impl Parsable for PrimaryOperand {
+    // - Register
+    // - Indirect
+    //     - Register
+    //         - PostInc?
+    // - Number
+    //     - OpenIdx
+    //         - Register
+    //             - CloseIdx
+    // - Absolute
+    //     - Number
+    // - Immediate
+    //     - Number
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: crate::TokenStream<'text> {
+        use PrimaryOperand::*;
+        // Try parsing as Register Direct
+        if let Some(r) = Register::try_parse(p, stream)? {
+            return Ok(Self::Direct(r));
+        }
+        // Try parsing as Number Indexed
+        if let Some(idx) = Number::try_parse(p, stream)? {
+            stream.expect(Type::LParen)?;
+            let reg = Register::parse(p, stream)?;
+            stream.expect(Type::RParen)?;
+            return Ok(Self::Indexed(reg, idx));
+        }
+        // Or directly match any of the valid prefix markers
+        let token = stream.expect_any_of([Type::Indirect, Type::Absolute, Type::Immediate])?;
+        Ok(match token.variant() {
+            Type::Indirect => {
+                let reg = stream.expect(Type::Register)?.parse()?;
+                match stream.expect(Type::Plus) {
+                    Ok(_) => PostInc(reg),
+                    Err(_) => Indirect(reg),
+                }
+            }
+            Type::Absolute => Absolute(Number::parse(p, stream)?),
+            Type::Immediate => {
+                let number = Number::parse(p, stream)?;
+                match number.into() {
+                    // There are two representations for the all-ones constant, since Number preserves absolute
+                    // signedness.
+                    -1 | 0xffff => MinusOne,
+                    0 => Zero,
+                    1 => One,
+                    2 => Two,
+                    4 => Four,
+                    8 => Eight,
+                    _ => Immediate(number),
+                }
+            }
+            _ => unreachable!("Token {token:?} passed expectation but failed match!"),
+        })
+    }
+}
+
+impl From<SecondaryOperand> for PrimaryOperand {
+    fn from(value: SecondaryOperand) -> Self {
+        match value {
+            SecondaryOperand::Direct(r) => Self::Direct(r),
+            SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
+            SecondaryOperand::Absolute(n) => Self::Absolute(n),
+            SecondaryOperand::Zero => Self::Zero,
+            SecondaryOperand::One => Self::One,
+        }
+    }
+}
+
+impl Display for PrimaryOperand {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Direct(r) => write!(f, "{r}"),
+            Self::Indirect(r) => write!(f, "@{r}"),
+            Self::PostInc(r) => write!(f, "@{r}+"),
+            Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
+            Self::Absolute(n) => write!(f, "&{n}"),
+            Self::Immediate(n) => write!(f, "#{n}"),
+            Self::Four => write!(f, "#4"),
+            Self::Eight => write!(f, "#8"),
+            Self::Zero => write!(f, "#0"),
+            Self::One => write!(f, "#1"),
+            Self::Two => write!(f, "#2"),
+            Self::MinusOne => write!(f, "#-1"),
+        }
+    }
+}
diff --git a/src/parser/instruction/encoding/register.rs b/src/parser/instruction/encoding/register.rs
new file mode 100644
index 0000000..18fa297
--- /dev/null
+++ b/src/parser/instruction/encoding/register.rs
@@ -0,0 +1,111 @@
+// © 2023 John Breaux
+//! A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
+use super::*;
+use std::str::FromStr;
+
+/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
+
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Register {
+    /// Program Counter
+    pc,
+    /// Stack Pointer
+    sp,
+    /// Status Register
+    sr,
+    /// Constant Generator
+    cg,
+    r4,
+    r5,
+    r6,
+    r7,
+    r8,
+    r9,
+    r10,
+    r11,
+    r12,
+    r13,
+    r14,
+    r15,
+}
+
+impl Parsable for Register {
+    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: crate::TokenStream<'text> {
+        stream.expect(Type::Register).map_err(|e| e.context(stream.context()))?.lexeme().parse()
+    }
+}
+
+impl From<Register> for u16 {
+    fn from(value: Register) -> Self { value as u16 }
+}
+
+impl TryFrom<u16> for Register {
+    type Error = Error;
+    fn try_from(value: u16) -> Result<Self, Self::Error> {
+        use Register::*;
+        Ok(match value {
+            0 => pc,
+            1 => sp,
+            2 => sr,
+            3 => cg,
+            4 => r4,
+            5 => r5,
+            6 => r6,
+            7 => r7,
+            8 => r8,
+            9 => r9,
+            10 => r10,
+            11 => r11,
+            12 => r12,
+            13 => r13,
+            14 => r14,
+            15 => r15,
+            _ => return Err(Error::RegisterTooHigh(value)),
+        })
+    }
+}
+
+impl FromStr for Register {
+    type Err = Error;
+    /// Parses `pc`/`sp`/`sr`/`cg`, or a one-character prefix followed by a register index
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use Register::*;
+        match s {
+            "pc" => Ok(pc),
+            "sp" => Ok(sp),
+            "sr" => Ok(sr),
+            "cg" => Ok(cg),
+            // `get` rather than `&s[1..]`: an empty or multi-byte-prefixed lexeme must error, not panic
+            _ => s.get(1..).unwrap_or("").parse::<u16>().map_err(|_| Error::NotARegister(s.into()))?.try_into(),
+        }
+    }
+}
+
+impl From<Register> for &str {
+    fn from(value: Register) -> Self {
+        use Register::*;
+        match value {
+            pc => "pc",
+            sp => "sp",
+            sr => "sr",
+            cg => "cg",
+            r4 => "r4",
+            r5 => "r5",
+            r6 => "r6",
+            r7 => "r7",
+            r8 => "r8",
+            r9 => "r9",
+            r10 => "r10",
+            r11 => "r11",
+            r12 => "r12",
+            r13 => "r13",
+            r14 => "r14",
+            r15 => "r15",
+        }
+    }
+}
+
+impl std::fmt::Display for Register {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) }
+}
diff --git a/src/parser/instruction/encoding/secondary_operand.rs b/src/parser/instruction/encoding/secondary_operand.rs
new file mode 100644
index 0000000..44d8f33
--- /dev/null
+++ b/src/parser/instruction/encoding/secondary_operand.rs
@@ -0,0 +1,95 @@
+// © 2023 John Breaux
+//! A [SecondaryOperand] contains the second [Register], addressing mode, and Extension
+//! Word for a [two-operand](Encoding::Double) [instruction](Instruction)
+use super::*;
+
+/// The destination of a [Double](Encoding::Double)
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum SecondaryOperand {
+    Direct(Register),
+    Indexed(Register, Number),
+    Absolute(Number),
+    // Joke encodings?
+    Zero,
+    One,
+}
+
+impl SecondaryOperand {
+    pub fn mode(&self) -> u16 {
+        use SecondaryOperand::*;
+        match self {
+            Direct(_) | Zero => 0,
+            Indexed(_, _) | Absolute(_) | One => 1 << 7,
+        }
+    }
+    pub fn register(&self) -> Register {
+        use SecondaryOperand::*;
+        match self {
+            Direct(r) | Indexed(r, _) => *r,
+            Absolute(_) => Register::sr,
+            Zero | One => Register::cg,
+        }
+    }
+    /// This is the only way to have an extension word
+    pub fn ext_word(&self) -> Option<u16> {
+        use SecondaryOperand::*;
+        match self {
+            Indexed(_, w) | Absolute(w) => Some((*w).into()),
+            _ => None,
+        }
+    }
+}
+
+impl Parsable for SecondaryOperand {
+    /// Separator
+    /// - Register  => Direct
+    /// - Number    => Indexed
+    ///     - OpenIdx
+    ///         - Register
+    ///             - CloseIdx
+    /// - Absolute
+    ///   - Number
+    /// - Immediate
+    ///   - Number == 0, 1
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, crate::Error>
+    where T: crate::TokenStream<'text> {
+        use SecondaryOperand::*;
+        stream.allow(Type::Separator);
+        // Try parsing as Register Direct
+        if let Some(r) = Register::try_parse(p, stream)? {
+            return Ok(Self::Direct(r));
+        }
+        // Try parsing as Number Indexed
+        if let Some(idx) = Number::try_parse(p, stream)? {
+            stream.expect(Type::LParen)?;
+            let reg = Register::parse(p, stream)?;
+            stream.expect(Type::RParen)?;
+            return Ok(Self::Indexed(reg, idx));
+        }
+        let token = stream.expect_any_of([Type::Absolute, Type::Immediate])?;
+        Ok(match token.variant() {
+            Type::Absolute => Absolute(Number::parse(p, stream)?),
+            Type::Immediate => {
+                let number = Number::parse(p, stream)?;
+                match number.into() {
+                    0 => Zero,
+                    1 => One,
+                    n => Err(Error::FatSecondaryImmediate(n as isize).context(stream.context()))?,
+                }
+            }
+            _ => unreachable!("Token {token:?} passed expectation but failed match!"),
+        })
+    }
+}
+
+impl Display for SecondaryOperand {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Direct(r) => write!(f, "{r}"),
+            Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
+            Self::Absolute(n) => write!(f, "&{n}"),
+            Self::Zero => write!(f, "#0"),
+            Self::One => write!(f, "#1"),
+        }
+    }
+}
diff --git a/src/parser/instruction/encoding/width.rs b/src/parser/instruction/encoding/width.rs
new file mode 100644
index 0000000..1501456
--- /dev/null
+++ b/src/parser/instruction/encoding/width.rs
@@ -0,0 +1,31 @@
+// © 2023 John Breaux
+use super::*;
+
+/// Represents an instruction's operand width.
+///
+/// Evaluates to false when instruction takes word-sized operands, or true when
+/// instruction takes byte-sized operands
+#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Width(bool);
+
+impl Parsable for Width {
+    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else {
+            return Ok(Self(false));
+        };
+        Ok(Self(token.is_variant(Type::ByteWidth)))
+    }
+}
+impl From<Width> for u16 {
+    fn from(value: Width) -> Self { (value.0 as Self) << 6 }
+}
+impl From<Width> for bool {
+    fn from(value: Width) -> Self { value.0 }
+}
+impl From<bool> for Width {
+    fn from(value: bool) -> Self { Width(value) }
+}
+impl std::fmt::Display for Width {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) }
+}
diff --git a/src/parser/instruction/opcode.rs b/src/parser/instruction/opcode.rs
new file mode 100644
index 0000000..1384cc8
--- /dev/null
+++ b/src/parser/instruction/opcode.rs
@@ -0,0 +1,258 @@
+// © 2023 John Breaux
+//! An [Opcode] encodes an msp430 operation
+use super::*;
+
+use std::str::FromStr;
+
+/// Opcode from the [MSPGCC Manual][1]
+///
+/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
+/// extract from a [TokenStream] only the required arguments for that call.
+///
+/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
+#[allow(clippy::identity_op)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Opcode {
+    // "Emulated" opcodes
+    Nop,
+    Pop,
+    Br,
+    Ret,
+    Clrc,
+    Setc,
+    Clrz,
+    Setz,
+    Clrn,
+    Setn,
+    Dint,
+    Eint,
+    Rla,
+    Rlc,
+    Inv,
+    Clr,
+    Tst,
+    Dec,
+    Decd,
+    Inc,
+    Incd,
+    Adc,
+    Dadc,
+    Sbc,
+    // Single
+    Rrc  = 0x1000 | 0 << 7,
+    Swpb = 0x1000 | 1 << 7,
+    Rra  = 0x1000 | 2 << 7,
+    Sxt  = 0x1000 | 3 << 7,
+    Push = 0x1000 | 4 << 7,
+    Call = 0x1000 | 5 << 7,
+    Reti = 0x1000 | 6 << 7,
+    // Jump
+    Jnz  = 0x2000 | 0 << 10,
+    Jz   = 0x2000 | 1 << 10,
+    Jnc  = 0x2000 | 2 << 10,
+    Jc   = 0x2000 | 3 << 10,
+    Jn   = 0x2000 | 4 << 10,
+    Jge  = 0x2000 | 5 << 10,
+    Jl   = 0x2000 | 6 << 10,
+    Jmp  = 0x2000 | 7 << 10,
+    // Double
+    Mov  = 0x4000,
+    Add  = 0x5000,
+    Addc = 0x6000,
+    Subc = 0x7000,
+    Sub  = 0x8000,
+    Cmp  = 0x9000,
+    Dadd = 0xa000,
+    Bit  = 0xb000,
+    Bic  = 0xc000,
+    Bis  = 0xd000,
+    Xor  = 0xe000,
+    And  = 0xf000,
+}
+
+impl Opcode {
+    pub fn takes_width(&self) -> bool {
+        use Opcode::*;
+        match self {
+            Rrc => true,
+            Swpb => false,
+            Rra => true,
+            Sxt => false,
+            Push => true,
+            Call | Reti => false,
+            Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => false,
+            Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => true,
+            Nop | Pop | Br | Ret | Clrc | Setc | Clrz | Setz | Clrn | Setn | Dint | Eint | Rla | Rlc | Inv | Clr
+            | Tst | Dec | Decd | Inc | Incd | Adc | Dadc | Sbc => true,
+        }
+    }
+    /// Resolve an Opcode into an [Opcode] and an [EncodingParser]
+    pub fn resolve(self) -> (Opcode, EncodingParser) {
+        use super::Encoding as Enc;
+        use Opcode::*;
+        use Register::*;
+        use {PrimaryOperand as Src, SecondaryOperand as Dst};
+        match self {
+            Rrc | Swpb | Rra | Sxt | Push | Call | Reti => (self, Enc::single().end()),
+            Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => (self, Enc::jump().end()),
+            Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => (self, Enc::double().end()),
+            Nop => (Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
+            Pop => (Mov, Enc::double().src(Src::PostInc(sp)).end()),
+            Br => (Mov, Enc::double().dst(Dst::Direct(pc)).end()),
+            Ret => (Mov, Enc::double().src(Src::PostInc(sp)).dst(Dst::Direct(pc)).end()),
+            Clrc => (Bic, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()),
+            Setc => (Bis, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()),
+            Clrz => (Bic, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()),
+            Setz => (Bis, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()),
+            Clrn => (Bic, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()),
+            Setn => (Bis, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()),
+            Dint => (Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()),
+            Eint => (Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()),
+            Rla => (Add, Enc::reflexive().end()),
+            Rlc => (Addc, Enc::reflexive().end()),
+            Inv => (Xor, Enc::double().src(Src::MinusOne).end()),
+            Clr => (Mov, Enc::double().src(Src::Zero).end()),
+            Tst => (Cmp, Enc::double().src(Src::Zero).end()),
+            Dec => (Sub, Enc::double().src(Src::One).end()),
+            Decd => (Sub, Enc::double().src(Src::Two).end()),
+            Inc => (Add, Enc::double().src(Src::One).end()),
+            Incd => (Add, Enc::double().src(Src::Two).end()),
+            Adc => (Addc, Enc::double().src(Src::Zero).end()),
+            Dadc => (Dadd, Enc::double().src(Src::Zero).end()),
+            Sbc => (Subc, Enc::double().src(Src::Zero).end()),
+        }
+    }
+}
+
+impl FromStr for Opcode {
+    type Err = Error;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use Opcode::*;
+        //TODO: Reduce allocations here
+        let s = s.to_ascii_lowercase();
+        Ok(match s.as_str() {
+            "rrc" => Rrc,
+            "swpb" => Swpb,
+            "rra" => Rra,
+            "sxt" => Sxt,
+            "push" => Push,
+            "call" => Call,
+            "reti" => Reti,
+
+            "jne" | "jnz" => Jnz,
+            "jeq" | "jz" => Jz,
+            "jnc" | "jlo" => Jnc,
+            "jc" | "jhs" => Jc,
+            "jn" => Jn,
+            "jge" => Jge,
+            "jl" => Jl,
+            "jmp" => Jmp,
+
+            "mov" => Mov,
+            "add" => Add,
+            "addc" => Addc,
+            "subc" => Subc,
+            "sub" => Sub,
+            "cmp" => Cmp,
+            "dadd" => Dadd,
+            "bit" => Bit,
+            "bic" => Bic,
+            "bis" => Bis,
+            "xor" => Xor,
+            "and" => And,
+
+            "nop" => Nop,
+            "pop" => Pop,
+            "br" => Br,
+            "ret" => Ret,
+            "clrc" => Clrc,
+            "setc" => Setc,
+            "clrz" => Clrz,
+            "setz" => Setz,
+            "clrn" => Clrn,
+            "setn" => Setn,
+            "dint" => Dint,
+            "eint" => Eint,
+            "rla" => Rla,
+            "rlc" => Rlc,
+            "inv" => Inv,
+            "clr" => Clr,
+            "tst" => Tst,
+            "dec" => Dec,
+            "decd" => Decd,
+            "inc" => Inc,
+            "incd" => Incd,
+            "adc" => Adc,
+            "dadc" => Dadc,
+            "sbc" => Sbc,
+            _ => Err(Error::UnrecognizedOpcode(s))?,
+        })
+    }
+}
+
+impl Display for Opcode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use Opcode::*;
+        write!(
+            f,
+            "{}",
+            match self {
+                Nop => "nop",
+                Pop => "pop",
+                Br => "br",
+                Ret => "ret",
+                Clrc => "clrc",
+                Setc => "setc",
+                Clrz => "clrz",
+                Setz => "setz",
+                Clrn => "clrn",
+                Setn => "setn",
+                Dint => "dint",
+                Eint => "eint",
+                Rla => "rla",
+                Rlc => "rlc",
+                Inv => "inv",
+                Clr => "clr",
+                Tst => "tst",
+                Dec => "dec",
+                Decd => "decd",
+                Inc => "inc",
+                Incd => "incd",
+                Adc => "adc",
+                Dadc => "dadc",
+                Sbc => "sbc",
+                Rrc => "rrc",
+                Swpb => "swpb",
+                Rra => "rra",
+                Sxt => "sxt",
+                Push => "push",
+                Call => "call",
+                Reti => "reti",
+                Jnz => "jnz",
+                Jz => "jz",
+                Jnc => "jnc",
+                Jc => "jc",
+                Jn => "jn",
+                Jge => "jge",
+                Jl => "jl",
+                Jmp => "jmp",
+                Mov => "mov",
+                Add => "add",
+                Addc => "addc",
+                Subc => "subc",
+                Sub => "sub",
+                Cmp => "cmp",
+                Dadd => "dadd",
+                Bit => "bit",
+                Bic => "bic",
+                Bis => "bis",
+                Xor => "xor",
+                And => "and",
+            }
+        )
+    }
+}
+
+impl LowerHex for Opcode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:04x}", *self as u16) }
+}
diff --git a/src/parser/label.rs b/src/parser/label.rs
new file mode 100644
index 0000000..f9f7614
--- /dev/null
+++ b/src/parser/label.rs
@@ -0,0 +1,16 @@
+// © 2023 John Breaux
+use super::*;
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Label(pub Identifier);
+
+impl Parsable for Label {
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where T: TokenStream<'text> {
+        Ok(Self(Identifier::parse(p, stream).and_then(|t| stream.require(Type::Label).and(Ok(t)))?))
+    }
+}
+
+impl Display for Label {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
+}
diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs
new file mode 100644
index 0000000..50fdadf
--- /dev/null
+++ b/src/parser/parsable.rs
@@ -0,0 +1,44 @@
+// © 2023 John Breaux
+use super::*;
+/// Parses tokens from [stream](TokenStream) into Self node
+pub trait Parsable {
+    /// Parses tokens from [TokenStream](TokenStream) into Self nodes
+    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
+    where
+        Self: Sized,
+        T: TokenStream<'text>;
+
+    /// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
+    ///
+    /// Masks failed expectations.
+    fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, Error>
+    where
+        Self: Sized,
+        T: TokenStream<'text>,
+    {
+        match Self::parse(p, stream).map_err(|e| e.bare()) {
+            Ok(tt) => Ok(Some(tt)),
+            Err(Error::UnexpectedToken { .. }) | Err(Error::AllExpectationsFailed { .. }) => Ok(None),
+            Err(e) => Err(e),
+        }
+    }
+
+    fn parse_and<'text, T, R>(p: &Parser, stream: &mut T, f: fn(p: &Parser, &mut T) -> R) -> Result<(Self, R), Error>
+    where
+        Self: Sized,
+        T: TokenStream<'text>,
+    {
+        Ok((Self::parse(p, stream)?, f(p, stream)))
+    }
+
+    /// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
+    ///
+    /// Returns [`Self::default()`](Default::default()) on error
+    fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
+    where
+        Self: Sized + Default,
+        T: TokenStream<'text>,
+    {
+        Self::parse(p, stream).unwrap_or_default()
+    }
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
new file mode 100644
index 0000000..c842e94
--- /dev/null
+++ b/src/tokenizer.rs
@@ -0,0 +1,193 @@
+// © 2023 John Breaux
+//! Iterates over &[str], producing [Token]s
+
+// Things we need:
+// ✔ 1. Lexer/Tokenizer
+// ✔     1. Instructions
+// ✔         1. Instruction mnemonics /ad.../
+// ✔         2. Byte/Word Mode Marker /(.\[bw\])?/
+// ✔     2. Src operands
+// ✔         1. Registers /(r1[0-5]|r[0-9])/
+// ✔         2. Immediate Values /#/
+// ✔         3. Absolute addresses /&/
+// ✔         4. Numbers /[0-9A-Fa-f]+/
+// ✔         5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
+// ✔     4. Label definitions /(^.*):/
+// ✔     5. Comments (may be useful for debugging)
+
+pub mod context;
+pub mod token;
+
+use crate::Error;
+use context::Context;
+use token::{Token, Type};
+
+/// An [Iterator] of [Token]s that can also peek at, or conditionally consume, the next [Token]
+pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
+    /// Gets this stream's [Context]
+    fn context(&self) -> Context;
+
+    /// Creates an iterator that skips [Type::Space] in the input
+    fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
+    where Self: Sized {
+        IgnoreSpaces::new(self)
+    }
+
+    /// Returns the next [Token] without advancing
+    fn peek(&mut self) -> Self::Item;
+
+    /// Returns the next [Token] if it is of the expected [Type], without advancing
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
+
+    /// Consumes and returns a [Token] if it is the expected [Type]
+    ///
+    /// Otherwise, does not consume a [Token]
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
+
+    /// Ignores a [Token] of the expected [Type], propagating errors.
+    fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
+
+    /// Ignores a [Token] of the expected [Type], discarding errors.
+    fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
+
+    /// Calls `f` with each expected [Type] in order, until one does not fail with an unexpected token
+    fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
+    where T: AsRef<[Type]> {
+        for &expected in expected.as_ref() {
+            match f(self, expected).map_err(|e| e.bare()) {
+                Ok(t) => return Ok(t),
+                Err(Error::UnexpectedToken { .. }) => continue,
+                Err(e) => return Err(e.context(self.context())),
+            }
+        }
+        Err(Error::expected(expected, self.peek()).context(self.context()))
+    }
+
+    /// Returns the next [Token] if it is of the expected [Types](Type), without advancing
+    fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::peek_expect, expected)
+    }
+    /// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
+    ///
+    /// Otherwise, does not consume a [Token]
+    fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::expect, expected)
+    }
+    /// Ignores a [Token] of any expected [Type], discarding errors.
+    fn allow_any_of<T>(&mut self, expected: T)
+    where T: AsRef<[Type]> {
+        let _ = self.expect_any_of(expected);
+    }
+    /// Ignores a [Token] of any expected [Type], propagating errors.
+    fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
+    where T: AsRef<[Type]> {
+        self.any_of(Self::require, expected)
+    }
+}
+
+/// Iterates over &[str], producing [Token]s
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Tokenizer<'t> {
+    text: &'t str,
+    idx: usize,
+    context: Context,
+}
+
+impl<'t> Tokenizer<'t> {
+    /// Produces a new [Tokenizer] from a [str]ing slice
+    pub fn new<T>(text: &'t T) -> Self
+    where T: AsRef<str> + ?Sized {
+        Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
+    }
+
+    fn count(&mut self, token: &Token) {
+        // update the context
+        self.context.count(token);
+        // advance the index
+        self.idx += token.len();
+    }
+}
+
+impl<'text> Iterator for Tokenizer<'text> {
+    type Item = Token<'text>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.idx >= self.text.len() {
+            return None;
+        }
+        let token = Token::from(&self.text[self.idx..]);
+        // Record the token in the context, and advance the index past it
+        self.count(&token);
+        Some(token)
+    }
+}
+
+impl<'text> TokenStream<'text> for Tokenizer<'text> {
+    fn context(&self) -> Context { self.context }
+    // Tokenizer has access to the source buffer, and can implement expect and peek without cloning
+    // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
+    // registers are valid identifiers.
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
+        self.count(&token);
+        Ok(token)
+    }
+    fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        Token::expect(&self.text[self.idx..], expected)
+    }
+}
+
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct IgnoreSpaces<'t, T>
+where T: TokenStream<'t>
+{
+    inner: &'t mut T,
+}
+
+impl<'t, T> IgnoreSpaces<'t, T>
+where T: TokenStream<'t>
+{
+    pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
+    /// Gets a mutable reference to the inner [Iterator]
+    pub fn inner_mut(&mut self) -> &mut T { self.inner }
+}
+
+impl<'t, T> Iterator for IgnoreSpaces<'t, T>
+where T: TokenStream<'t>
+{
+    type Item = Token<'t>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.inner.next()?;
+        // Space tokens are greedy, so the next token shouldn't be a Space
+        match next.variant() {
+            Type::Space => self.next(),
+            _ => Some(next),
+        }
+    }
+}
+
+impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
+where T: TokenStream<'t>
+{
+    fn context(&self) -> Context { self.inner.context() }
+    fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        self.inner.allow_any_of([Type::Space, Type::Endl]);
+        self.inner.expect(expected)
+    }
+
+    fn peek(&mut self) -> Self::Item {
+        self.inner.allow_any_of([Type::Space, Type::Endl]);
+        self.inner.peek()
+    }
+
+    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
+        self.inner.allow_any_of([Type::Space, Type::Endl]);
+        self.inner.peek_expect(expected)
+    }
+}
diff --git a/src/tokenizer/context.rs b/src/tokenizer/context.rs
new file mode 100644
index 0000000..9576e98
--- /dev/null
+++ b/src/tokenizer/context.rs
@@ -0,0 +1,36 @@
+//! Stores contextual information about the current tokenizer state, useful for printing errors
+use super::*;
+/// Stores contextual information about the current tokenizer state, useful for printing errors
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Context {
+    line: usize,
+    tokens: usize,
+    position: usize,
+}
+
+impl Context {
+    pub fn new() -> Self { Default::default() }
+    pub fn line(&self) -> usize { self.line }
+    pub fn tokens(&self) -> usize { self.tokens }
+    pub fn position(&self) -> usize { self.position }
+    /// Counts a consumed [Token]: each newline in an Endl is a line; position restarts after the last one
+    pub(super) fn count(&mut self, t: &Token) {
+        let newlines = t.lexeme().matches('\n').count();
+        match t.variant() {
+            Type::EndOfFile => return,
+            Type::Endl if newlines > 0 => self.position = t.lexeme().rsplit('\n').next().map_or(0, str::len),
+            _ => self.position += t.len(),
+        }
+        self.line += newlines;
+        self.tokens += 1;
+    }
+}
+impl Default for Context {
+    fn default() -> Self { Self { line: 1, tokens: 0, position: 0 } }
+}
+
+impl std::fmt::Display for Context {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}:{}", self.line, self.position)
+    }
+}
diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs
new file mode 100644
index 0000000..d86c67e
--- /dev/null
+++ b/src/tokenizer/token.rs
@@ -0,0 +1,309 @@
+// © 2023 John Breaux
+//! Defines the [Token]
+//!
+//! A [Token] represents all valid sequences of characters,
+//! sorted by meaning
+
+use regex::Regex;
+use std::{
+    fmt::{Debug, Display},
+    sync::OnceLock,
+};
+
+/// Implements regex matching functions on [`Token`] for each [`Type`],
+/// and implements [`From<&str>`] for [`Token`]
+macro_rules! regex_impl {
+(<$t:lifetime> $type:ty {$(
+    $(#[$meta:meta])*
+    pub fn $func:ident (text: &str) -> Option<Self> {
+        regex!($out:path = $re:literal)
+    }
+)*}) => {
+impl<$t> $type {
+    /// Lexes a token only for the expected `variant`
+    ///
+    /// Warning: This bypasses precedence rules. Only use for specific patterns.
+    pub fn expect(text: &$t str, expected: Type) -> Result<Self, Error> {
+        match expected {$(
+            $out => Self::$func(text),
+        )*}.ok_or(Error::UnexpectedToken {
+            expected,
+            got: Self::from(text).into(),
+        })
+    }
+    $(
+    $(#[$meta])*
+    /// Tries to read [`
+    #[doc = stringify!($out)]
+    /// `] from `text`
+    pub fn $func(text: &$t str) -> Option<Self> {
+        static RE: OnceLock<Regex> = OnceLock::new();
+        let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
+            .find(text)?.into();
+        Some(Self { variant: $out, lexeme })
+    })*
+}
+impl<$t> From<&$t str> for $type {
+    fn from (value: &$t str) -> Self {
+        $(
+            if let Some(token) = Self::$func(value) {
+                token
+            } else
+        )*
+        {todo!("Unexpected input: {value:#?}")}
+    }
+}
+};
+}
+
+use crate::Error;
+
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Token<'text> {
+    /// The type of this token
+    variant: Type,
+    /// The sub[str]ing corresponding to this token
+    lexeme: &'text str,
+}
+
+impl<'text> Token<'text> {
+    /// Returns the [Type] of this [Token]
+    pub fn variant(&self) -> Type { self.variant }
+
+    /// Returns the lexeme of this token: a slice of the source text, which may outlive the [Token]
+    pub fn lexeme(&self) -> &'text str { self.lexeme }
+
+    /// Parses this [Token] into another type
+    pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
+    where F: std::str::FromStr {
+        self.lexeme.parse()
+    }
+    /// Returns whether the Lexeme is the expected [Type]
+    pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
+
+    /// Returns the length of [Self::lexeme] in bytes.
+    pub fn len(&self) -> usize { self.lexeme.len() }
+
+    /// Returns `true` if [Self::lexeme] has a length of zero bytes.
+    pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
+}
+
+impl<'text> Debug for Token<'text> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
+    }
+}
+
+impl<'text> Display for Token<'text> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self.variant {
+            Type::Endl | Type::EndOfFile => write!(f, "{}", self.variant),
+            v => write!(f, "\"{}\" ({v})", self.lexeme),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Type {
+    /// contiguous whitespace, excluding newline
+    Space,
+    /// newline and contiguous whitespace
+    Endl,
+    /// A line-comment
+    Comment,
+    /// Label *definition* marker (`:`), which follows the label's identifier
+    Label,
+    /// Instructions
+    Insn,
+    /// Operand width is byte
+    ByteWidth,
+    /// Operand width is word
+    WordWidth,
+    /// Register mnemonic (i.e. `pc`, `r14`)
+    Register,
+    /// Marker for base-10
+    RadixMarkerDec,
+    /// Marker for base-16
+    RadixMarkerHex,
+    /// Marker for base-8
+    RadixMarkerOct,
+    /// Marker for base-2
+    RadixMarkerBin,
+    /// Run of hexadecimal digits (NOTE(review): the lexer accepts 1-5 digits; 1-4 was documented)
+    Number,
+    /// Negative number marker
+    Minus,
+    /// post-increment mode marker
+    Plus,
+    /// Open-Indexed-Mode marker
+    LParen,
+    /// Close-Indexed-Mode marker
+    RParen,
+    /// Indirect mode marker
+    Indirect,
+    /// absolute address marker
+    Absolute,
+    /// immediate value marker
+    Immediate,
+    /// Valid identifier. Identifiers must start with a Latin alphabetic character or underline
+    Identifier,
+    /// Assembler directive
+    Directive,
+    /// Separator (comma)
+    Separator,
+    /// End of File marker
+    EndOfFile,
+}
+
+regex_impl! {<'text> Token<'text> {
+    pub fn expect_space(text: &str) -> Option<Self> {
+        regex!(Type::Space = r"^[\s--\n]+")
+    }
+    pub fn expect_endl(text: &str) -> Option<Self> {
+        regex!(Type::Endl = r"^[\s]+")
+    }
+    pub fn expect_comment(text: &str) -> Option<Self> {
+        regex!(Type::Comment = r"^(;|//).*")
+    }
+    pub fn expect_label(text: &str) -> Option<Self> {
+        regex!(Type::Label = r"^:")
+    }
+    pub fn expect_insn(text: &str) -> Option<Self> {
+        regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
+    }
+    pub fn expect_byte_width(text: &str) -> Option<Self> {
+        regex!(Type::ByteWidth = r"(?i)^\.b")
+    }
+    pub fn expect_word_width(text: &str) -> Option<Self> {
+        regex!(Type::WordWidth = r"(?i)^\.w")
+    }
+    pub fn expect_register(text: &str) -> Option<Self> {
+        // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
+        regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)")
+    }
+    pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
+        regex!(Type::RadixMarkerDec = r"(?i)^0d")
+    }
+    pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
+        regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
+    }
+    pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
+        regex!(Type::RadixMarkerOct = r"(?i)^0o")
+    }
+    pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
+        regex!(Type::RadixMarkerBin = r"(?i)^0b")
+    }
+    pub fn expect_number(text: &str) -> Option<Self> {
+        regex!(Type::Number = r"^+?[[:xdigit:]]{1,5}")
+    }
+    pub fn expect_minus(text: &str) -> Option<Self> {
+        regex!(Type::Minus = r"^-")
+    }
+    pub fn expect_plus(text: &str) -> Option<Self> {
+        regex!(Type::Plus = r"^\+")
+    }
+    pub fn expect_open_idx(text: &str) -> Option<Self> {
+        regex!(Type::LParen = r"^\(")
+    }
+    pub fn expect_close_idx(text: &str) -> Option<Self> {
+        regex!(Type::RParen = r"^\)")
+    }
+    pub fn expect_indrect(text: &str) -> Option<Self> {
+        regex!(Type::Indirect = r"^@")
+    }
+    pub fn expect_absolute(text: &str) -> Option<Self> {
+        regex!(Type::Absolute = r"^&")
+    }
+    pub fn expect_immediate(text: &str) -> Option<Self> {
+        regex!(Type::Immediate = r"^#")
+    }
+    pub fn expect_directive(text: &str) -> Option<Self> {
+        regex!(Type::Directive = r"^\.\w+( .*)?")
+    }
+    pub fn expect_identifier(text: &str) -> Option<Self> {
+        regex!(Type::Identifier = r"^[A-Za-z_]\w*") // `*`, not `+`: a one-character identifier is valid
+    }
+    pub fn expect_separator(text: &str) -> Option<Self> {
+        regex!(Type::Separator = r"^,")
+    }
+    pub fn expect_end_of_file(text: &str) -> Option<Self> {
+        regex!(Type::EndOfFile = r"^$")
+    }
+}}
+
+impl Display for Type {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Space => write!(f, "space"),
+            Self::Endl => write!(f, "newline"),
+            Self::Comment => write!(f, "comment"),
+            Self::Label => write!(f, "label definition"),
+            Self::Insn => write!(f, "instruction mnemonic"),
+            Self::ByteWidth => write!(f, "byte-width marker"),
+            Self::WordWidth => write!(f, "word-width marker"),
+            Self::Register => write!(f, "register mnemonic"),
+            Self::RadixMarkerDec => write!(f, "decimal radix marker"),
+            Self::RadixMarkerHex => write!(f, "hexadecimal radix marker"),
+            Self::RadixMarkerOct => write!(f, "octal radix marker"),
+            Self::RadixMarkerBin => write!(f, "binary radix marker"),
+            Self::Number => write!(f, "number"),
+            Self::Minus => write!(f, "minus sign"),
+            Self::Plus => write!(f, "plus sign"),
+            Self::LParen => write!(f, "left parenthesis"),
+            Self::RParen => write!(f, "right parenthesis"),
+            Self::Indirect => write!(f, "indirect mode marker"),
+            Self::Absolute => write!(f, "absolute mode marker"),
+            Self::Immediate => write!(f, "immediate mode marker"),
+            Self::Identifier => write!(f, "identifier"),
+            Self::Directive => write!(f, "directive"),
+            Self::Separator => write!(f, "comma"),
+            Self::EndOfFile => write!(f, "EOF"),
+        }
+    }
+}
+
+/// Owned version of a token, which can outlive its parent buffer
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct OwnedToken {
+    /// The type of this token
+    variant: Type,
+    /// The sub[String] corresponding to this token
+    lexeme: String,
+}
+
+impl Display for OwnedToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
+}
+
+impl<'t> From<&'t OwnedToken> for Token<'t> {
+    fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
+}
+
+impl From<Token<'_>> for OwnedToken {
+    fn from(value: Token<'_>) -> Self {
+        let Token { variant, lexeme } = value;
+        OwnedToken { variant, lexeme: lexeme.to_owned() }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Types(Vec<Type>);
+
+impl<T: AsRef<[Type]>> From<T> for Types {
+    // TODO: Possibly bad. Check out in rust playground.
+    fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
+}
+
+/// Formats the list in prose, as in `a, b or c`
+impl Display for Types {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let len = self.0.len();
+        for (idx, t) in self.0.iter().enumerate() {
+            // Compare by addition rather than `len - 2` / `len - 1`:
+            // the subtraction underflows `usize` when the list holds a single element
+            let separator = if idx + 2 < len { ", " } else if idx + 1 < len { " or " } else { "" };
+            write!(f, "{t}{separator}")?;
+        }
+        Ok(())
+    }
+}
diff --git a/valid.asm b/valid.asm
new file mode 100755
index 0000000..c819644
--- /dev/null
+++ b/valid.asm
@@ -0,0 +1,260 @@
+;© 2023 John Breaux
+; examples of valid assembly
+;
+
+_register_mode:
+.define numbered r1
+mov r0, r1
+mov r1, r2
+mov r2, r3
+mov r3, r4
+mov r4, r5
+mov r5, r6
+mov r6, r7
+mov r7, r8
+mov r8, r9
+mov r9, r10
+mov r10, r11
+mov r11, r12
+mov r12, r13
+mov r13, r14
+mov r14, r15
+
+.define special r2
+mov pc, r15
+mov sp, r15
+mov sr, r15
+mov cg, r15
+
+
+indirect_mode:
+.define numbered r3
+mov @r0, r1
+mov @r1, r2
+;mov @r2, r3
+;mov @r3, r4
+mov @r4, r5
+mov @r5, r6
+mov @r6, r7
+mov @r7, r8
+mov @r8, r9
+mov @r9, r10
+mov @r10, r11
+mov @r11, r12
+mov @r12, r13
+mov @r13, r14
+mov @r14, r15
+
+.define special r4
+mov @pc, r15
+mov @sp, r15
+;mov @sr, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
+;mov @cg, r15
+
+indirect_pi_mode:
+.define numbered r5
+;mov @r0+, r1
+mov @r1+, r2
+;mov @r2+, r3
+;mov @r3+, r4
+mov @r4+, r5
+mov @r5+, r6
+mov @r6+, r7
+mov @r7+, r8
+mov @r8+, r9
+mov @r9+, r10
+mov @r10+, r11
+mov @r11+, r12
+mov @r12+, r13
+mov @r13+, r14
+mov @r14+, r15
+
+.define special r6
+;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid
+;mov @sp+, r15 ; pop r15
+;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
+;mov @cg+, r15
+
+indexed_mode:
+.define numbered r7
+mov.b 10(r0), r1
+mov 10(r1), r2
+;mov 10(r2), r3 ; Invalid: cannot index relative to sr
+;mov 10(r3), r4 ; Invalid: cannot index relative to cg
+mov 10(r4), r5
+mov 10(r5), r6
+mov 10(r6), r7
+mov 10(r7), r8
+mov 10(r8), r9
+mov 10(r9), r10
+mov 10(r10), r11
+mov 10(r11), r12
+mov 10(r12), r13
+mov 10(r13), r14
+mov 10(r14), r15
+
+.define special r8
+mov 10(pc), r15
+mov 10(sp), r15
+;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
+;mov 10(cg), r15
+
+_immediate_mode:
+.define numbered r9
+mov #beef, r0
+mov #beef, r1
+mov #beef, r2
+mov #beef, r3
+mov #beef, r4
+mov #beef, r5
+mov #beef, r6
+mov #beef, r7
+mov #beef, r8
+mov #beef, r9
+mov #beef, r10
+mov #beef, r11
+mov #beef, r12
+mov #beef, r13
+mov #beef, r14
+mov #beef, r15
+
+.define special r10
+mov #beef, pc
+mov #beef, sp
+mov #beef, sr
+mov #beef, cg
+
+; jmp _register_mode ; TODO: msp430_asm currently has no support for jump labels.
+jmp 3fe
+jmp -3fc
+ret
+
+; Funky encodings
+mov r6,         r4
+mov @r6,        r4
+mov @r6+,       r4
+mov 0(r6),      r4
+mov 4141(r6),   r4
+mov #-1,        r4
+mov #ffff,      r4
+mov #0,         r4
+mov #1,         r4
+mov #2,         r4
+mov #4,         r4
+mov #8,         r4
+mov r6,         0(r4)
+mov @r6,        0(r4)
+mov @r6+,       0(r4)
+mov 0(r6),      0(r4)
+mov 4141(r6),   0(r4)
+mov #-1,        0(r4)
+mov #ffff,      0(r4)
+mov #0,         0(r4)
+mov #1,         0(r4)
+mov #2,         0(r4)
+mov #4,         0(r4)
+mov #8,         0(r4)
+mov r6,         4141(r4)
+mov @r6,        4141(r4)
+mov @r6+,       4141(r4)
+mov 0(r6),      4141(r4)
+mov 4141(r6),   4141(r4)
+mov #-1,        4141(r4)
+mov #ffff,      4141(r4)
+mov #0,         4141(r4)
+mov #1,         4141(r4)
+mov #2,         4141(r4)
+mov #4,         4141(r4)
+mov #8,         4141(r4)
+mov r6,         #0
+mov @r6,        #0
+mov @r6+,       #0
+mov 0(r6),      #0
+mov 4141(r6),   #0
+mov #-1,        #0
+mov #ffff,      #0
+mov #0,         #0
+mov #1,         #0
+mov #2,         #0
+mov #4,         #0
+mov #8,         #0
+mov r6,         #1
+mov @r6,        #1
+mov @r6+,       #1
+mov 0(r6),      #1
+mov 4141(r6),   #1
+mov #-1,        #1
+mov #ffff,      #1
+mov #0,         #1
+mov #1,         #1
+mov #2,         #1
+mov #4,         #1
+mov #8,         #1
+
+; Instruction exercise
+; Jumps
+jne 10
+jeq 10
+jlo 10
+jhs 10
+jn  10
+jge 10
+jl  10
+jmp 10
+
+; Two-ops
+mov  r14, r15
+add  r14, r15
+addc r14, r15
+subc r14, r15
+sub  r14, r15
+cmp  r14, r15
+dadd r14, r15
+bit  r14, r15
+bic  r14, r15
+bis  r14, r15
+xor  r14, r15
+and  r14, 10(r15)
+
+; One-ops
+rrc  r15
+swpb r15
+rra  r15
+sxt  r15
+push r15
+call r15
+reti r15
+
+; Jump aliases
+jnc 10
+jnz 10
+jc  10
+jz  10
+
+; "emulated" no-op instructions
+ret
+clrc
+setc
+clrz
+setz
+clrn
+setn
+dint
+eint
+nop
+
+; "emulated" one-op instructions
+br   r15
+pop  r15
+rla  r15
+rlc  r15
+inv  r15
+clr  r15
+tst  r15
+dec  r15
+decd r15
+inc  r15
+incd r15
+adc  r15
+dadc r15
+sbc  r15