msp430-asm: init repo with proof-of-concept code

2023-08-19 23:02:24 -05:00
commit a9ee7d3bc9
29 changed files with 2460 additions and 0 deletions

107
src/error.rs Normal file

@@ -0,0 +1,107 @@
// © 2023 John Breaux
// TODO: Be incredibly specific about the source of the errors
use std::fmt::Display;
use super::{
tokenizer::token::{OwnedToken, Types},
*,
};
// TODO: Store error context in error. for example:
// Error {ExpectationFailed{...}, WhileParsing(Register)}
#[derive(Debug)]
pub enum Error {
/// Produced by [Parser](crate::parser::Parser::parse<T>())
ParseError(parser::root::Root, Box<dyn std::error::Error + 'static>),
Contextual(Context, Box<Self>),
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
UnexpectedToken {
expected: Type,
got: OwnedToken,
},
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the expected
/// [Types](Type)
AllExpectationsFailed {
expected: Types,
got: OwnedToken,
},
/// Produced by
/// [Number](parser::instruction::encoding::number::Number)[::parse()](parser::parsable::Parsable::parse())
/// when the parsed number contains digits too high for the specified radix
UnexpectedDigits(String, u32),
/// Produced by
/// [Opcode](parser::instruction::opcode::Opcode)[::parse()](parser::parsable::Parsable::parse())
/// when the opcode passed lexing but did not match recognized opcodes.
///
/// This should be interpreted as a failure in lexing.
UnrecognizedOpcode(String),
NotARegister(String),
RegisterTooHigh(u16),
FatSecondaryImmediate(isize),
NumberTooWide(isize),
JumpedTooFar(isize),
JumpedOdd(isize),
EndOfFile,
}
impl Error {
pub fn context(self, c: Context) -> Self {
match self {
Self::Contextual(..) => self,
_ => Self::Contextual(c, Box::new(self)),
}
}
// Strips any Contextual wrappers, returning the innermost error
pub fn bare(self) -> Self {
match self {
Self::Contextual(_, bare) => bare.bare(),
_ => self,
}
}
pub fn swap(mut self, other: Self) -> Self {
if let Self::Contextual(_, err) = &mut self {
_ = std::mem::replace(err.as_mut(), other)
}
self
}
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
match expected.as_ref().len() {
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
}
}
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Error::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
Error::ParseError(_, error) => write!(f, "Error encountered while parsing:\n{error}"),
Error::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
Error::AllExpectationsFailed { expected, got } => write!(f, "Expected one of {expected}, got {got}."),
Error::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
Error::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
Error::NotARegister(reg) => write!(f, "{reg} is not a register"),
Error::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
Error::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
Error::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
Error::JumpedTooFar(num) => write!(f, "{num} is too far away (jump targets must be in the range -0x3fc..=0x3fe)"),
Error::JumpedOdd(num) => write!(f, "Jump target {num} should not be odd."),
Error::EndOfFile => write!(f, "Unexpected end of file"),
}
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::ParseError(_, e) => Some(e.as_ref()),
_ => None,
}
}
}
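Usage sketch (illustrative, not part of the committed sources): wrapping and unwrapping error context with this API, assuming the crate is imported as `msp430_asm` the way `src/main.rs` does.

use msp430_asm::preamble::*;

fn main() {
    // Attach tokenizer context to a bare error, then strip it back off with `bare()`.
    let err = Error::UnrecognizedOpcode("frob".into()).context(Context::new());
    println!("{err}"); // "1:0: frob is not an opcode"
    match err.bare() {
        Error::UnrecognizedOpcode(op) => println!("offending mnemonic: {op}"),
        other => println!("unexpected error: {other}"),
    }
}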

17
src/hash.rs Normal file

@@ -0,0 +1,17 @@
// © 2023 John Breaux
//! Convenience trait for dealing with hashable data
pub type Hash = u64;
pub trait FromHash: From<Hash> {
/// Hashes anything that implements [`std::hash::Hash`] using the [DefaultHasher](std::collections::hash_map::DefaultHasher)
fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
use std::hash::Hasher;
let mut hasher = std::collections::hash_map::DefaultHasher::new();
hashable.hash(&mut hasher);
hasher.finish()
}
fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
where Self: Sized {
Self::from(Self::hash(hashable))
}
}
impl<T: From<Hash>> FromHash for T {}
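Illustrative sketch of the blanket impl in use; the `Tag` wrapper is hypothetical and exists only to demonstrate `FromHash`.

use msp430_asm::preamble::*;

#[derive(Debug)]
struct Tag(Hash);
impl From<Hash> for Tag {
    fn from(value: Hash) -> Self { Self(value) }
}

fn main() {
    // Any `From<Hash>` type gets `from_hash` for free via the blanket impl above.
    assert_eq!(Tag::from_hash(".define").0, Tag::hash(".define"));
    println!("{:?}", Tag::from_hash(".define"));
}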

21
src/lib.rs Normal file

@@ -0,0 +1,21 @@
// © 2023 John Breaux
//! An assembler for the TI MSP430
pub mod preamble {
use super::*;
pub use error::Error;
pub use hash::{FromHash, Hash};
pub use linker::{Linker, Visitor};
pub use parser::Parser;
pub use tokenizer::{
context::Context,
token::{Token, Type},
TokenStream, Tokenizer,
};
}
use preamble::*;
pub mod error;
pub mod hash;
pub mod linker;
pub mod parser;
pub mod tokenizer;

20
src/linker.rs Normal file

@@ -0,0 +1,20 @@
// © 2023 John Breaux
//! TODO: tree traversal and label resolution
use crate::parser::preamble::*;
pub trait Visitor<T> {
// visit_node for all nodes
fn visit_register(&mut self, r: &Register) -> T;
fn visit_number(&mut self, n: &Number) -> T;
fn visit_width(&mut self, w: &Width) -> T;
fn visit_primary_operand(&mut self, p: &PrimaryOperand) -> T;
fn visit_secondary_operand(&mut self, d: &SecondaryOperand) -> T;
fn visit_jump_target(&mut self, t: &JumpTarget) -> T;
fn visit_encoding(&mut self, e: &Encoding) -> T;
fn visit_opcode(&mut self, o: &Opcode) -> T;
fn visit_instruction(&mut self, i: &Instruction) -> T;
fn visit_directive(&mut self, d: &Directive) -> T;
// the most important one: resolve identifiers
fn visit_identifier(&mut self, i: &Identifier) -> T;
}
/// TODO: [Linker]
pub struct Linker;
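A sketch of one possible implementor, not part of this commit: a hypothetical visitor that sizes instructions in machine words, using only the AST accessors defined elsewhere in this diff and the `msp430_asm` crate name from `src/main.rs`.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

/// Hypothetical visitor: counts the 16-bit words an AST node assembles to.
struct SizeOf;
impl Visitor<usize> for SizeOf {
    fn visit_register(&mut self, _: &Register) -> usize { 0 }
    fn visit_number(&mut self, _: &Number) -> usize { 0 }
    fn visit_width(&mut self, _: &Width) -> usize { 0 }
    fn visit_primary_operand(&mut self, p: &PrimaryOperand) -> usize { p.ext_word().is_some() as usize }
    fn visit_secondary_operand(&mut self, d: &SecondaryOperand) -> usize { d.ext_word().is_some() as usize }
    fn visit_jump_target(&mut self, _: &JumpTarget) -> usize { 0 }
    fn visit_encoding(&mut self, e: &Encoding) -> usize {
        let (src, dst) = e.extwords();
        src.is_some() as usize + dst.is_some() as usize
    }
    fn visit_opcode(&mut self, _: &Opcode) -> usize { 0 }
    fn visit_instruction(&mut self, i: &Instruction) -> usize { 1 + self.visit_encoding(i.encoding()) }
    fn visit_directive(&mut self, _: &Directive) -> usize { 0 }
    fn visit_identifier(&mut self, _: &Identifier) -> usize { 0 }
}

fn main() -> Result<(), Error> {
    let mut sizer = SizeOf;
    if let Line::Insn(insn) = Parser::default().parse_one("mov #4000, &0200")? {
        println!("{} word(s)", sizer.visit_instruction(&insn)); // 3: opcode word + two extension words
    }
    Ok(())
}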

104
src/main.rs Normal file

@@ -0,0 +1,104 @@
//! Simple frontend for the assembler
use std::io::Read;
use msp430_asm::preamble::*;
// const ASM: &str = r"
// //.org 8000
// //.define INT #2400
// //entry:
// mov.b 8000(sp), r15 ; pop into sp
// rrc @pc+
// add #64, r8
// call #10 // call INT
// ";
fn main() -> Result<(), Error> {
// Get args
let mut repl = true;
for arg in std::env::args() {
match arg.as_str() {
"-" | "-f" | "--file" => repl = false,
_ => (),
}
}
// Decide if repl mode is enabled
let mut buf = String::new();
if repl {
// print!("> ");
// let _ = std::io::stdout().flush();
while let Ok(len) = std::io::stdin().read_line(&mut buf) {
match len {
0 => break,
1 => continue,
_ => (),
}
// print!("\nLexer: ");
// tokenizer_dump(&mut Tokenizer::new(&buf));
//print!("Parser: ");
match Parser::default().parse(&buf) {
Ok(line) => println!("{line:x}"),
//Ok(tree) => println!("=> {tree}\n => {tree:x}"),
Err(error) => println!("{error}"),
}
buf.clear();
// print!("> ");
// let _ = std::io::stdout().flush();
}
} else {
std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?;
let mut tk = Tokenizer::new(&buf);
// println!("Lexer: ");
// tokenizer_dump(&mut Tokenizer::new(&buf));
let tree = Parser::default().parse_with(&mut tk);
match &tree {
Ok(tree) => println!("{tree:x}"),
Err(error) => eprintln!("{error}"),
}
}
Ok(())
}
#[allow(dead_code)]
fn tokenizer_dump<'text, T: TokenStream<'text>>(t: &mut T) {
for token in t {
match token.variant() {
//Token::Space => (),
Type::Endl => {
println!();
continue;
}
Type::Comment => (),
Type::Label => (),
Type::Insn => (),
Type::ByteWidth => (),
Type::WordWidth => (),
Type::Register => (),
Type::RadixMarkerHex => (),
Type::RadixMarkerOct => (),
Type::RadixMarkerBin => (),
Type::Number => (),
Type::Minus => (),
Type::LParen => (),
Type::RParen => (),
Type::Indirect => (),
Type::Plus => (),
Type::Absolute => (),
Type::Immediate => (),
Type::Identifier => (),
Type::Directive => (),
Type::Separator => (),
Type::EndOfFile => (),
_ => continue,
};
print!("{token:?} ");
}
}

212
src/parser.rs Normal file

@@ -0,0 +1,212 @@
// © 2023 John Breaux
//! Parses [Tokens](crate::Token) into an [abstract syntax tree](Root)
use crate::{Error, Hash, TokenStream, Type};
use std::fmt::{Debug, Display, LowerHex};
pub mod preamble {
//! All the different AST node types
use super::*;
// Traits
pub use parsable::Parsable;
pub use comment::Comment;
pub use directive::Directive;
pub use identifier::Identifier;
pub use instruction::{
encoding::{
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
},
opcode::Opcode,
Instruction,
};
pub use label::Label;
pub use line::Line;
pub use root::Root;
}
use preamble::*;
pub(crate) mod parsable;
pub(crate) mod comment;
pub(crate) mod directive;
pub(crate) mod identifier;
pub(crate) mod instruction;
pub(crate) mod label;
pub(crate) mod line {
// © 2023 John Breaux
use super::*;
/// A line is one of:
/// - [`Label`] (definition)
/// - [`Instruction`]
/// - [`Directive`]
/// - [`Comment`]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Line {
Empty,
Label(Label), // TODO: Label resolution
Insn(Instruction),
Directive(Directive),
Comment(Comment),
EndOfFile, // Expected end of file
}
impl Parsable for Line {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
if let Ok(token) = stream.peek_expect_any_of([Type::Comment, Type::Directive, Type::Insn, Type::Identifier])
{
return Ok(match token.variant() {
Type::Comment => Self::Comment(Comment::parse(p, stream)?),
Type::Directive => Self::Directive(Directive::parse(p, stream)?),
Type::Identifier => Self::Label(Label::parse(p, stream)?),
Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
_ => unreachable!(),
});
}
// TODO: preserve comments
let token = stream.expect_any_of([Type::EndOfFile])?;
Ok(match token.variant() {
Type::EndOfFile => Self::EndOfFile,
_ => unreachable!(),
})
}
}
impl Display for Line {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Empty => writeln!(f, "\n"),
Self::Label(arg0) => Display::fmt(arg0, f),
Self::Insn(arg0) => Display::fmt(arg0, f),
Self::Directive(arg0) => Display::fmt(arg0, f),
Self::Comment(arg0) => Display::fmt(arg0, f),
Self::EndOfFile => write!(f, "; End of file."),
}
}
}
impl LowerHex for Line {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Line::Insn(arg0) => LowerHex::fmt(arg0, f),
_ => Ok(()),
}
}
}
}
pub(crate) mod root {
// © 2023 John Breaux
use super::*;
/// Contains the entire AST
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Root(pub Vec<Line>);
// TODO: Get data out of ParseTree
// TODO: Maybe implement some sort of follower
impl Parsable for Root {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
let mut lines = vec![];
loop {
match Line::parse(p, stream) {
Ok(Line::EndOfFile) => break,
Ok(line) => lines.push(line),
Err(e) => {
let ret = Self(lines);
eprintln!("{ret}");
eprintln!("Error:{e}\n");
eprint!("Remaining:");
stream.for_each(|t| eprint!("{t}"));
eprintln!();
return Err(Error::ParseError(ret, Box::new(e)));
}
}
}
Ok(Root(lines))
}
}
impl Display for Root {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for line in self.0.iter() {
f.pad(&format!("{line} "))?;
}
Ok(())
}
}
impl LowerHex for Root {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for line in self.0.iter() {
LowerHex::fmt(line, f)?;
}
Ok(())
}
}
impl Debug for Root {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for line in self.0.iter() {
Display::fmt(line, f)?;
Debug::fmt(line, f)?;
}
Ok(())
}
}
}
/// Callback type invoked when the [Parser] emits a comment
pub type EmitComment = Box<dyn FnMut(&str)>;
/// Callback type invoked when the [Parser] defines a label
pub type DefineLabel = Box<dyn FnMut(&Identifier) -> Result<(), Error>>;
pub struct Parser {
radix: u32,
// TODO: callbacks for emitted token sequences?!
on_label: Option<DefineLabel>,
on_comment: Option<EmitComment>,
}
impl Parser {
pub fn parse_with<'t, T>(self, stream: &'t mut T) -> Result<Root, Error>
where T: TokenStream<'t> {
Root::parse(&self, &mut stream.ignore_spaces())
}
pub fn parse<T>(self, input: &T) -> Result<Root, Error>
where T: AsRef<str> + ?Sized {
Root::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
}
pub fn parse_one<T>(self, input: &T) -> Result<Line, Error>
where T: AsRef<str> + ?Sized {
Line::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces())
}
/// Sets the default radix for [Token](crate::tokenizer::token::Token) -> [Number]
/// conversion
pub fn radix(mut self, radix: u32) -> Self { self.radix = radix; self }
/// Inform the caller of a new identifier definition
pub fn define_label(&mut self, l: &Identifier) -> Result<(), Error> {
match self.on_label.as_mut() {
Some(f) => f(l),
_ => Ok(()),
}
}
/// Inform the caller of an emitted comment
pub fn emit_comment(&mut self, d: &str) {
if let Some(f) = self.on_comment.as_mut() {
f(d)
}
}
}
impl Default for Parser {
fn default() -> Self { Self { radix: 16, on_label: None, on_comment: None } }
}
impl Debug for Parser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
}
}
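A quick sketch of driving the parser from library code; illustrative only, assuming the `msp430_asm` crate name used by `src/main.rs`.

use msp430_asm::preamble::*;

fn main() -> Result<(), Error> {
    let tree = Parser::default().parse("start:\n  push r11\n  mov.b @r11+, r15\n")?;
    println!("{tree}");   // prints the parsed lines back as text
    println!("{tree:x}"); // prints the encoded instruction words as hex
    Ok(())
}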

15
src/parser/comment.rs Normal file

@@ -0,0 +1,15 @@
// © 2023 John Breaux
use super::*;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Comment(pub String);
impl Parsable for Comment {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
let token = stream.expect(Type::Comment)?;
Ok(Self(token.lexeme().to_string()))
}
}
impl Display for Comment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
}

32
src/parser/directive.rs Normal file

@@ -0,0 +1,32 @@
// © 2023 John Breaux
//! A [Directive] issues commands directly to the [Tokenizer](crate::Tokenizer) and
//! [Linker](crate::Linker)
use super::*;
use crate::hash::FromHash;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Directive(pub Hash, pub String);
impl Directive {
fn str<S: ToString>(mut self, s: S) -> Self {
self.1 = s.to_string();
self
}
}
impl From<Hash> for Directive {
fn from(value: Hash) -> Self { Self(value, String::new()) }
}
impl Parsable for Directive {
fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
// expect a directive
let d = stream.expect(Type::Directive)?;
// send the directive to the listener
Ok(Self::from_hash(d.lexeme()).str(d.lexeme()))
}
}
impl Display for Directive {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.1) }
}

34
src/parser/identifier.rs Normal file

@@ -0,0 +1,34 @@
// © 2023 John Breaux
use super::*;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Identifier {
Hash(Hash),
Str(String),
}
impl Identifier {
fn str<T: AsRef<str>>(s: T) -> Self { Self::Str(s.as_ref().into()) }
}
impl From<Hash> for Identifier {
fn from(value: Hash) -> Self { Self::Hash(value) }
}
impl Parsable for Identifier {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
let token = stream.expect(Type::Identifier)?;
match token.variant() {
Type::Identifier => Ok(Self::str(token.lexeme())),
_ => unreachable!("Expected Identifier, got {token:?}"),
}
}
}
impl Display for Identifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Identifier::Hash(_) => Display::fmt("Unresolved", f),
Identifier::Str(s) => Display::fmt(s, f),
}
}
}

67
src/parser/instruction.rs Normal file

@@ -0,0 +1,67 @@
// © 2023 John Breaux
//! An [Instruction] contains the [Opcode] and [Encoding] information for a single msp430
//! instruction
//!
//!
//! Note: [Opcode] and [Encoding] are very tightly coupled, because they represent interdependent parts
//! of the same instruction. This is why [Opcode]::resolve() returns an [EncodingParser] -- otherwise,
//! there's an explosion of states that I can't really cope with on my own. Really, there's about 9
//! valid classes of instruction, some of which are only used for one or two of the MSP430's
//! instructions.
use super::*;
pub mod encoding;
pub mod opcode;
/// Represents an entire MSP430 instruction
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Instruction(Opcode, Encoding);
impl Instruction {
pub fn opcode(&self) -> &Opcode { &self.0 }
pub fn encoding(&self) -> &Encoding { &self.1 }
/// Gets the Instruction as a [u16]
pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() }
/// Gets the extension words, if any
pub fn ext_words(&self) -> (Option<u16>, Option<u16>) { self.1.extwords() }
}
impl Parsable for Instruction {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where
Self: Sized,
T: crate::TokenStream<'text>,
{
// parse an opcode
let insn = stream.expect(Type::Insn)?;
let opcode: Opcode = insn.parse()?;
// resolve the opcode to a final opcode and an encoding
let (opcode, encoding) = opcode.resolve();
// parse the encoding
let encoding = encoding.parse(p, stream)?;
Ok(Self(opcode, encoding))
}
}
impl From<Instruction> for u16 {
fn from(value: Instruction) -> Self { value.word() }
}
impl Display for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
}
impl LowerHex for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let (word, (ext_src, ext_dst)) = (self.word(), self.ext_words());
write!(f, "{:04x} ", word.swap_bytes())?;
if let Some(e) = ext_src {
write!(f, "{:04x} ", e.swap_bytes())?
}
if let Some(e) = ext_dst {
write!(f, "{:04x} ", e.swap_bytes())?
}
Ok(())
}
}
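Worked example (illustrative): `add #64, r8` is one instruction word plus one extension word carrying the immediate, and `word()` / `ext_words()` expose exactly that split.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    if let Line::Insn(insn) = Parser::default().parse_one("add #64, r8")? {
        assert_eq!(insn.word(), 0x5038);                  // add, immediate src (via @pc+), dst r8
        assert_eq!(insn.ext_words(), (Some(0x64), None)); // the immediate rides in an extension word
        println!("{insn} -> {insn:x}");
    }
    Ok(())
}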

81
src/parser/instruction/encoding.rs Normal file

@@ -0,0 +1,81 @@
// © 2023 John Breaux
//! An [Encoding] represents the set of arguments for the [msp430's instructions](Opcode)
use super::*;
pub mod number;
pub mod register;
pub mod width;
pub mod jump_target;
pub mod primary_operand;
pub mod secondary_operand;
mod builder;
pub mod encoding_parser;
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
use encoding_parser::EncodingParser;
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
///
/// # Examples
/// ```rust
/// use msp430_asm::{*, parser::{Encoding, EncodingParser}};
/// // Create a token sequence
/// let asm_file = r".b 8000(r15)";
/// // Create a single-operand encoding parser
/// let single: EncodingParser = Encoding::single().end();
/// // Parse an Encoding from it
/// let encoding: Encoding = single
/// .parse(&Parser::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
/// .unwrap();
/// // Print the Encoding
/// println!("{encoding}");
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
Single { width: Width, dst: PrimaryOperand },
Jump { target: JumpTarget },
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
}
impl Encoding {
/// Returns a builder for [Encoding::Single]
pub fn single() -> SingleBuilder { Default::default() }
/// Returns a builder for [Encoding::Jump]
pub fn jump() -> JumpBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
pub fn double() -> DoubleBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
///
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
/// dst are the same
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
/// Returns the operand bits of this [Encoding] as they appear in the instruction word
pub fn word(&self) -> u16 {
match *self {
Encoding::Single { width, dst } => u16::from(width) | dst.mode() | dst.register() as u16,
Encoding::Jump { target } => target.word(),
Encoding::Double { width, src, dst } => {
u16::from(width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
}
}
}
/// Returns extwords for instruction
pub fn extwords(&self) -> (Option<u16>, Option<u16>) {
match self {
Encoding::Double { src, dst, .. } => (src.ext_word(), dst.ext_word()),
Encoding::Single { dst, .. } => (dst.ext_word(), None),
Encoding::Jump { .. } => (None, None),
}
}
}
impl Display for Encoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
Encoding::Jump { target } => write!(f, " {target}"),
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
}
}
}

76
src/parser/instruction/encoding/builder.rs Normal file

@@ -0,0 +1,76 @@
// © 2023 John Breaux
//! Builder API for [EncodingParser]
use super::*;
#[derive(Debug, Default)]
pub struct SingleBuilder {
width: Option<Width>,
dst: Option<PrimaryOperand>,
}
impl SingleBuilder {
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn operand(mut self, dst: PrimaryOperand) -> Self {
self.dst = Some(dst);
self
}
/// Build
pub fn end(&self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
}
#[derive(Debug, Default)]
pub struct JumpBuilder {
target: Option<JumpTarget>,
}
impl JumpBuilder {
pub fn target(mut self, target: JumpTarget) -> Self {
self.target = Some(target);
self
}
pub fn end(&self) -> EncodingParser { EncodingParser::Jump { target: self.target } }
}
#[derive(Debug, Default)]
pub struct DoubleBuilder {
width: Option<Width>,
src: Option<PrimaryOperand>,
dst: Option<SecondaryOperand>,
}
impl DoubleBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn src(mut self, src: PrimaryOperand) -> Self {
self.src = Some(src);
self
}
/// Sets the [SecondaryOperand] field
pub fn dst(mut self, dst: SecondaryOperand) -> Self {
self.dst = Some(dst);
self
}
pub fn end(&self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
}
#[derive(Debug, Default)]
pub struct ReflexiveBuilder {
width: Option<Width>,
reg: Option<SecondaryOperand>,
}
impl ReflexiveBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
pub fn reg(mut self, reg: SecondaryOperand) -> Self {
self.reg = Some(reg);
self
}
pub fn end(&self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
}
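Sketch of the builders in use (illustrative): pre-filling both operands the way `clrc` resolves to `bic #1, sr`, so the resulting EncodingParser reads nothing from the stream.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    let prefilled: EncodingParser = Encoding::double()
        .src(PrimaryOperand::One)
        .dst(SecondaryOperand::Direct(Register::sr))
        .end();
    // Both operands are preset, so parsing consumes no tokens at all.
    let encoding = prefilled.parse(&Parser::default(), &mut Tokenizer::new("").ignore_spaces())?;
    assert_eq!(Opcode::Bic as u16 | encoding.word(), 0xc312); // the canonical `clrc` word
    Ok(())
}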

39
src/parser/instruction/encoding/encoding_parser.rs Normal file

@@ -0,0 +1,39 @@
// © 2023 John Breaux
//! An [EncodingParser] builds an [Encoding] from a [TokenStream]
use super::*;
#[derive(Debug)]
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
pub enum EncodingParser {
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
Jump { target: Option<JumpTarget> },
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
}
impl EncodingParser {
/// Constructs an [Encoding] from this [EncodingParser], filling holes
/// with the tokenstream
pub fn parse<'text, T>(&self, p: &Parser, stream: &mut T) -> Result<Encoding, Error>
where T: crate::TokenStream<'text> {
Ok(match self {
Self::Single { width, dst } => {
let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream));
let dst = if let Some(dst) = dst { *dst } else { PrimaryOperand::parse(p, stream)? };
Encoding::Single { width, dst }
}
Self::Jump { target } => Encoding::Jump { target: match target { Some(t) => *t, None => JumpTarget::parse(p, stream)? } },
Self::Double { width, src, dst } => {
let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream));
let src = if let Some(src) = src { *src } else { PrimaryOperand::parse(p, stream)? };
let dst = if let Some(dst) = dst { *dst } else { SecondaryOperand::parse(p, stream)? };
Encoding::Double { width, src, dst }
}
Self::Reflexive { width, reg } => {
let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
let reg = if let Some(reg) = reg { *reg } else { SecondaryOperand::parse(p, stream)? };
Encoding::Double { width, src: reg.into(), dst: reg }
}
})
}
}

39
src/parser/instruction/encoding/jump_target.rs Normal file

@@ -0,0 +1,39 @@
// © 2023 John Breaux
//! A [JumpTarget] contains the [pc-relative offset](Number) or [Identifier]
//! for a [Jump instruction encoding](Encoding::Jump)
use super::*;
/// The target of a [Jump](Encoding::Jump)
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct JumpTarget(Number);
impl JumpTarget {
pub fn word(&self) -> u16 { u16::from(self.0) & 0x3ff }
}
impl Parsable for JumpTarget {
/// A jump target is either:
/// - [Number]: an even, PC-relative byte offset (optionally negative)
/// - [Identifier]: a label (TODO: label resolution is not implemented yet)
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: crate::TokenStream<'text> {
// Try to parse a number
let target = Number::parse(p, stream)?;
match target.into() {
i if i % 2 != 0 => Err(Error::JumpedOdd(i).context(stream.context()))?,
i if (-1024..=1022).contains(&(i - 2)) => Ok(Self((target - 2) >> 1)),
i => Err(Error::JumpedTooFar(i).context(stream.context()))?,
}
}
}
impl From<JumpTarget> for u16 {
fn from(value: JumpTarget) -> Self { value.0.into() }
}
impl Display for JumpTarget {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", (1 + isize::from(self.0)) << 1)
}
}
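Worked example (illustrative): a forward jump of 8 bytes stores (8 - 2) >> 1 = 3 in the 10-bit offset field, and `jmp` itself contributes 0x2000 | 7 << 10.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    if let Line::Insn(insn) = Parser::default().parse_one("jmp 8")? {
        assert_eq!(insn.word(), 0x3c00 | 3);
    }
    Ok(())
}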

75
src/parser/instruction/encoding/number.rs Normal file

@@ -0,0 +1,75 @@
// © 2023 John Breaux
//! A [Number] represents a 16-bit signed or unsigned word
use super::*;
// TODO: Allow identifiers/expressions in place of numbers
// - Dependency inversion in TokenStream to allow swapping the parser mid-parse?
// - Oh my god, not relying on std::iter::Iterator allows for so many more parsing options
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Number(isize, u32); // (value, radix)
impl Parsable for Number {
// A number is:
// - Minus?
// - RadixMarker[Hex|Dec|Oct|Bin]?
// - Number
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
use Type::*;
let negative = stream.expect(Minus).is_ok();
let radix = match stream
.expect_any_of([RadixMarkerHex, RadixMarkerDec, RadixMarkerOct, RadixMarkerBin])
.ok()
.map(|t| t.variant())
{
Some(RadixMarkerHex) => 16,
Some(RadixMarkerDec) => 10,
Some(RadixMarkerOct) => 8,
Some(RadixMarkerBin) => 2,
_ => p.radix,
};
let number = stream.expect(Number)?;
let number = isize::from_str_radix(number.lexeme(), radix)
.map_err(|_| Error::UnexpectedDigits(number.lexeme().into(), radix).context(stream.context()))?
* if negative { -1 } else { 1 };
// Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
Ok(Self(
if (-0x8000..0x10000).contains(&number) {
number
} else {
Err(Error::NumberTooWide(number).context(stream.context()))?
},
radix,
))
}
}
impl From<Number> for isize {
fn from(value: Number) -> Self { value.0 as Self }
}
impl From<Number> for i32 {
fn from(value: Number) -> Self { value.0 as Self }
}
impl From<Number> for u16 {
/// Truncates the value to its low 16 bits (two's complement).
fn from(value: Number) -> Self { value.0 as Self }
}
impl std::ops::Sub<isize> for Number {
type Output = Self;
fn sub(mut self, rhs: isize) -> Self::Output {
self.0 -= rhs;
self
}
}
impl std::ops::Shr<usize> for Number {
type Output = Self;
fn shr(mut self, rhs: usize) -> Self::Output {
self.0 >>= rhs;
self
}
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:x}", self.0) }
}
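Sketch of radix handling (illustrative): the parser's default radix is 16, and explicit markers override it per number.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    let p = Parser::default(); // default radix is 16
    for (text, want) in [("64", 0x64u16), ("0d64", 64), ("0b1010", 10), ("-1", 0xffff)] {
        let n = Number::parse(&p, &mut Tokenizer::new(text).ignore_spaces())?;
        assert_eq!(u16::from(n), want);
    }
    Ok(())
}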

141
src/parser/instruction/encoding/primary_operand.rs Normal file

@@ -0,0 +1,141 @@
// © 2023 John Breaux
//! A [PrimaryOperand] contains the first [Register], addressing mode, and Extension
//! Word for an [instruction](Instruction)
use super::*;
/// The Source of a [Double](Encoding::Double) or Destination of a
/// [Single](Encoding::Single)
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrimaryOperand {
Direct(Register),
Indirect(Register),
PostInc(Register),
Indexed(Register, Number),
Absolute(Number),
Immediate(Number),
Four,
Eight,
Zero,
One,
Two,
MinusOne,
}
impl PrimaryOperand {
/// Returns the mode bits
pub fn mode(&self) -> u16 {
use PrimaryOperand::*;
match self {
Direct(_) | Zero => 0,
Indexed(_, _) | Absolute(_) | One => 1 << 4,
Indirect(_) | Two | Four => 2 << 4,
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
}
}
/// Gets the register
pub fn register(&self) -> Register {
use PrimaryOperand::*;
match self {
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
Immediate(_) => Register::pc,
Absolute(_) | Four | Eight => Register::sr,
Zero | One | Two | MinusOne => Register::cg,
}
}
/// Gets the extension word, if present
pub fn ext_word(&self) -> Option<u16> {
use PrimaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for PrimaryOperand {
// - Register
// - Indirect
// - Register
// - PostInc?
// - Number
// - OpenIdx
// - Register
// - CloseIdx
// - Absolute
// - Number
// - Immediate
// - Number
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: crate::TokenStream<'text> {
use PrimaryOperand::*;
// Try parsing as Register Direct
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number Indexed
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Or directly match any of the valid prefix markers
let token = stream.expect_any_of([Type::Indirect, Type::Absolute, Type::Immediate])?;
Ok(match token.variant() {
Type::Indirect => {
let reg = stream.expect(Type::Register)?.parse()?;
match stream.expect(Type::Plus) {
Ok(_) => PostInc(reg),
Err(_) => Indirect(reg),
}
}
Type::Absolute => Absolute(Number::parse(p, stream)?),
Type::Immediate => {
let number = Number::parse(p, stream)?;
match number.into() {
// Accept both spellings of the all-ones constant, since Number preserves the
// sign exactly as the programmer wrote it (-1 vs. 0xffff).
-1 | 0xffff => MinusOne,
0 => Zero,
1 => One,
2 => Two,
4 => Four,
8 => Eight,
_ => Immediate(number),
}
}
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl From<SecondaryOperand> for PrimaryOperand {
fn from(value: SecondaryOperand) -> Self {
match value {
SecondaryOperand::Direct(r) => Self::Direct(r),
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
SecondaryOperand::Absolute(n) => Self::Absolute(n),
SecondaryOperand::Zero => Self::Zero,
SecondaryOperand::One => Self::One,
}
}
}
impl Display for PrimaryOperand {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Direct(r) => write!(f, "{r}"),
Self::Indirect(r) => write!(f, "@{r}"),
Self::PostInc(r) => write!(f, "@{r}+"),
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
Self::Absolute(n) => write!(f, "&{n}"),
Self::Immediate(n) => write!(f, "#{n}"),
Self::Four => write!(f, "#4"),
Self::Eight => write!(f, "#8"),
Self::Zero => write!(f, "#0"),
Self::One => write!(f, "#1"),
Self::Two => write!(f, "#2"),
Self::MinusOne => write!(f, "#-1"),
}
}
}
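Sketch of constant-generator folding (illustrative): small immediates collapse onto sr/cg with no extension word, while other immediates need one.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    let p = Parser::default();
    // #8 folds onto the constant generator: no extension word.
    let op = PrimaryOperand::parse(&p, &mut Tokenizer::new("#8").ignore_spaces())?;
    assert_eq!(op, PrimaryOperand::Eight);
    assert_eq!(op.register(), Register::sr);
    assert_eq!(op.ext_word(), None);
    // #10 (= 16 under the default hex radix) is not a generated constant.
    let op = PrimaryOperand::parse(&p, &mut Tokenizer::new("#10").ignore_spaces())?;
    assert_eq!(op.ext_word(), Some(0x10));
    Ok(())
}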

111
src/parser/instruction/encoding/register.rs Normal file

@@ -0,0 +1,111 @@
// © 2023 John Breaux
//! A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
use super::*;
use std::str::FromStr;
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Register {
/// Program Counter
pc,
/// Stack Pointer
sp,
/// Status Register
sr,
/// Constant Generator
cg,
r4,
r5,
r6,
r7,
r8,
r9,
r10,
r11,
r12,
r13,
r14,
r15,
}
impl Parsable for Register {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
where T: crate::TokenStream<'text> {
stream.expect(Type::Register).map_err(|e| e.context(stream.context()))?.lexeme().parse()
}
}
impl From<Register> for u16 {
fn from(value: Register) -> Self { value as u16 }
}
impl TryFrom<u16> for Register {
type Error = Error;
fn try_from(value: u16) -> Result<Self, Self::Error> {
use Register::*;
Ok(match value {
0 => pc,
1 => sp,
2 => sr,
3 => cg,
4 => r4,
5 => r5,
6 => r6,
7 => r7,
8 => r8,
9 => r9,
10 => r10,
11 => r11,
12 => r12,
13 => r13,
14 => r14,
15 => r15,
_ => return Err(Error::RegisterTooHigh(value)),
})
}
}
impl FromStr for Register {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use Register::*;
match s {
"pc" => Ok(pc),
"sp" => Ok(sp),
"sr" => Ok(sr),
"cg" => Ok(cg),
_ => s.strip_prefix(&['r', 'R'][..]).and_then(|num| num.parse::<u16>().ok()).ok_or_else(|| Error::NotARegister(s.into()))?.try_into(),
}
}
}
impl From<Register> for &str {
fn from(value: Register) -> Self {
use Register::*;
match value {
pc => "pc",
sp => "sp",
sr => "sr",
cg => "cg",
r4 => "r4",
r5 => "r5",
r6 => "r6",
r7 => "r7",
r8 => "r8",
r9 => "r9",
r10 => "r10",
r11 => "r11",
r12 => "r12",
r13 => "r13",
r14 => "r14",
r15 => "r15",
}
}
}
impl std::fmt::Display for Register {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) }
}

95
src/parser/instruction/encoding/secondary_operand.rs Normal file

@@ -0,0 +1,95 @@
// © 2023 John Breaux
//! A [SecondaryOperand] contains the second [Register], addressing mode, and Extension
//! Word for a [two-operand](Encoding::Double) [instruction](Instruction)
use super::*;
/// The destination of a [Double](Encoding::Double)
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SecondaryOperand {
Direct(Register),
Indexed(Register, Number),
Absolute(Number),
// Constant-generator encodings for #0 and #1
Zero,
One,
}
impl SecondaryOperand {
pub fn mode(&self) -> u16 {
use SecondaryOperand::*;
match self {
Direct(_) | Zero => 0,
Indexed(_, _) | Absolute(_) | One => 1 << 7,
}
}
pub fn register(&self) -> Register {
use SecondaryOperand::*;
match self {
Direct(r) | Indexed(r, _) => *r,
Absolute(_) => Register::sr,
Zero | One => Register::cg,
}
}
/// This is the only way to have an extension word
pub fn ext_word(&self) -> Option<u16> {
use SecondaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for SecondaryOperand {
/// Separator
/// - Register => Direct
/// - Number => Indexed
/// - OpenIdx
/// - Register
/// - CloseIdx
/// - Absolute
/// - Number
/// - Immediate
/// - Number == 0, 1
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, crate::Error>
where T: crate::TokenStream<'text> {
use SecondaryOperand::*;
stream.allow(Type::Separator);
// Try parsing as Register Direct
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number Indexed
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
let token = stream.expect_any_of([Type::Absolute, Type::Immediate])?;
Ok(match token.variant() {
Type::Absolute => Absolute(Number::parse(p, stream)?),
Type::Immediate => {
let number = Number::parse(p, stream)?;
match number.into() {
0 => Zero,
1 => One,
n => Err(Error::FatSecondaryImmediate(n as isize).context(stream.context()))?,
}
}
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl Display for SecondaryOperand {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Direct(r) => write!(f, "{r}"),
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
Self::Absolute(n) => write!(f, "&{n}"),
Self::Zero => write!(f, "#0"),
Self::One => write!(f, "#1"),
}
}
}

31
src/parser/instruction/encoding/width.rs Normal file

@@ -0,0 +1,31 @@
// © 2023 John Breaux
use super::*;
/// Represents an instruction's operand width.
///
/// Evaluates to `false` when the instruction takes word-sized operands, and to
/// `true` when it takes byte-sized operands.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Width(bool);
impl Parsable for Width {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else {
return Ok(Self(false));
};
Ok(Self(token.is_variant(Type::ByteWidth)))
}
}
impl From<Width> for u16 {
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
}
impl From<Width> for bool {
fn from(value: Width) -> Self { value.0 }
}
impl From<bool> for Width {
fn from(value: bool) -> Self { Width(value) }
}
impl std::fmt::Display for Width {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) }
}
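Illustrative check of the width bit: `.b` sets bit 6 of the instruction word; word width (or no suffix) leaves it clear.

use msp430_asm::parser::preamble::*;

fn main() {
    assert_eq!(u16::from(Width::from(true)), 1 << 6);
    assert_eq!(u16::from(Width::from(false)), 0);
    assert_eq!(format!("{}", Width::from(true)), ".b");
}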

258
src/parser/instruction/opcode.rs Normal file

@@ -0,0 +1,258 @@
// © 2023 John Breaux
//! An [Opcode] encodes an msp430 operation
use super::*;
use std::str::FromStr;
/// Opcode from the [MSPGCC Manual][1]
///
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
/// extract from a [TokenStream] only the required arguments for that call.
///
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
#[allow(clippy::identity_op)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
// "Emulated" opcodes
Nop,
Pop,
Br,
Ret,
Clrc,
Setc,
Clrz,
Setz,
Clrn,
Setn,
Dint,
Eint,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
// Single
Rrc = 0x1000 | 0 << 7,
Swpb = 0x1000 | 1 << 7,
Rra = 0x1000 | 2 << 7,
Sxt = 0x1000 | 3 << 7,
Push = 0x1000 | 4 << 7,
Call = 0x1000 | 5 << 7,
Reti = 0x1000 | 6 << 7,
// Jump
Jnz = 0x2000 | 0 << 10,
Jz = 0x2000 | 1 << 10,
Jnc = 0x2000 | 2 << 10,
Jc = 0x2000 | 3 << 10,
Jn = 0x2000 | 4 << 10,
Jge = 0x2000 | 5 << 10,
Jl = 0x2000 | 6 << 10,
Jmp = 0x2000 | 7 << 10,
// Double
Mov = 0x4000,
Add = 0x5000,
Addc = 0x6000,
Subc = 0x7000,
Sub = 0x8000,
Cmp = 0x9000,
Dadd = 0xa000,
Bit = 0xb000,
Bic = 0xc000,
Bis = 0xd000,
Xor = 0xe000,
And = 0xf000,
}
impl Opcode {
pub fn takes_width(&self) -> bool {
use Opcode::*;
match self {
Rrc => true,
Swpb => false,
Rra => true,
Sxt => false,
Push => true,
Call | Reti => false,
Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => false,
Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => true,
Nop | Pop | Br | Ret | Clrc | Setc | Clrz | Setz | Clrn | Setn | Dint | Eint | Rla | Rlc | Inv | Clr
| Tst | Dec | Decd | Inc | Incd | Adc | Dadc | Sbc => true,
}
}
/// Resolves this [Opcode] into the [Opcode] it actually assembles to and an [EncodingParser]; emulated opcodes are rewritten in terms of the real instruction that implements them
pub fn resolve(self) -> (Opcode, EncodingParser) {
use super::Encoding as Enc;
use Opcode::*;
use Register::*;
use {PrimaryOperand as Src, SecondaryOperand as Dst};
match self {
Rrc | Swpb | Rra | Sxt | Push | Call | Reti => (self, Enc::single().end()),
Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => (self, Enc::jump().end()),
Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => (self, Enc::double().end()),
Nop => (Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
Pop => (Mov, Enc::double().src(Src::PostInc(sp)).end()),
Br => (Mov, Enc::double().dst(Dst::Direct(pc)).end()),
Ret => (Mov, Enc::double().src(Src::PostInc(sp)).dst(Dst::Direct(pc)).end()),
Clrc => (Bic, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()),
Setc => (Bis, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()),
Clrz => (Bic, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()),
Setz => (Bis, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()),
Clrn => (Bic, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()),
Setn => (Bis, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()),
Dint => (Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()),
Eint => (Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()),
Rla => (Add, Enc::reflexive().end()),
Rlc => (Addc, Enc::reflexive().end()),
Inv => (Xor, Enc::double().src(Src::MinusOne).end()),
Clr => (Mov, Enc::double().src(Src::Zero).end()),
Tst => (Cmp, Enc::double().src(Src::Zero).end()),
Dec => (Sub, Enc::double().src(Src::One).end()),
Decd => (Sub, Enc::double().src(Src::Two).end()),
Inc => (Add, Enc::double().src(Src::One).end()),
Incd => (Add, Enc::double().src(Src::Two).end()),
Adc => (Addc, Enc::double().src(Src::Zero).end()),
Dadc => (Dadd, Enc::double().src(Src::Zero).end()),
Sbc => (Subc, Enc::double().src(Src::Zero).end()),
}
}
}
impl FromStr for Opcode {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use Opcode::*;
//TODO: Reduce allocations here
let s = s.to_ascii_lowercase();
Ok(match s.as_str() {
"rrc" => Rrc,
"swpb" => Swpb,
"rra" => Rra,
"sxt" => Sxt,
"push" => Push,
"call" => Call,
"reti" => Reti,
"jne" | "jnz" => Jnz,
"jeq" | "jz" => Jz,
"jnc" | "jlo" => Jnc,
"jc" | "jhs" => Jc,
"jn" => Jn,
"jge" => Jge,
"jl" => Jl,
"jmp" => Jmp,
"mov" => Mov,
"add" => Add,
"addc" => Addc,
"subc" => Subc,
"sub" => Sub,
"cmp" => Cmp,
"dadd" => Dadd,
"bit" => Bit,
"bic" => Bic,
"bis" => Bis,
"xor" => Xor,
"and" => And,
"nop" => Nop,
"pop" => Pop,
"br" => Br,
"ret" => Ret,
"clrc" => Clrc,
"setc" => Setc,
"clrz" => Clrz,
"setz" => Setz,
"clrn" => Clrn,
"setn" => Setn,
"dint" => Dint,
"eint" => Eint,
"rla" => Rla,
"rlc" => Rlc,
"inv" => Inv,
"clr" => Clr,
"tst" => Tst,
"dec" => Dec,
"decd" => Decd,
"inc" => Inc,
"incd" => Incd,
"adc" => Adc,
"dadc" => Dadc,
"sbc" => Sbc,
_ => Err(Error::UnrecognizedOpcode(s))?,
})
}
}
impl Display for Opcode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Opcode::*;
write!(
f,
"{}",
match self {
Nop => "nop",
Pop => "pop",
Br => "br",
Ret => "ret",
Clrc => "clrc",
Setc => "setc",
Clrz => "clrz",
Setz => "setz",
Clrn => "clrn",
Setn => "setn",
Dint => "dint",
Eint => "eint",
Rla => "rla",
Rlc => "rlc",
Inv => "inv",
Clr => "clr",
Tst => "tst",
Dec => "dec",
Decd => "decd",
Inc => "inc",
Incd => "incd",
Adc => "adc",
Dadc => "dadc",
Sbc => "sbc",
Rrc => "rrc",
Swpb => "swpb",
Rra => "rra",
Sxt => "sxt",
Push => "push",
Call => "call",
Reti => "reti",
Jnz => "jnz",
Jz => "jz",
Jnc => "jnc",
Jc => "jc",
Jn => "jn",
Jge => "jge",
Jl => "jl",
Jmp => "jmp",
Mov => "mov",
Add => "add",
Addc => "addc",
Subc => "subc",
Sub => "sub",
Cmp => "cmp",
Dadd => "dadd",
Bit => "bit",
Bic => "bic",
Bis => "bis",
Xor => "xor",
And => "and",
}
)
}
}
impl LowerHex for Opcode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:04x}", *self as u16) }
}
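Sketch of emulated-opcode resolution (illustrative): `ret` rewrites to `mov @sp+, pc`, which assembles to the familiar 0x4130.

use msp430_asm::preamble::*;
use msp430_asm::parser::preamble::*;

fn main() -> Result<(), Error> {
    let (opcode, encoding) = "ret".parse::<Opcode>()?.resolve();
    assert_eq!(opcode, Opcode::Mov);
    // Both operands are pre-filled by resolve(), so the stream can be empty.
    let encoding = encoding.parse(&Parser::default(), &mut Tokenizer::new("").ignore_spaces())?;
    assert_eq!(opcode as u16 | encoding.word(), 0x4130);
    Ok(())
}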

16
src/parser/label.rs Normal file

@@ -0,0 +1,16 @@
// © 2023 John Breaux
use super::*;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label(pub Identifier);
impl Parsable for Label {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where T: TokenStream<'text> {
Ok(Self(Identifier::parse(p, stream).and_then(|t| stream.require(Type::Label).and(Ok(t)))?))
}
}
impl Display for Label {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
}

44
src/parser/parsable.rs Normal file

@@ -0,0 +1,44 @@
// © 2023 John Breaux
use super::*;
/// Parses tokens from a [stream](TokenStream) into a Self node
pub trait Parsable {
/// Parses tokens from [TokenStream](TokenStream) into Self nodes
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, Error>
where
Self: Sized,
T: TokenStream<'text>;
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
///
/// Masks failed expectations.
fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, Error>
where
Self: Sized,
T: TokenStream<'text>,
{
match Self::parse(p, stream).map_err(|e| e.bare()) {
Ok(tt) => Ok(Some(tt)),
Err(Error::UnexpectedToken { .. }) | Err(Error::AllExpectationsFailed { .. }) => Ok(None),
Err(e) => Err(e),
}
}
fn parse_and<'text, T, R>(p: &Parser, stream: &mut T, f: fn(p: &Parser, &mut T) -> R) -> Result<(Self, R), Error>
where
Self: Sized,
T: TokenStream<'text>,
{
Ok((Self::parse(p, stream)?, f(p, stream)))
}
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
///
/// Returns [`Self::default()`](Default::default()) on error
fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
where
Self: Sized + Default,
T: TokenStream<'text>,
{
Self::parse(p, stream).unwrap_or_default()
}
}

193
src/tokenizer.rs Normal file

@@ -0,0 +1,193 @@
// © 2023 John Breaux
//! Iterates over &[str], producing [Token]s
// Things we need:
// ✔ 1. Lexer/Tokenizer
// ✔ 1. Instructions
// ✔ 1. Instruction mnemonics /ad.../
// ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/
// ✔ 2. Src operands
// ✔ 1. Registers /(r1[0-5]|r[0-9])/
// ✔ 2. Immediate Values /#/
// ✔ 3. Absolute addresses /&/
// ✔ 4. Numbers /[0-9A-Fa-f]+
// ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/
// ✔ 4. Label definitions /(^.*):/
// ✔ 5. Comments (may be useful for debugging)
pub mod context;
pub mod token;
use crate::Error;
use context::Context;
use token::{Token, Type};
/// A stream of [Token]s with peeking and expectation helpers ("backtracking through bifurcated timelines")
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> {
/// Gets this stream's [Context]
fn context(&self) -> Context;
/// Creates an iterator that skips [Type::Space] in the input
fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self>
where Self: Sized {
IgnoreSpaces::new(self)
}
/// Returns the next [Token] without advancing
fn peek(&mut self) -> Self::Item;
/// Returns the next [Token] if it is of the expected [Type], without advancing
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
/// Consumes and returns a [Token] if it is the expected [Type]
///
/// Otherwise, does not consume a [Token]
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error>;
/// Ignores a [Token] of the expected [Type], propagating errors.
fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) }
/// Ignores a [Token] of the expected [Type], discarding errors.
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
/// Tries `f` with each expected [Type] in order, returning the first success; if none match, returns an error listing all expectations.
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, Error>, expected: T) -> Result<U, Error>
where T: AsRef<[Type]> {
for &expected in expected.as_ref() {
match f(self, expected).map_err(|e| e.bare()) {
Ok(t) => return Ok(t),
Err(Error::UnexpectedToken { .. }) => continue,
Err(e) => return Err(e.context(self.context())),
}
}
Err(Error::expected(expected, self.peek()).context(self.context()))
}
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
where T: AsRef<[Type]> {
self.any_of(Self::peek_expect, expected)
}
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
///
/// Otherwise, does not consume a [Token]
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, Error>
where T: AsRef<[Type]> {
self.any_of(Self::expect, expected)
}
/// Ignores a [Token] of any expected [Type], discarding errors.
fn allow_any_of<T>(&mut self, expected: T)
where T: AsRef<[Type]> {
let _ = self.expect_any_of(expected);
}
/// Ignores a [Token] of any expected [Type], propagating errors.
fn require_any_of<T>(&mut self, expected: T) -> Result<(), Error>
where T: AsRef<[Type]> {
self.any_of(Self::require, expected)
}
}
/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
text: &'t str,
idx: usize,
context: Context,
}
impl<'t> Tokenizer<'t> {
/// Produces a new [Tokenizer] from a [str]ing slice
pub fn new<T>(text: &'t T) -> Self
where T: AsRef<str> + ?Sized {
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
}
fn count(&mut self, token: &Token) {
// update the context
self.context.count(token);
// advance the index
self.idx += token.len();
}
}
impl<'text> Iterator for Tokenizer<'text> {
type Item = Token<'text>;
fn next(&mut self) -> Option<Self::Item> {
if self.idx >= self.text.len() {
return None;
}
let token = Token::from(&self.text[self.idx..]);
// Track the token in the context and advance past it
self.count(&token);
Some(token)
}
}
impl<'text> TokenStream<'text> for Tokenizer<'text> {
fn context(&self) -> Context { self.context }
// Tokenizer has access to the source buffer, and can implement expect and peek without cloning
// itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
// registers are valid identifiers.
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
self.count(&token);
Ok(token)
}
fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
Token::expect(&self.text[self.idx..], expected)
}
}
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
inner: &'t mut T,
}
impl<'t, T> IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } }
/// Gets a mutable reference to the inner [Iterator]
pub fn inner_mut(&mut self) -> &mut T { self.inner }
}
impl<'t, T> Iterator for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
type Item = Token<'t>;
fn next(&mut self) -> Option<Self::Item> {
let next = self.inner.next()?;
// Space tokens are greedy, so the next token shouldn't be a Space
match next.variant() {
Type::Space => self.next(),
_ => Some(next),
}
}
}
impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T>
where T: TokenStream<'t>
{
fn context(&self) -> Context { self.inner.context() }
fn expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
self.inner.allow_any_of([Type::Space, Type::Endl]);
self.inner.expect(expected)
}
fn peek(&mut self) -> Self::Item {
self.inner.allow_any_of([Type::Space, Type::Endl]);
self.inner.peek()
}
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, Error> {
self.inner.allow_any_of([Type::Space, Type::Endl]);
self.inner.peek_expect(expected)
}
}
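Quick lexing sketch (illustrative): iterate the token stream for one line of assembly, skipping whitespace.

use msp430_asm::preamble::*;

fn main() {
    let mut lexer = Tokenizer::new("mov.b @r4+, 2(r15) ; copy a byte");
    for token in lexer.ignore_spaces() {
        print!("[{:?}] ", token.variant());
    }
    println!();
    // [Insn] [ByteWidth] [Indirect] [Register] [Plus] [Separator] [Number] [LParen] [Register] [RParen] [Comment]
}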

36
src/tokenizer/context.rs Normal file

@@ -0,0 +1,36 @@
//! Stores contextual information about the current tokenizer state, useful for printing errors
use super::*;
/// Stores contextual information about the current tokenizer state, useful for printing errors
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Context {
line: usize,
tokens: usize,
position: usize,
}
impl Context {
pub fn new() -> Self { Default::default() }
pub fn line(&self) -> usize { self.line }
pub fn tokens(&self) -> usize { self.tokens }
pub fn position(&self) -> usize { self.position }
pub(super) fn count(&mut self, t: &Token) {
match t.variant() {
Type::EndOfFile => return,
Type::Endl => {
self.line += 1;
self.position = 0;
}
_ => self.position += t.len(),
}
self.tokens += 1;
}
}
impl Default for Context {
fn default() -> Self { Self { line: 1, tokens: 0, position: 0 } }
}
impl std::fmt::Display for Context {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}", self.line, self.position)
}
}

309
src/tokenizer/token.rs Normal file

@@ -0,0 +1,309 @@
// © 2023 John Breaux
//! Defines the [Token]
//!
//! A [Token] represents all valid sequences of characters,
//! sorted by meaning
use regex::Regex;
use std::{
fmt::{Debug, Display},
sync::OnceLock,
};
/// Implements regex matching functions on [`Token`] for each [`Type`],
/// and implements [`From<&str>`] for [`Token`]
macro_rules! regex_impl {
(<$t:lifetime> $type:ty {$(
$(#[$meta:meta])*
pub fn $func:ident (text: &str) -> Option<Self> {
regex!($out:path = $re:literal)
}
)*}) => {
impl<$t> $type {
/// Lexes a token only as the `expected` [Type]
///
/// Warning: This bypasses precedence rules. Only use for specific patterns.
pub fn expect(text: &$t str, expected: Type) -> Result<Self, Error> {
match expected {$(
$out => Self::$func(text),
)*}.ok_or(Error::UnexpectedToken {
expected,
got: Self::from(text).into(),
})
}
$(
$(#[$meta])*
/// Tries to read [`
#[doc = stringify!($out)]
/// `] from `text`
pub fn $func(text: &$t str) -> Option<Self> {
static RE: OnceLock<Regex> = OnceLock::new();
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
.find(text)?.into();
Some(Self { variant: $out, lexeme })
})*
}
impl<$t> From<&$t str> for $type {
fn from (value: &$t str) -> Self {
$(
if let Some(token) = Self::$func(value) {
token
} else
)*
{todo!("Unexpected input: {value:#?}")}
}
}
};
}
use crate::Error;
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'text> {
/// The type of this token
variant: Type,
/// The sub[str]ing corresponding to this token
lexeme: &'text str,
}
impl<'text> Token<'text> {
/// Returns the [Type] of this [Token]
pub fn variant(&self) -> Type { self.variant }
/// Returns the Lexeme (originating string slice) of this token
pub fn lexeme(&self) -> &str { self.lexeme }
/// Parses this [Token] into another type
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
where F: std::str::FromStr {
self.lexeme.parse()
}
/// Returns whether the Lexeme is the expected [Type]
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
/// Returns the length of [Self::lexeme] in bytes.
pub fn len(&self) -> usize { self.lexeme.len() }
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
}
impl<'text> Debug for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
}
}
impl<'text> Display for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.variant {
Type::Endl | Type::EndOfFile => write!(f, "{}", self.variant),
v => write!(f, "\"{}\" ({v})", self.lexeme),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
/// contiguous whitespace, excluding newline
Space,
/// newline and contiguous whitespace
Endl,
/// A line-comment
Comment,
/// Jump label *definition*
Label,
/// Instructions
Insn,
/// Operand width is byte
ByteWidth,
/// Operand width is word
WordWidth,
/// Register mnemonic (i.e. `pc`, `r14`)
Register,
/// Marker for base-10
RadixMarkerDec,
/// Marker for base-16
RadixMarkerHex,
/// Marker for base-8
RadixMarkerOct,
/// Marker for base-2
RadixMarkerBin,
/// 1-5 hexadecimal digits (the value is range-checked against the radix during parsing)
Number,
/// Negative number marker
Minus,
/// post-increment mode marker
Plus,
/// Open-Indexed-Mode marker
LParen,
/// Close-Indexed-Mode marker
RParen,
/// Indirect mode marker
Indirect,
/// absolute address marker
Absolute,
/// immediate value marker
Immediate,
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underscore
Identifier,
/// Assembler directive
Directive,
/// Separator (comma)
Separator,
/// End of File marker
EndOfFile,
}
regex_impl! {<'text> Token<'text> {
pub fn expect_space(text: &str) -> Option<Self> {
regex!(Type::Space = r"^[\s--\n]+")
}
pub fn expect_endl(text: &str) -> Option<Self> {
regex!(Type::Endl = r"^[\s]+")
}
pub fn expect_comment(text: &str) -> Option<Self> {
regex!(Type::Comment = r"^(;|//).*")
}
pub fn expect_label(text: &str) -> Option<Self> {
regex!(Type::Label = r"^:")
}
pub fn expect_insn(text: &str) -> Option<Self> {
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
}
pub fn expect_byte_width(text: &str) -> Option<Self> {
regex!(Type::ByteWidth = r"(?i)^\.b")
}
pub fn expect_word_width(text: &str) -> Option<Self> {
regex!(Type::WordWidth = r"(?i)^\.w")
}
pub fn expect_register(text: &str) -> Option<Self> {
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)")
}
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerDec = r"(?i)^0d")
}
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
}
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerOct = r"(?i)^0o")
}
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerBin = r"(?i)^0b")
}
pub fn expect_number(text: &str) -> Option<Self> {
regex!(Type::Number = r"^\+?[[:xdigit:]]{1,5}")
}
pub fn expect_minus(text: &str) -> Option<Self> {
regex!(Type::Minus = r"^-")
}
pub fn expect_plus(text: &str) -> Option<Self> {
regex!(Type::Plus = r"^\+")
}
pub fn expect_open_idx(text: &str) -> Option<Self> {
regex!(Type::LParen = r"^\(")
}
pub fn expect_close_idx(text: &str) -> Option<Self> {
regex!(Type::RParen = r"^\)")
}
pub fn expect_indrect(text: &str) -> Option<Self> {
regex!(Type::Indirect = r"^@")
}
pub fn expect_absolute(text: &str) -> Option<Self> {
regex!(Type::Absolute = r"^&")
}
pub fn expect_immediate(text: &str) -> Option<Self> {
regex!(Type::Immediate = r"^#")
}
pub fn expect_directive(text: &str) -> Option<Self> {
regex!(Type::Directive = r"^\.\w+( .*)?")
}
pub fn expect_identifier(text: &str) -> Option<Self> {
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
}
pub fn expect_separator(text: &str) -> Option<Self> {
regex!(Type::Separator = r"^,")
}
pub fn expect_end_of_file(text: &str) -> Option<Self> {
regex!(Type::EndOfFile = r"^$")
}
}}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Space => write!(f, "space"),
Self::Endl => write!(f, "newline"),
Self::Comment => write!(f, "comment"),
Self::Label => write!(f, "label definition"),
Self::Insn => write!(f, "instruction mnemonic"),
Self::ByteWidth => write!(f, "byte-width marker"),
Self::WordWidth => write!(f, "word-width marker"),
Self::Register => write!(f, "register mnemonic"),
Self::RadixMarkerDec => write!(f, "decimal radix marker"),
Self::RadixMarkerHex => write!(f, "hexadecimal radix marker"),
Self::RadixMarkerOct => write!(f, "octal radix marker"),
Self::RadixMarkerBin => write!(f, "binary radix marker"),
Self::Number => write!(f, "number"),
Self::Minus => write!(f, "minus sign"),
Self::Plus => write!(f, "plus sign"),
Self::LParen => write!(f, "left parenthesis"),
Self::RParen => write!(f, "right parenthesis"),
Self::Indirect => write!(f, "indirect mode marker"),
Self::Absolute => write!(f, "absolute mode marker"),
Self::Immediate => write!(f, "immediate mode marker"),
Self::Identifier => write!(f, "identifier"),
Self::Directive => write!(f, "directive"),
Self::Separator => write!(f, "comma"),
Self::EndOfFile => write!(f, "EOF"),
}
}
}
/// Owned version of a token, which can outlive its parent buffer
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OwnedToken {
/// The type of this token
variant: Type,
/// The sub[String] corresponding to this token
lexeme: String,
}
impl Display for OwnedToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
}
impl<'t> From<&'t OwnedToken> for Token<'t> {
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
}
impl From<Token<'_>> for OwnedToken {
fn from(value: Token<'_>) -> Self {
let Token { variant, lexeme } = value;
OwnedToken { variant, lexeme: lexeme.to_owned() }
}
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Types(Vec<Type>);
impl<T: AsRef<[Type]>> From<T> for Types {
// TODO: Possibly bad. Check out in rust playground.
fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
}
impl Display for Types {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (idx, t) in self.0.iter().enumerate() {
write!(f, "{t}")?;
match idx {
i if i < self.0.len() - 2 => write!(f, ", ")?,
i if i < self.0.len() - 1 => write!(f, " or ")?,
_ => (),
}
}
Ok(())
}
}
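Illustrative sketch of the precedence caveat mentioned in src/tokenizer.rs: `From<&str>` follows the order above, while `expect` deliberately bypasses it.

use msp430_asm::preamble::*;

fn main() {
    // Precedence order picks Register for "r15"...
    assert!(Token::from("r15").is_variant(Type::Register));
    // ...but `expect` can still read the same text as an Identifier (e.g. for labels).
    let token = Token::expect("r15", Type::Identifier).unwrap();
    assert_eq!(token.lexeme(), "r15");
}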