From a9ee7d3bc94a11023e31fa14defc99ef5d442349 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Sat, 19 Aug 2023 23:02:24 -0500 Subject: [PATCH] msp430-asm: init repo with proof-of-concept code --- .gitignore | 1 + .rustfmt.toml | 14 + Cargo.toml | 12 + src/error.rs | 107 ++++++ src/hash.rs | 17 + src/lib.rs | 21 ++ src/linker.rs | 20 ++ src/main.rs | 104 ++++++ src/parser.rs | 212 ++++++++++++ src/parser/comment.rs | 15 + src/parser/directive.rs | 32 ++ src/parser/identifier.rs | 34 ++ src/parser/instruction.rs | 67 ++++ src/parser/instruction/encoding.rs | 81 +++++ src/parser/instruction/encoding/builder.rs | 76 +++++ .../instruction/encoding/encoding_parser.rs | 39 +++ .../instruction/encoding/jump_target.rs | 39 +++ src/parser/instruction/encoding/number.rs | 75 +++++ .../instruction/encoding/primary_operand.rs | 141 ++++++++ src/parser/instruction/encoding/register.rs | 111 +++++++ .../instruction/encoding/secondary_operand.rs | 95 ++++++ src/parser/instruction/encoding/width.rs | 31 ++ src/parser/instruction/opcode.rs | 258 +++++++++++++++ src/parser/label.rs | 16 + src/parser/parsable.rs | 44 +++ src/tokenizer.rs | 193 +++++++++++ src/tokenizer/context.rs | 36 ++ src/tokenizer/token.rs | 309 ++++++++++++++++++ valid.asm | 260 +++++++++++++++ 29 files changed, 2460 insertions(+) create mode 100644 .gitignore create mode 100644 .rustfmt.toml create mode 100644 Cargo.toml create mode 100644 src/error.rs create mode 100644 src/hash.rs create mode 100644 src/lib.rs create mode 100644 src/linker.rs create mode 100644 src/main.rs create mode 100644 src/parser.rs create mode 100644 src/parser/comment.rs create mode 100644 src/parser/directive.rs create mode 100644 src/parser/identifier.rs create mode 100644 src/parser/instruction.rs create mode 100644 src/parser/instruction/encoding.rs create mode 100644 src/parser/instruction/encoding/builder.rs create mode 100644 src/parser/instruction/encoding/encoding_parser.rs create mode 100644 
src/parser/instruction/encoding/jump_target.rs create mode 100644 src/parser/instruction/encoding/number.rs create mode 100644 src/parser/instruction/encoding/primary_operand.rs create mode 100644 src/parser/instruction/encoding/register.rs create mode 100644 src/parser/instruction/encoding/secondary_operand.rs create mode 100644 src/parser/instruction/encoding/width.rs create mode 100644 src/parser/instruction/opcode.rs create mode 100644 src/parser/label.rs create mode 100644 src/parser/parsable.rs create mode 100644 src/tokenizer.rs create mode 100644 src/tokenizer/context.rs create mode 100644 src/tokenizer/token.rs create mode 100755 valid.asm diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..6d174c4 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,14 @@ +unstable_features = true +max_width = 120 +wrap_comments = true +comment_width = 100 + +# Allow structs to fill an entire line +use_small_heuristics = "Max" +# Allow small functions on single line +fn_single_line = true + +# Alignment +enum_discrim_align_threshold = 12 +#struct_field_align_threshold = 12 +where_single_line = true diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..81a52c6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "msp430-asm" +version = "0.1.0" +edition = "2021" +authors = ["John Breaux"] +publish = false + + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +regex = "1.9.3" diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..1a9c96c --- /dev/null +++ b/src/error.rs @@ -0,0 +1,107 @@ +// © 2023 John Breaux +// TODO: Be incredibly specific about the source of the errors + +use std::fmt::Display; + +use super::{ + tokenizer::token::{OwnedToken, Types}, + *, +}; + +// TODO: Store error context in error. 
for example: +// Error {ExpectationFailed{...}, WhileParsing(Register)} + +#[derive(Debug)] +pub enum Error { + /// Produced by [Parser](crate::parser::Parser::parse()) + ParseError(parser::root::Root, Box), + Contextual(Context, Box), + /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] + UnexpectedToken { + expected: Type, + got: OwnedToken, + }, + /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the expected + /// [Types](Type) + AllExpectationsFailed { + expected: Types, + got: OwnedToken, + }, + /// Produced by + /// [Number](parser::instruction::encoding::number::Number)[::parse()](parser::parsable::Parsable::parse()) + /// when the parsed number contains digits too high for the specified radix + UnexpectedDigits(String, u32), + /// Produced by + /// [Opcode](parser::instruction::opcode::Opcode)[::parse()](parser::parsable::Parsable::parse()) + /// when the opcode passed lexing but did not match recognized opcodes. + /// + /// This should be interpreted as a failure in lexing. + UnrecognizedOpcode(String), + NotARegister(String), + RegisterTooHigh(u16), + FatSecondaryImmediate(isize), + NumberTooWide(isize), + JumpedTooFar(isize), + JumpedOdd(isize), + EndOfFile, +} + +impl Error { + pub fn context(self, c: Context) -> Self { + match self { + Self::Contextual(..) 
=> self, + _ => Self::Contextual(c, Box::new(self)), + } + } + + // Extracts the root of the error tree + pub fn bare(self) -> Self { + match self { + Self::Contextual(_, bare) => bare.bare(), + _ => self, + } + } + + pub fn swap(mut self, other: Self) -> Self { + if let Self::Contextual(_, err) = &mut self { + _ = std::mem::replace(err.as_mut(), other) + } + self + } + + pub fn expected, T: Into>(expected: E, got: T) -> Self { + match expected.as_ref().len() { + 1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() }, + _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() }, + } + } +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), + Error::ParseError(_, error) => write!(f, "Error encountered while parsing:\n{error}"), + Error::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), + Error::AllExpectationsFailed { expected, got } => write!(f, "Expected one of {expected}, got {got}."), + Error::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), + Error::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), + Error::NotARegister(reg) => write!(f, "{reg} is not a register"), + Error::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"), + Error::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"), + Error::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"), + Error::JumpedTooFar(num) => write!(f, "{num} is too far away (jump targets must be in range (-3fc..=3fe"), + Error::JumpedOdd(num) => write!(f, "Jump target {num} should not be odd."), + Error::EndOfFile => write!(f, "Unexpected end of file"), + } + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::ParseError(_, 
/// The integer type produced by hashing with [FromHash]
pub type Hash = u64;

/// Convenience trait for building a value out of the hash of some other data.
///
/// Implemented automatically for every type that is `From<Hash>`.
pub trait FromHash: From<Hash> {
    /// Hashes anything that implements [std::hash::Hash] using the
    /// [DefaultHasher](std::collections::hash_map::DefaultHasher)
    fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
        use std::hash::Hasher;
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        hashable.hash(&mut hasher);
        hasher.finish()
    }
    /// Hashes `hashable`, then converts the resulting [type@Hash] into `Self`
    fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
    where Self: Sized {
        Self::from(Self::hash(hashable))
    }
}
impl<T: From<Hash>> FromHash for T {}
T; + fn visit_instruction(&mut self, i: &Instruction) -> T; + fn visit_directive(&mut self, d: &Directive) -> T; + // the most important one: resolve identifiers + fn visit_identifier(&mut self, i: &Identifier) -> T; +} +/// TODO: [Linker] +pub struct Linker; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..37ba9f3 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,104 @@ +//! Simple frontend for the assembler + +use std::io::Read; + +use msp430_asm::preamble::*; + +// const ASM: &str = r" +// //.org 8000 +// //.define INT #2400 +// //entry: +// mov.b 8000(sp), r15 ; pop into sp +// rrc @pc+ +// add #64, r8 +// call #10 // call INT +// "; + +fn main() -> Result<(), Error> { + // Get args + let mut repl = true; + for arg in std::env::args() { + match arg.as_str() { + "-" | "-f" | "--file" => repl = false, + _ => (), + } + } + + // Decide if repl mode is enabled + let mut buf = String::new(); + + if repl { + // print!("> "); + // let _ = std::io::stdout().flush(); + while let Ok(len) = std::io::stdin().read_line(&mut buf) { + match len { + 0 => break, + 1 => continue, + _ => (), + } + if len < 1 { + break; + } + // print!("\nLexer: "); + // tokenizer_dump(&mut Tokenizer::new(&buf)); + //print!("Parser: "); + match Parser::default().parse(&buf) { + Ok(line) => println!("{line:x}"), + //Ok(tree) => println!("=> {tree}\n => {tree:x}"), + Err(error) => println!("{error}"), + } + buf.clear(); + // print!("> "); + // let _ = std::io::stdout().flush(); + } + } else { + std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?; + let mut tk = Tokenizer::new(&buf); + + // println!("Lexer: "); + // tokenizer_dump(&mut Tokenizer::new(&buf)); + let tree = Parser::default().parse_with(&mut tk); + match &tree { + Ok(tree) => println!("{tree:x}"), + Err(error) => eprintln!("{error}"), + } + } + + Ok(()) +} + +#[allow(dead_code)] +fn tokenizer_dump<'text, T: TokenStream<'text>>(t: &mut T) { + for token in t { + match token.variant() { + 
//Token::Space => (), + Type::Endl => { + println!(); + continue; + } + Type::Comment => (), + Type::Label => (), + Type::Insn => (), + Type::ByteWidth => (), + Type::WordWidth => (), + Type::Register => (), + Type::RadixMarkerHex => (), + Type::RadixMarkerOct => (), + Type::RadixMarkerBin => (), + Type::Number => (), + Type::Minus => (), + Type::LParen => (), + Type::RParen => (), + Type::Indirect => (), + Type::Plus => (), + Type::Absolute => (), + Type::Immediate => (), + Type::Identifier => (), + Type::Directive => (), + Type::Separator => (), + Type::EndOfFile => (), + _ => continue, + }; + print!("{token:?} "); + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..de352e9 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,212 @@ +// © 2023 John Breaux +//! Parses [Tokens](crate::Token) into an [abstract syntax tree](Root) + +use crate::{Error, Hash, TokenStream, Type}; +use std::fmt::{Debug, Display, LowerHex}; + +pub mod preamble { + //! All the different AST node types + use super::*; + // Traits + pub use parsable::Parsable; + + pub use comment::Comment; + pub use directive::Directive; + pub use identifier::Identifier; + pub use instruction::{ + encoding::{ + encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand, + register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding, + }, + opcode::Opcode, + Instruction, + }; + pub use label::Label; + pub use line::Line; + pub use root::Root; +} +use preamble::*; + +pub(crate) mod parsable; + +pub(crate) mod comment; +pub(crate) mod directive; +pub(crate) mod identifier; +pub(crate) mod instruction; +pub(crate) mod label; + +pub(crate) mod line { + // © 2023 John Breaux + use super::*; + + /// A line is one of: + /// - [`Label`] (definition) + /// - [`Instruction`] + /// - [`Directive`] + /// - [`Comment`] + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum Line { + Empty, + Label(Label), // 
TODO: Label resolution + Insn(Instruction), + Directive(Directive), + Comment(Comment), + EndOfFile, // Expected end of file + } + + impl Parsable for Line { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + if let Ok(token) = stream.peek_expect_any_of([Type::Comment, Type::Directive, Type::Insn, Type::Identifier]) + { + return Ok(match token.variant() { + Type::Comment => Self::Comment(Comment::parse(p, stream)?), + Type::Directive => Self::Directive(Directive::parse(p, stream)?), + Type::Identifier => Self::Label(Label::parse(p, stream)?), + Type::Insn => Self::Insn(Instruction::parse(p, stream)?), + _ => unreachable!(), + }); + } + // TODO: preserve comments + let token = stream.expect_any_of([Type::EndOfFile])?; + Ok(match token.variant() { + Type::EndOfFile => Self::EndOfFile, + _ => unreachable!(), + }) + } + } + impl Display for Line { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Empty => writeln!(f, "\n"), + Self::Label(arg0) => Display::fmt(arg0, f), + Self::Insn(arg0) => Display::fmt(arg0, f), + Self::Directive(arg0) => Display::fmt(arg0, f), + Self::Comment(arg0) => Display::fmt(arg0, f), + Self::EndOfFile => write!(f, "; End of file."), + } + } + } + impl LowerHex for Line { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Line::Insn(arg0) => LowerHex::fmt(arg0, f), + _ => Ok(()), + } + } + } +} + +pub(crate) mod root { + // © 2023 John Breaux + use super::*; + + /// Contains the entire AST + #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct Root(pub Vec); + + // TODO: Get data out of ParseTree + // TODO: Maybe implement some sort of follower + impl Parsable for Root { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + let mut lines = vec![]; + loop { + match Line::parse(p, stream) { + Ok(Line::EndOfFile) => break, + Ok(line) => lines.push(line), + Err(e) 
=> { + let ret = Self(lines); + eprintln!("{ret}"); + eprintln!("Error:{e}\n"); + eprint!("Remaining:"); + stream.for_each(|t| eprint!("{t}")); + eprintln!(); + return Err(Error::ParseError(ret, Box::new(e))); + } + } + } + Ok(Root(lines)) + } + } + + impl Display for Root { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for line in self.0.iter() { + f.pad(&format!("{line} "))?; + } + Ok(()) + } + } + impl LowerHex for Root { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for line in self.0.iter() { + LowerHex::fmt(line, f)?; + } + Ok(()) + } + } + impl Debug for Root { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for line in self.0.iter() { + Display::fmt(line, f)?; + Debug::fmt(line, f)?; + } + Ok(()) + } + } +} + +/// The type for [Parser] callbacks +pub type EmitComment = Box; +pub type DefineLabel = Box Result<(), Error>>; + +pub struct Parser { + radix: u32, + // TODO: callbacks for emitted token sequences?! 
+ on_label: Option, + on_comment: Option, +} + +impl Parser { + pub fn parse_with<'t, T>(self, stream: &'t mut T) -> Result + where T: TokenStream<'t> { + Root::parse(&self, &mut stream.ignore_spaces()) + } + pub fn parse(self, input: &T) -> Result + where T: AsRef + ?Sized { + Root::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces()) + } + pub fn parse_one(self, input: &T) -> Result + where T: AsRef + ?Sized { + Line::parse(&self, &mut super::Tokenizer::new(input).ignore_spaces()) + } + + /// Sets the default radix for [Token](crate::tokenizer::token::Token) -> [Number] + /// conversion + pub fn radix(mut self, radix: u32) { self.radix = radix; } + + /// Inform the caller of a new identifier definition + pub fn define_label(&mut self, l: &Identifier) -> Result<(), Error> { + match self.on_label.as_mut() { + Some(f) => f(l), + _ => Ok(()), + } + } + /// Inform the caller of an identifier being used + pub fn emit_comment(&mut self, d: &str) { + if let Some(f) = self.on_comment.as_mut() { + f(d) + } + } +} + +impl Default for Parser { + fn default() -> Self { Self { radix: 16, on_label: None, on_comment: None } } +} + +impl Debug for Parser { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive() + } +} diff --git a/src/parser/comment.rs b/src/parser/comment.rs new file mode 100644 index 0000000..2cb97ab --- /dev/null +++ b/src/parser/comment.rs @@ -0,0 +1,15 @@ +// © 2023 John Breaux +use super::*; +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Comment(pub String); + +impl Parsable for Comment { + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + let token = stream.expect(Type::Comment)?; + Ok(Self(token.lexeme().to_string())) + } +} +impl Display for Comment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) } +} diff --git 
a/src/parser/directive.rs b/src/parser/directive.rs new file mode 100644 index 0000000..fd1dc73 --- /dev/null +++ b/src/parser/directive.rs @@ -0,0 +1,32 @@ +// © 2023 John Breaux +//! A [Directive] issues commands directly to the [Tokenizer](crate::Tokenizer) and +//! [Linker](crate::Linker) +use super::*; +use crate::hash::FromHash; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Directive(pub Hash, pub String); + +impl Directive { + fn str(mut self, s: S) -> Self { + self.1 = s.to_string(); + self + } +} + +impl From for Directive { + fn from(value: Hash) -> Self { Self(value, String::new()) } +} + +impl Parsable for Directive { + fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + // expect a directive + let d = stream.expect(Type::Directive)?; + // send the directive to the listener + Ok(Self::from_hash(d.lexeme()).str(d.lexeme())) + } +} +impl Display for Directive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.1) } +} diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs new file mode 100644 index 0000000..bd25609 --- /dev/null +++ b/src/parser/identifier.rs @@ -0,0 +1,34 @@ +// © 2023 John Breaux +use super::*; +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Identifier { + Hash(Hash), + Str(String), +} + +impl Identifier { + fn str>(s: T) -> Self { Self::Str(s.as_ref().into()) } +} + +impl From for Identifier { + fn from(value: Hash) -> Self { Self::Hash(value) } +} + +impl Parsable for Identifier { + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + let token = stream.expect(Type::Identifier)?; + match token.variant() { + Type::Identifier => Ok(Self::str(token.lexeme())), + _ => unreachable!("Expected Identifier, got {token:?}"), + } + } +} +impl Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + 
Identifier::Hash(_) => Display::fmt("Unresolved", f), + Identifier::Str(s) => Display::fmt(s, f), + } + } +} diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs new file mode 100644 index 0000000..a64ab6f --- /dev/null +++ b/src/parser/instruction.rs @@ -0,0 +1,67 @@ +// © 2023 John Breaux +//! An [Instruction] contains the [Opcode] and [Encoding] information for a single msp430 +//! instruction +//! +//! +//! Note: [Opcode] and [Encoding] are very tightly coupled, because they represent interdependent parts +//! of the same instruction. This is why [Opcode]::resolve() returns an [EncodingParser] -- otherwise, +//! there's an explosion of states that I can't really cope with on my own. Really, there's about 9 +//! valid classes of instruction, some of which are only used for one or two of the MSP430's +//! instructions. + +use super::*; + +pub mod encoding; +pub mod opcode; + +/// Represents an entire MSP430 instruction +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Instruction(Opcode, Encoding); + +impl Instruction { + pub fn opcode(&self) -> &Opcode { &self.0 } + pub fn encoding(&self) -> &Encoding { &self.1 } + /// Gets the Instruction as a [u16] + pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() } + /// Gets the [extension words] + pub fn ext_words(&self) -> (Option, Option) { self.1.extwords() } +} + +impl Parsable for Instruction { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where + Self: Sized, + T: crate::TokenStream<'text>, + { + // parse an opcode + let insn = stream.expect(Type::Insn)?; + let opcode: Opcode = insn.parse()?; + // resolve the opcode to a final opcode and an encoding + let (opcode, encoding) = opcode.resolve(); + // parse the encoding + let encoding = encoding.parse(p, stream)?; + Ok(Self(opcode, encoding)) + } +} + +impl From for u16 { + fn from(value: Instruction) -> Self { value.word() } +} + +impl Display for Instruction { + fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) } +} + +impl LowerHex for Instruction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let (word, (ext_src, ext_dst)) = (self.word(), self.ext_words()); + write!(f, "{:04x} ", word.swap_bytes())?; + if let Some(e) = ext_src { + write!(f, "{:04x} ", e.swap_bytes())? + } + if let Some(e) = ext_dst { + write!(f, "{:04x} ", e.swap_bytes())? + } + Ok(()) + } +} diff --git a/src/parser/instruction/encoding.rs b/src/parser/instruction/encoding.rs new file mode 100644 index 0000000..b1bef8f --- /dev/null +++ b/src/parser/instruction/encoding.rs @@ -0,0 +1,81 @@ +// © 2023 John Breaux +//! An [Encoding] represents the set of arguments for the [msp430's instructions](Opcode) +use super::*; + +pub mod number; +pub mod register; +pub mod width; + +pub mod jump_target; +pub mod primary_operand; +pub mod secondary_operand; + +mod builder; +pub mod encoding_parser; + +use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder}; +use encoding_parser::EncodingParser; + +/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html) +/// +/// # Examples +/// ```rust +/// use msp430_asm::{*, parser::{Encoding, EncodingParser}}; +/// // Create a token sequence +/// let asm_file = r".b 8000(r15)"; +/// // Create a single-operand encoding parser +/// let single: EncodingParser = Encoding::single().end(); +/// // Parse an Encoding from it +/// let encoding: Encoding = single +/// .parse(&Parser::default(), &mut Tokenizer::new(asm_file).ignore_spaces()) +/// .unwrap(); +/// // Print the Encoding +/// println!("{encoding}"); +/// ``` +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Encoding { + Single { width: Width, dst: PrimaryOperand }, + Jump { target: JumpTarget }, + Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand }, +} +impl Encoding { + /// Returns a builder for [Encoding::Single] + pub fn 
single() -> SingleBuilder { Default::default() } + /// Returns a builder for [Encoding::Jump] + pub fn jump() -> JumpBuilder { Default::default() } + /// Returns a builder for [Encoding::Double] + pub fn double() -> DoubleBuilder { Default::default() } + /// Returns a builder for [Encoding::Double] + /// + /// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and + /// dst are the same + pub fn reflexive() -> ReflexiveBuilder { Default::default() } + /// + pub fn word(&self) -> u16 { + match *self { + Encoding::Single { width, dst } => u16::from(width) | dst.mode() | dst.register() as u16, + Encoding::Jump { target } => target.word(), + Encoding::Double { width, src, dst } => { + u16::from(width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8) + } + } + } + /// Returns extwords for instruction + pub fn extwords(&self) -> (Option, Option) { + match self { + Encoding::Double { src, dst, .. } => (src.ext_word(), dst.ext_word()), + Encoding::Single { dst, .. } => (dst.ext_word(), None), + Encoding::Jump { .. } => (None, None), + } + } +} + +impl Display for Encoding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Encoding::Single { width, dst } => write!(f, "{width} {dst}"), + Encoding::Jump { target } => write!(f, " {target}"), + Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"), + } + } +} diff --git a/src/parser/instruction/encoding/builder.rs b/src/parser/instruction/encoding/builder.rs new file mode 100644 index 0000000..c70a843 --- /dev/null +++ b/src/parser/instruction/encoding/builder.rs @@ -0,0 +1,76 @@ +// © 2023 John Breaux +//! 
Builder API for [EncodingParser] +use super::*; +#[derive(Debug, Default)] +pub struct SingleBuilder { + width: Option, + dst: Option, +} +impl SingleBuilder { + pub fn width(mut self, width: bool) -> Self { + self.width = Some(width.into()); + self + } + /// Sets the [PrimaryOperand] field + pub fn operand(mut self, dst: PrimaryOperand) -> Self { + self.dst = Some(dst); + self + } + /// Build + pub fn end(&self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } } +} + +#[derive(Debug, Default)] +pub struct JumpBuilder { + target: Option, +} +impl JumpBuilder { + pub fn target(mut self, target: JumpTarget) -> Self { + self.target = Some(target); + self + } + pub fn end(&self) -> EncodingParser { EncodingParser::Jump { target: self.target } } +} + +#[derive(Debug, Default)] +pub struct DoubleBuilder { + width: Option, + src: Option, + dst: Option, +} +impl DoubleBuilder { + /// Sets the [Width] field + pub fn width(mut self, width: bool) -> Self { + self.width = Some(width.into()); + self + } + /// Sets the [PrimaryOperand] field + pub fn src(mut self, src: PrimaryOperand) -> Self { + self.src = Some(src); + self + } + /// Sets the [PrimaryOperand] field + pub fn dst(mut self, dst: SecondaryOperand) -> Self { + self.dst = Some(dst); + self + } + pub fn end(&self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } } +} + +#[derive(Debug, Default)] +pub struct ReflexiveBuilder { + width: Option, + reg: Option, +} +impl ReflexiveBuilder { + /// Sets the [Width] field + pub fn width(mut self, width: bool) -> Self { + self.width = Some(width.into()); + self + } + pub fn reg(mut self, reg: SecondaryOperand) -> Self { + self.reg = Some(reg); + self + } + pub fn end(&self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } } +} diff --git a/src/parser/instruction/encoding/encoding_parser.rs b/src/parser/instruction/encoding/encoding_parser.rs new file mode 100644 index 
0000000..118938c --- /dev/null +++ b/src/parser/instruction/encoding/encoding_parser.rs @@ -0,0 +1,39 @@ +// © 2023 John Breaux +//! An [EncodingParser] builds an [Encoding] from a [TokenStream] +use super::*; + +#[derive(Debug)] +/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream] +pub enum EncodingParser { + Single { width: Option, dst: Option }, + Jump { target: Option }, + Double { width: Option, src: Option, dst: Option }, + Reflexive { width: Option, reg: Option }, +} +impl EncodingParser { + /// Constructs an [Encoding] from this [EncodingParser], filling holes + /// with the tokenstream + pub fn parse<'text, T>(&self, p: &Parser, stream: &mut T) -> Result + where T: crate::TokenStream<'text> { + Ok(match self { + Self::Single { width, dst } => { + let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream)); + let dst = if let Some(dst) = dst { *dst } else { PrimaryOperand::parse(p, stream)? }; + Encoding::Single { width, dst } + } + Self::Jump { target } => Encoding::Jump { target: target.unwrap_or(JumpTarget::parse(p, stream)?) }, + Self::Double { width, src, dst } => { + let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream)); + let src = if let Some(src) = src { *src } else { PrimaryOperand::parse(p, stream)? }; + let dst = if let Some(dst) = dst { *dst } else { SecondaryOperand::parse(p, stream)? }; + + Encoding::Double { width, src, dst } + } + Self::Reflexive { width, reg } => { + let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default()); + let reg = if let Some(reg) = reg { *reg } else { SecondaryOperand::parse(p, stream)? }; + Encoding::Double { width, src: reg.into(), dst: reg } + } + }) + } +} diff --git a/src/parser/instruction/encoding/jump_target.rs b/src/parser/instruction/encoding/jump_target.rs new file mode 100644 index 0000000..347c42b --- /dev/null +++ b/src/parser/instruction/encoding/jump_target.rs @@ -0,0 +1,39 @@ +// © 2023 John Breaux +//! 
A [JumpTarget] contains the [pc-relative offset](Number) or [Identifier] +//! for a [Jump instruction encoding](Encoding::Jump) +use super::*; + +/// The target of a [Jump](Encoding::Jump) +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct JumpTarget(Number); + +impl JumpTarget { + pub fn word(&self) -> u16 { u16::from(self.0) & 0x3ff } +} + +impl Parsable for JumpTarget { + /// - Identifier + /// - Number + /// - Negative + /// - Number + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: crate::TokenStream<'text> { + // Try to parse a number + let target = Number::parse(p, stream)?; + match target.into() { + i if i % 2 != 0 => Err(Error::JumpedOdd(i).context(stream.context()))?, + i if (-1024..=1022).contains(&(i - 2)) => Ok(Self((target - 2) >> 1)), + i => Err(Error::JumpedTooFar(i).context(stream.context()))?, + } + } +} + +impl From for u16 { + fn from(value: JumpTarget) -> Self { value.0.into() } +} + +impl Display for JumpTarget { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", (1 + isize::from(self.0)) << 1) + } +} diff --git a/src/parser/instruction/encoding/number.rs b/src/parser/instruction/encoding/number.rs new file mode 100644 index 0000000..18943b8 --- /dev/null +++ b/src/parser/instruction/encoding/number.rs @@ -0,0 +1,75 @@ +// © 2023 John Breaux +//! A [Number] represents a 16-bit signed or unsigned word +use super::*; + +// TODO: Allow identifiers/expressions in place of numbers +// - Dependency inversion in TokenStream to allow swapping the parser mid-parse? +// - Oh my god, not relying on std::iter::Iterator allows for so many more parsing options + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Number(isize, u32); // (value, radix) + +impl Parsable for Number { + // A number is: + // RadixMarker[Hex|Oct|Bin]? 
+ // - Number + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + use Type::*; + let negative = stream.expect(Minus).is_ok(); + let radix = match stream + .expect_any_of([RadixMarkerHex, RadixMarkerDec, RadixMarkerOct, RadixMarkerBin]) + .ok() + .map(|t| t.variant()) + { + Some(RadixMarkerHex) => 16, + Some(RadixMarkerDec) => 10, + Some(RadixMarkerOct) => 8, + Some(RadixMarkerBin) => 2, + _ => p.radix, + }; + let number = stream.expect(Number)?; + let number = isize::from_str_radix(number.lexeme(), radix) + .map_err(|_| Error::UnexpectedDigits(number.lexeme().into(), radix).context(stream.context()))? + * if negative { -1 } else { 1 }; + // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit) + Ok(Self( + if (-0x8000..0x10000).contains(&number) { + number + } else { + Err(Error::NumberTooWide(number).context(stream.context()))? + }, + radix, + )) + } +} +impl From for isize { + fn from(value: Number) -> Self { value.0 as Self } +} +impl From for i32 { + fn from(value: Number) -> Self { value.0 as Self } +} +impl From for u16 { + /// Converts this type from the input type. + fn from(value: Number) -> Self { value.0 as Self } +} + +impl std::ops::Sub for Number { + type Output = Self; + fn sub(mut self, rhs: isize) -> Self::Output { + self.0 -= rhs; + self + } +} + +impl std::ops::Shr for Number { + type Output = Self; + fn shr(mut self, rhs: usize) -> Self::Output { + self.0 >>= rhs; + self + } +} + +impl std::fmt::Display for Number { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:x}", self.0) } +} diff --git a/src/parser/instruction/encoding/primary_operand.rs b/src/parser/instruction/encoding/primary_operand.rs new file mode 100644 index 0000000..651f595 --- /dev/null +++ b/src/parser/instruction/encoding/primary_operand.rs @@ -0,0 +1,141 @@ +// © 2023 John Breaux +//! 
A [PrimaryOperand] contains the first [Register], addressing mode, and Extension +//! Word for an [instruction](Instruction) +use super::*; + +/// The Source of a [Double](Encoding::Double) or Destination of a +/// [Single](Encoding::Single) +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum PrimaryOperand { + Direct(Register), + Indirect(Register), + PostInc(Register), + Indexed(Register, Number), + Absolute(Number), + Immediate(Number), + Four, + Eight, + Zero, + One, + Two, + MinusOne, +} + +impl PrimaryOperand { + /// Returns the mode bits + pub fn mode(&self) -> u16 { + use PrimaryOperand::*; + match self { + Direct(_) | Zero => 0, + Indexed(_, _) | Absolute(_) | One => 1 << 4, + Indirect(_) | Two | Four => 2 << 4, + PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4, + } + } + /// Gets the register + pub fn register(&self) -> Register { + use PrimaryOperand::*; + match self { + Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r, + Immediate(_) => Register::pc, + Absolute(_) | Four | Eight => Register::sr, + Zero | One | Two | MinusOne => Register::cg, + } + } + /// Gets the extension word, if present + pub fn ext_word(&self) -> Option { + use PrimaryOperand::*; + match self { + Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()), + _ => None, + } + } +} + +impl Parsable for PrimaryOperand { + // - Register + // - Indirect + // - Register + // - PostInc? + // - Number + // - OpenIdx + // - Register + // - CloseIdx + // - Absolute + // - Number + // - Immediate + // - Number + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: crate::TokenStream<'text> { + use PrimaryOperand::*; + // Try parsing as Register Direct + if let Some(r) = Register::try_parse(p, stream)? { + return Ok(Self::Direct(r)); + } + // Try parsing as Number Indexed + if let Some(idx) = Number::try_parse(p, stream)? 
{ + stream.expect(Type::LParen)?; + let reg = Register::parse(p, stream)?; + stream.expect(Type::RParen)?; + return Ok(Self::Indexed(reg, idx)); + } + // Or directly match any of the valid prefix markers + let token = stream.expect_any_of([Type::Indirect, Type::Absolute, Type::Immediate])?; + Ok(match token.variant() { + Type::Indirect => { + let reg = stream.expect(Type::Register)?.parse()?; + match stream.expect(Type::Plus) { + Ok(_) => PostInc(reg), + Err(_) => Indirect(reg), + } + } + Type::Absolute => Absolute(Number::parse(p, stream)?), + Type::Immediate => { + let number = Number::parse(p, stream)?; + match number.into() { + // There are two representations for the all-ones constant, since Number preserves absolute + // signedness. + -1 | 0xffff => MinusOne, + 0 => Zero, + 1 => One, + 2 => Two, + 4 => Four, + 8 => Eight, + _ => Immediate(number), + } + } + _ => unreachable!("Token {token:?} passed expectation but failed match!"), + }) + } +} + +impl From for PrimaryOperand { + fn from(value: SecondaryOperand) -> Self { + match value { + SecondaryOperand::Direct(r) => Self::Direct(r), + SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n), + SecondaryOperand::Absolute(n) => Self::Absolute(n), + SecondaryOperand::Zero => Self::Zero, + SecondaryOperand::One => Self::One, + } + } +} + +impl Display for PrimaryOperand { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Direct(r) => write!(f, "{r}"), + Self::Indirect(r) => write!(f, "@{r}"), + Self::PostInc(r) => write!(f, "@{r}+"), + Self::Indexed(r, idx) => write!(f, "{idx}({r})"), + Self::Absolute(n) => write!(f, "&{n}"), + Self::Immediate(n) => write!(f, "#{n}"), + Self::Four => write!(f, "#4"), + Self::Eight => write!(f, "#8"), + Self::Zero => write!(f, "#0"), + Self::One => write!(f, "#1"), + Self::Two => write!(f, "#2"), + Self::MinusOne => write!(f, "#-1"), + } + } +} diff --git a/src/parser/instruction/encoding/register.rs 
b/src/parser/instruction/encoding/register.rs new file mode 100644 index 0000000..18fa297 --- /dev/null +++ b/src/parser/instruction/encoding/register.rs @@ -0,0 +1,111 @@ +// © 2023 John Breaux +//! A [Register] epresents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) +use super::*; +use std::str::FromStr; + +/// A [Register] epresents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) + +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Register { + /// Program Counter + pc, + /// Stack Pointer + sp, + /// Status Register + sr, + /// Constant Generator + cg, + r4, + r5, + r6, + r7, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, +} + +impl Parsable for Register { + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + where T: crate::TokenStream<'text> { + stream.expect(Type::Register).map_err(|e| e.context(stream.context()))?.lexeme().parse() + } +} + +impl From for u16 { + fn from(value: Register) -> Self { value as u16 } +} + +impl TryFrom for Register { + type Error = Error; + fn try_from(value: u16) -> Result { + use Register::*; + Ok(match value { + 0 => pc, + 1 => sp, + 2 => sr, + 3 => cg, + 4 => r4, + 5 => r5, + 6 => r6, + 7 => r7, + 8 => r8, + 9 => r9, + 10 => r10, + 11 => r11, + 12 => r12, + 13 => r13, + 14 => r14, + 15 => r15, + _ => return Err(Error::RegisterTooHigh(value)), + }) + } +} + +impl FromStr for Register { + type Err = Error; + + fn from_str(s: &str) -> Result { + use Register::*; + match s { + "pc" => Ok(pc), + "sp" => Ok(sp), + "sr" => Ok(sr), + "cg" => Ok(cg), + _ => str::parse::(&s[1..]).map_err(|_| -> Self::Err { Error::NotARegister(s.into()) })?.try_into(), + } + } +} + +impl From for &str { + fn from(value: Register) -> Self { + use Register::*; + match value { + pc => "pc", + sp => "sp", + sr => "sr", + cg => "cg", + r4 => "r4", + r5 => "r5", + r6 => "r6", + r7 => "r7", + r8 => 
"r8", + r9 => "r9", + r10 => "r10", + r11 => "r11", + r12 => "r12", + r13 => "r13", + r14 => "r14", + r15 => "r15", + } + } +} + +impl std::fmt::Display for Register { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) } +} diff --git a/src/parser/instruction/encoding/secondary_operand.rs b/src/parser/instruction/encoding/secondary_operand.rs new file mode 100644 index 0000000..44d8f33 --- /dev/null +++ b/src/parser/instruction/encoding/secondary_operand.rs @@ -0,0 +1,95 @@ +// © 2023 John Breaux +//! A [SecondaryOperand] contains the second [Register], addressing mode, and Extension +//! Word for a [two-operand](Encoding::Double) [instruction](Instruction) +use super::*; + +/// The destination of a [Double](Encoding::Double) +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SecondaryOperand { + Direct(Register), + Indexed(Register, Number), + Absolute(Number), + // Joke encodings? + Zero, + One, +} + +impl SecondaryOperand { + pub fn mode(&self) -> u16 { + use SecondaryOperand::*; + match self { + Direct(_) | Zero => 0, + Indexed(_, _) | Absolute(_) | One => 1 << 7, + } + } + pub fn register(&self) -> Register { + use SecondaryOperand::*; + match self { + Direct(r) | Indexed(r, _) => *r, + Absolute(_) => Register::sr, + Zero | One => Register::cg, + } + } + /// This is the only way to have an extension word + pub fn ext_word(&self) -> Option { + use SecondaryOperand::*; + match self { + Indexed(_, w) | Absolute(w) => Some((*w).into()), + _ => None, + } + } +} + +impl Parsable for SecondaryOperand { + /// Separator + /// - Register => Direct + /// - Number => Indexed + /// - OpenIdx + /// - Register + /// - CloseIdx + /// - Absolute + /// - Number + /// - Immediate + /// - Number == 0, 1 + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: crate::TokenStream<'text> { + use SecondaryOperand::*; + stream.allow(Type::Separator); + // Try parsing as Register Direct + 
if let Some(r) = Register::try_parse(p, stream)? { + return Ok(Self::Direct(r)); + } + // Try parsing as Number Indexed + if let Some(idx) = Number::try_parse(p, stream)? { + stream.expect(Type::LParen)?; + let reg = Register::parse(p, stream)?; + stream.expect(Type::RParen)?; + return Ok(Self::Indexed(reg, idx)); + } + let token = stream.expect_any_of([Type::Absolute, Type::Immediate])?; + Ok(match token.variant() { + Type::Absolute => Absolute(Number::parse(p, stream)?), + Type::Immediate => { + let number = Number::parse(p, stream)?; + match number.into() { + 0 => Zero, + 1 => One, + n => Err(Error::FatSecondaryImmediate(n as isize).context(stream.context()))?, + } + } + _ => unreachable!("Token {token:?} passed expectation but failed match!"), + }) + } +} + +impl Display for SecondaryOperand { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Direct(r) => write!(f, "{r}"), + Self::Indexed(r, idx) => write!(f, "{idx}({r})"), + Self::Absolute(n) => write!(f, "&{n}"), + Self::Zero => write!(f, "#0"), + Self::One => write!(f, "#1"), + } + } +} diff --git a/src/parser/instruction/encoding/width.rs b/src/parser/instruction/encoding/width.rs new file mode 100644 index 0000000..1501456 --- /dev/null +++ b/src/parser/instruction/encoding/width.rs @@ -0,0 +1,31 @@ +// © 2023 John Breaux +use super::*; + +/// Represents an instruction's operand width. 
+/// +/// Evaluates to false when instruction takes word-sized operands, or true when +/// instruction takes byte-sized operands +#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Width(bool); + +impl Parsable for Width { + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else { + return Ok(Self(false)); + }; + Ok(Self(token.is_variant(Type::ByteWidth))) + } +} +impl From for u16 { + fn from(value: Width) -> Self { (value.0 as Self) << 6 } +} +impl From for bool { + fn from(value: Width) -> Self { value.0 } +} +impl From for Width { + fn from(value: bool) -> Self { Width(value) } +} +impl std::fmt::Display for Width { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) } +} diff --git a/src/parser/instruction/opcode.rs b/src/parser/instruction/opcode.rs new file mode 100644 index 0000000..1384cc8 --- /dev/null +++ b/src/parser/instruction/opcode.rs @@ -0,0 +1,258 @@ +// © 2023 John Breaux +//! An [Opcode] encodes an msp430 operation +use super::*; + +use std::str::FromStr; + +/// Opcode from the [MSPGCC Manual][1] +/// +/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will +/// extract from a [TokenStream] only the required arguments for that call. 
+/// +/// [1]: https://mspgcc.sourceforge.net/manual/x223.html +#[allow(clippy::identity_op)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Opcode { + // "Emulated" opcodes + Nop, + Pop, + Br, + Ret, + Clrc, + Setc, + Clrz, + Setz, + Clrn, + Setn, + Dint, + Eint, + Rla, + Rlc, + Inv, + Clr, + Tst, + Dec, + Decd, + Inc, + Incd, + Adc, + Dadc, + Sbc, + // Single + Rrc = 0x1000 | 0 << 7, + Swpb = 0x1000 | 1 << 7, + Rra = 0x1000 | 2 << 7, + Sxt = 0x1000 | 3 << 7, + Push = 0x1000 | 4 << 7, + Call = 0x1000 | 5 << 7, + Reti = 0x1000 | 6 << 7, + // Jump + Jnz = 0x2000 | 0 << 10, + Jz = 0x2000 | 1 << 10, + Jnc = 0x2000 | 2 << 10, + Jc = 0x2000 | 3 << 10, + Jn = 0x2000 | 4 << 10, + Jge = 0x2000 | 5 << 10, + Jl = 0x2000 | 6 << 10, + Jmp = 0x2000 | 7 << 10, + // Double + Mov = 0x4000, + Add = 0x5000, + Addc = 0x6000, + Subc = 0x7000, + Sub = 0x8000, + Cmp = 0x9000, + Dadd = 0xa000, + Bit = 0xb000, + Bic = 0xc000, + Bis = 0xd000, + Xor = 0xe000, + And = 0xf000, +} + +impl Opcode { + pub fn takes_width(&self) -> bool { + use Opcode::*; + match self { + Rrc => true, + Swpb => false, + Rra => true, + Sxt => false, + Push => true, + Call | Reti => false, + Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => false, + Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => true, + Nop | Pop | Br | Ret | Clrc | Setc | Clrz | Setz | Clrn | Setn | Dint | Eint | Rla | Rlc | Inv | Clr + | Tst | Dec | Decd | Inc | Incd | Adc | Dadc | Sbc => true, + } + } + /// Resolve an Opcode into an [Opcode] and an [EncodingParser] + pub fn resolve(self) -> (Opcode, EncodingParser) { + use super::Encoding as Enc; + use Opcode::*; + use Register::*; + use {PrimaryOperand as Src, SecondaryOperand as Dst}; + match self { + Rrc | Swpb | Rra | Sxt | Push | Call | Reti => (self, Enc::single().end()), + Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => (self, Enc::jump().end()), + Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => (self, 
Enc::double().end()), + Nop => (Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()), + Pop => (Mov, Enc::double().src(Src::PostInc(sp)).end()), + Br => (Mov, Enc::double().dst(Dst::Direct(pc)).end()), + Ret => (Mov, Enc::double().src(Src::PostInc(sp)).dst(Dst::Direct(pc)).end()), + Clrc => (Bic, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()), + Setc => (Bis, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()), + Clrz => (Bic, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()), + Setz => (Bis, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()), + Clrn => (Bic, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()), + Setn => (Bis, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()), + Dint => (Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()), + Eint => (Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()), + Rla => (Add, Enc::reflexive().end()), + Rlc => (Addc, Enc::reflexive().end()), + Inv => (Xor, Enc::double().src(Src::MinusOne).end()), + Clr => (Mov, Enc::double().src(Src::Zero).end()), + Tst => (Cmp, Enc::double().src(Src::Zero).end()), + Dec => (Sub, Enc::double().src(Src::One).end()), + Decd => (Sub, Enc::double().src(Src::Two).end()), + Inc => (Add, Enc::double().src(Src::One).end()), + Incd => (Add, Enc::double().src(Src::Two).end()), + Adc => (Addc, Enc::double().src(Src::Zero).end()), + Dadc => (Dadd, Enc::double().src(Src::Zero).end()), + Sbc => (Subc, Enc::double().src(Src::Zero).end()), + } + } +} + +impl FromStr for Opcode { + type Err = Error; + fn from_str(s: &str) -> Result { + use Opcode::*; + //TODO: Reduce allocations here + let s = s.to_ascii_lowercase(); + Ok(match s.as_str() { + "rrc" => Rrc, + "swpb" => Swpb, + "rra" => Rra, + "sxt" => Sxt, + "push" => Push, + "call" => Call, + "reti" => Reti, + + "jne" | "jnz" => Jnz, + "jeq" | "jz" => Jz, + "jnc" | "jlo" => Jnc, + "jc" | "jhs" => Jc, + "jn" => Jn, + "jge" => Jge, + "jl" => Jl, + "jmp" => Jmp, + + "mov" => Mov, + "add" => Add, + 
"addc" => Addc, + "subc" => Subc, + "sub" => Sub, + "cmp" => Cmp, + "dadd" => Dadd, + "bit" => Bit, + "bic" => Bic, + "bis" => Bis, + "xor" => Xor, + "and" => And, + + "nop" => Nop, + "pop" => Pop, + "br" => Br, + "ret" => Ret, + "clrc" => Clrc, + "setc" => Setc, + "clrz" => Clrz, + "setz" => Setz, + "clrn" => Clrn, + "setn" => Setn, + "dint" => Dint, + "eint" => Eint, + "rla" => Rla, + "rlc" => Rlc, + "inv" => Inv, + "clr" => Clr, + "tst" => Tst, + "dec" => Dec, + "decd" => Decd, + "inc" => Inc, + "incd" => Incd, + "adc" => Adc, + "dadc" => Dadc, + "sbc" => Sbc, + _ => Err(Error::UnrecognizedOpcode(s))?, + }) + } +} + +impl Display for Opcode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Opcode::*; + write!( + f, + "{}", + match self { + Nop => "nop", + Pop => "pop", + Br => "br", + Ret => "ret", + Clrc => "clrc", + Setc => "setc", + Clrz => "clrz", + Setz => "setz", + Clrn => "clrn", + Setn => "setn", + Dint => "dint", + Eint => "eint", + Rla => "rla", + Rlc => "rlc", + Inv => "inv", + Clr => "clr", + Tst => "tst", + Dec => "dec", + Decd => "decd", + Inc => "inc", + Incd => "incd", + Adc => "adc", + Dadc => "dadc", + Sbc => "sbc", + Rrc => "rrc", + Swpb => "swpb", + Rra => "rra", + Sxt => "sxt", + Push => "push", + Call => "call", + Reti => "reti", + Jnz => "jnz", + Jz => "jz", + Jnc => "jnc", + Jc => "jc", + Jn => "jn", + Jge => "jge", + Jl => "jl", + Jmp => "jmp", + Mov => "mov", + Add => "add", + Addc => "addc", + Subc => "subc", + Sub => "sub", + Cmp => "cmp", + Dadd => "dadd", + Bit => "bit", + Bic => "bic", + Bis => "bis", + Xor => "xor", + And => "and", + } + ) + } +} + +impl LowerHex for Opcode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:04x}", *self as u16) } +} diff --git a/src/parser/label.rs b/src/parser/label.rs new file mode 100644 index 0000000..f9f7614 --- /dev/null +++ b/src/parser/label.rs @@ -0,0 +1,16 @@ +// © 2023 John Breaux +use super::*; + +#[derive(Clone, Debug, 
PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Label(pub Identifier); + +impl Parsable for Label { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + Ok(Self(Identifier::parse(p, stream).and_then(|t| stream.require(Type::Label).and(Ok(t)))?)) + } +} + +impl Display for Label { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) } +} diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs new file mode 100644 index 0000000..50fdadf --- /dev/null +++ b/src/parser/parsable.rs @@ -0,0 +1,44 @@ +// © 2023 John Breaux +use super::*; +/// Parses tokens from [stream](TokenStream) into Self node +pub trait Parsable { + /// Parses tokens from [TokenStream](TokenStream) into Self nodes + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where + Self: Sized, + T: TokenStream<'text>; + + /// Attempts to parse tokens from [stream](TokenStream) into Self nodes. + /// + /// Masks failed expectations. + fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result, Error> + where + Self: Sized, + T: TokenStream<'text>, + { + match Self::parse(p, stream).map_err(|e| e.bare()) { + Ok(tt) => Ok(Some(tt)), + Err(Error::UnexpectedToken { .. }) | Err(Error::AllExpectationsFailed { .. }) => Ok(None), + Err(e) => Err(e), + } + } + + fn parse_and<'text, T, R>(p: &Parser, stream: &mut T, f: fn(p: &Parser, &mut T) -> R) -> Result<(Self, R), Error> + where + Self: Sized, + T: TokenStream<'text>, + { + Ok((Self::parse(p, stream)?, f(p, stream))) + } + + /// Attempts to parse tokens from [stream](TokenStream) into Self nodes. 
+ /// + /// Returns [`Self::default()`](Default::default()) on error + fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self + where + Self: Sized + Default, + T: TokenStream<'text>, + { + Self::parse(p, stream).unwrap_or_default() + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..c842e94 --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,193 @@ +// © 2023 John Breaux +//! Iterates over &[str], producing [Token]s + +// Things we need: +// ✔ 1. Lexer/Tokenizer +// ✔ 1. Instructions +// ✔ 1. Instruction mnemonics /ad.../ +// ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/ +// ✔ 2. Src operands +// ✔ 1. Registers /(r1[0-5]|r[0-9])/ +// ✔ 2. Immediate Values /#/ +// ✔ 3. Absolute addresses /&/ +// ✔ 4. Numbers /[0-9A-Fa-f]+ +// ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/ +// ✔ 4. Label definitions /(^.*):/ +// ✔ 5. Comments (may be useful for debugging) + +pub mod context; +pub mod token; + +use crate::Error; +use context::Context; +use token::{Token, Type}; + +/// Backtracking through bifurcated timelines +pub trait TokenStream<'text>: Iterator> { + /// Gets this stream's [Context] + fn context(&self) -> Context; + + /// Creates an iterator that skips [Type::Space] in the input + fn ignore_spaces(&'text mut self) -> IgnoreSpaces<'text, Self> + where Self: Sized { + IgnoreSpaces::new(self) + } + + /// Returns the next [Token] without advancing + fn peek(&mut self) -> Self::Item; + + /// Returns the next [Token] if it is of the expected [Type], without advancing + fn peek_expect(&mut self, expected: Type) -> Result; + + /// Consumes and returns a [Token] if it is the expected [Type] + /// + /// Otherwise, does not consume a [Token] + fn expect(&mut self, expected: Type) -> Result; + + /// Ignores a [Token] of the expected [Type], propegating errors. 
+ fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) } + + /// Ignores a [Token] of the expected [Type], discarding errors. + fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } + + /// Runs a functor on each + fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result + where T: AsRef<[Type]> { + for &expected in expected.as_ref() { + match f(self, expected).map_err(|e| e.bare()) { + Ok(t) => return Ok(t), + Err(Error::UnexpectedToken { .. }) => continue, + Err(e) => return Err(e.context(self.context())), + } + } + Err(Error::expected(expected, self.peek()).context(self.context())) + } + + /// Returns the next [Token] if it is of the expected [Types](Type), without advancing + fn peek_expect_any_of(&mut self, expected: T) -> Result + where T: AsRef<[Type]> { + self.any_of(Self::peek_expect, expected) + } + /// Consumes and returns a [Token] if it matches any of the expected [Types](Type) + /// + /// Otherwise, does not consume a [Token] + fn expect_any_of(&mut self, expected: T) -> Result + where T: AsRef<[Type]> { + self.any_of(Self::expect, expected) + } + /// Ignores a [Token] of any expected [Type], discarding errors. + fn allow_any_of(&mut self, expected: T) + where T: AsRef<[Type]> { + let _ = self.expect_any_of(expected); + } + /// Ignores a [Token] of any expected [Type], propegating errors. 
+ fn require_any_of(&mut self, expected: T) -> Result<(), Error> + where T: AsRef<[Type]> { + self.any_of(Self::require, expected) + } +} + +/// Iterates over &[str], producing [Token]s +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Tokenizer<'t> { + text: &'t str, + idx: usize, + context: Context, +} + +impl<'t> Tokenizer<'t> { + /// Produces a new [Tokenizer] from a [str]ing slice + pub fn new(text: &'t T) -> Self + where T: AsRef + ?Sized { + Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() } + } + + fn count(&mut self, token: &Token) { + // update the context + self.context.count(token); + // advance the index + self.idx += token.len(); + } +} + +impl<'text> Iterator for Tokenizer<'text> { + type Item = Token<'text>; + + fn next(&mut self) -> Option { + if self.idx >= self.text.len() { + return None; + } + let token = Token::from(&self.text[self.idx..]); + // Process [Type::Directive]s + self.count(&token); + Some(token) + } +} + +impl<'text> TokenStream<'text> for Tokenizer<'text> { + fn context(&self) -> Context { self.context } + // Tokenizer has access to the source buffer, and can implement expect and peek without cloning + // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and + // registers are valid identifiers. 
+ fn expect(&mut self, expected: Type) -> Result { + let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?; + self.count(&token); + Ok(token) + } + fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) } + fn peek_expect(&mut self, expected: Type) -> Result { + Token::expect(&self.text[self.idx..], expected) + } +} + +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct IgnoreSpaces<'t, T> +where T: TokenStream<'t> +{ + inner: &'t mut T, +} + +impl<'t, T> IgnoreSpaces<'t, T> +where T: TokenStream<'t> +{ + pub fn new(t: &'t mut T) -> Self { IgnoreSpaces { inner: t } } + /// Gets a mutable reference to the inner [Iterator] + pub fn inner_mut(&mut self) -> &mut T { self.inner } +} + +impl<'t, T> Iterator for IgnoreSpaces<'t, T> +where T: TokenStream<'t> +{ + type Item = Token<'t>; + + fn next(&mut self) -> Option { + let next = self.inner.next()?; + // Space tokens are greedy, so the next token shouldn't be a Space + match next.variant() { + Type::Space => self.next(), + _ => Some(next), + } + } +} + +impl<'t, T> TokenStream<'t> for IgnoreSpaces<'t, T> +where T: TokenStream<'t> +{ + fn context(&self) -> Context { self.inner.context() } + fn expect(&mut self, expected: Type) -> Result { + self.inner.allow_any_of([Type::Space, Type::Endl]); + self.inner.expect(expected) + } + + fn peek(&mut self) -> Self::Item { + self.inner.allow_any_of([Type::Space, Type::Endl]); + self.inner.peek() + } + + fn peek_expect(&mut self, expected: Type) -> Result { + self.inner.allow_any_of([Type::Space, Type::Endl]); + self.inner.peek_expect(expected) + } +} diff --git a/src/tokenizer/context.rs b/src/tokenizer/context.rs new file mode 100644 index 0000000..9576e98 --- /dev/null +++ b/src/tokenizer/context.rs @@ -0,0 +1,36 @@ +//! 
Stores contextual information about the current tokenizer state, useful for printing errors +use super::*; +/// Stores contextual information about the current tokenizer state, useful for printing errors +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Context { + line: usize, + tokens: usize, + position: usize, +} + +impl Context { + pub fn new() -> Self { Default::default() } + pub fn line(&self) -> usize { self.line } + pub fn tokens(&self) -> usize { self.tokens } + pub fn position(&self) -> usize { self.position } + pub(super) fn count(&mut self, t: &Token) { + match t.variant() { + Type::EndOfFile => return, + Type::Endl => { + self.line += 1; + self.position = 0; + } + _ => self.position += t.len(), + } + self.tokens += 1; + } +} +impl Default for Context { + fn default() -> Self { Self { line: 1, tokens: 0, position: 0 } } +} + +impl std::fmt::Display for Context { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.line, self.position) + } +} diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs new file mode 100644 index 0000000..d86c67e --- /dev/null +++ b/src/tokenizer/token.rs @@ -0,0 +1,309 @@ +// © 2023 John Breaux +//! Defines the [Token] +//! +//! A [Token] represents all valid sequences of characters, +//! sorted by meaning + +use regex::Regex; +use std::{ + fmt::{Debug, Display}, + sync::OnceLock, +}; + +/// Implements regex matching functions on [`Token`] for each [`Type`], +/// and implements [`From<&str>`] for [`Token`] +macro_rules! regex_impl { +(<$t:lifetime> $type:ty {$( + $(#[$meta:meta])* + pub fn $func:ident (text: &str) -> Option { + regex!($out:path = $re:literal) + } +)*}) => { +impl<$t> $type { + /// Lexes a token only for the expected `variant` + /// + /// Warning: This bypasses precedence rules. Only use for specific patterns. 
+ pub fn expect(text: &$t str, expected: Type) -> Result { + match expected {$( + $out => Self::$func(text), + )*}.ok_or(Error::UnexpectedToken { + expected, + got: Self::from(text).into(), + }) + } + $( + $(#[$meta])* + /// Tries to read [` + #[doc = stringify!($out)] + /// `] from `text` + pub fn $func(text: &$t str) -> Option { + static RE: OnceLock = OnceLock::new(); + let lexeme = RE.get_or_init(|| Regex::new($re).unwrap()) + .find(text)?.into(); + Some(Self { variant: $out, lexeme }) + })* +} +impl<$t> From<&$t str> for $type { + fn from (value: &$t str) -> Self { + $( + if let Some(token) = Self::$func(value) { + token + } else + )* + {todo!("Unexpected input: {value:#?}")} + } +} +}; +} + +use crate::Error; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token<'text> { + /// The type of this token + variant: Type, + /// The sub[str]ing corresponding to this token + lexeme: &'text str, +} + +impl<'text> Token<'text> { + /// Returns the [Type] of this [Token] + pub fn variant(&self) -> Type { self.variant } + + /// Returns the Lexeme (originating string slice) of this token + pub fn lexeme(&self) -> &str { self.lexeme } + + /// Parses this [Token] into another type + pub fn parse(&self) -> Result::Err> + where F: std::str::FromStr { + self.lexeme.parse() + } + /// Returns whether the Lexeme is the expected [Type] + pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected } + + /// Returns the length of [Self::lexeme] in bytes. + pub fn len(&self) -> usize { self.lexeme.len() } + + /// Returns `true` if [Self::lexeme] has a length of zero bytes. 
+ pub fn is_empty(&self) -> bool { self.lexeme.is_empty() } +} + +impl<'text> Debug for Token<'text> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_list().entry(&self.variant).entry(&self.lexeme).finish() + } +} + +impl<'text> Display for Token<'text> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.variant { + Type::Endl | Type::EndOfFile => write!(f, "{}", self.variant), + v => write!(f, "\"{}\" ({v})", self.lexeme), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Type { + /// contiguous whitespace, excluding newline + Space, + /// newline and contiguous whitespace + Endl, + /// A line-comment + Comment, + /// Jump label *definition* + Label, + /// Instructions + Insn, + /// Operand width is byte + ByteWidth, + /// Operand width is word + WordWidth, + /// Register mnemonic (i.e. `pc`, `r14`) + Register, + /// Marker for base-10 + RadixMarkerDec, + /// Marker for base-16 + RadixMarkerHex, + /// Marker for base-8 + RadixMarkerOct, + /// Marker for base-2 + RadixMarkerBin, + /// 1-4 hexadigit numbers only + Number, + /// Negative number marker + Minus, + /// post-increment mode marker + Plus, + /// Open-Indexed-Mode marker + LParen, + /// Close-Indexed-Mode marker + RParen, + /// Indirect mode marker + Indirect, + /// absolute address marker + Absolute, + /// immediate value marker + Immediate, + /// Valid identifier. Identifiers must start with a Latin alphabetic character or underline + Identifier, + /// Assembler directive + Directive, + /// Separator (comma) + Separator, + /// End of File marker + EndOfFile, +} + +regex_impl! 
{<'text> Token<'text> { + pub fn expect_space(text: &str) -> Option { + regex!(Type::Space = r"^[\s--\n]+") + } + pub fn expect_endl(text: &str) -> Option { + regex!(Type::Endl = r"^[\s]+") + } + pub fn expect_comment(text: &str) -> Option { + regex!(Type::Comment = r"^(;|//).*") + } + pub fn expect_label(text: &str) -> Option { + regex!(Type::Label = r"^:") + } + pub fn expect_insn(text: &str) -> Option { + regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)") + } + pub fn expect_byte_width(text: &str) -> Option { + regex!(Type::ByteWidth = r"(?i)^\.b") + } + pub fn expect_word_width(text: &str) -> Option { + regex!(Type::WordWidth = r"(?i)^\.w") + } + pub fn expect_register(text: &str) -> Option { + // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)") + regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)") + } + pub fn expect_radix_marker_dec(text: &str) -> Option { + regex!(Type::RadixMarkerDec = r"(?i)^0d") + } + pub fn expect_radix_marker_hex(text: &str) -> Option { + regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)") + } + pub fn expect_radix_marker_oct(text: &str) -> Option { + regex!(Type::RadixMarkerOct = r"(?i)^0o") + } + pub fn expect_radix_marker_bin(text: &str) -> Option { + regex!(Type::RadixMarkerBin = r"(?i)^0b") + } + pub fn expect_number(text: &str) -> Option { + regex!(Type::Number = r"^+?[[:xdigit:]]{1,5}") + } + pub fn expect_minus(text: &str) -> Option { + regex!(Type::Minus = r"^-") + } + pub fn expect_plus(text: &str) -> Option { + regex!(Type::Plus = r"^\+") + } + pub fn expect_open_idx(text: &str) -> Option { + regex!(Type::LParen = r"^\(") + } + pub fn expect_close_idx(text: &str) -> Option { + regex!(Type::RParen = r"^\)") + } + pub fn expect_indrect(text: &str) -> Option { + regex!(Type::Indirect = r"^@") + } + pub fn expect_absolute(text: &str) -> Option { 
+ regex!(Type::Absolute = r"^&") + } + pub fn expect_immediate(text: &str) -> Option { + regex!(Type::Immediate = r"^#") + } + pub fn expect_directive(text: &str) -> Option { + regex!(Type::Directive = r"^\.\w+( .*)?") + } + pub fn expect_identifier(text: &str) -> Option { + regex!(Type::Identifier = r"^[A-Za-z_]\w+") + } + pub fn expect_separator(text: &str) -> Option { + regex!(Type::Separator = r"^,") + } + pub fn expect_end_of_file(text: &str) -> Option { + regex!(Type::EndOfFile = r"^$") + } +}} + +impl Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Space => write!(f, "space"), + Self::Endl => write!(f, "newline"), + Self::Comment => write!(f, "comment"), + Self::Label => write!(f, "label definition"), + Self::Insn => write!(f, "instruction mnemonic"), + Self::ByteWidth => write!(f, "byte-width marker"), + Self::WordWidth => write!(f, "word-width marker"), + Self::Register => write!(f, "register mnemonic"), + Self::RadixMarkerDec => write!(f, "decimal radix marker"), + Self::RadixMarkerHex => write!(f, "hexadecimal radix marker"), + Self::RadixMarkerOct => write!(f, "octal radix marker"), + Self::RadixMarkerBin => write!(f, "binary radix marker"), + Self::Number => write!(f, "number"), + Self::Minus => write!(f, "minus sign"), + Self::Plus => write!(f, "plus sign"), + Self::LParen => write!(f, "left parenthesis"), + Self::RParen => write!(f, "right parenthesis"), + Self::Indirect => write!(f, "indirect mode marker"), + Self::Absolute => write!(f, "absolute mode marker"), + Self::Immediate => write!(f, "immediate mode marker"), + Self::Identifier => write!(f, "identifier"), + Self::Directive => write!(f, "directive"), + Self::Separator => write!(f, "comma"), + Self::EndOfFile => write!(f, "EOF"), + } + } +} + +/// Owned version of a token, which can outlive its parent buffer +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct OwnedToken { + /// The type of this token + 
variant: Type, + /// The sub[String] corresponding to this token + lexeme: String, +} + +impl Display for OwnedToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) } +} + +impl<'t> From<&'t OwnedToken> for Token<'t> { + fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } } +} + +impl From> for OwnedToken { + fn from(value: Token<'_>) -> Self { + let Token { variant, lexeme } = value; + OwnedToken { variant, lexeme: lexeme.to_owned() } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Types(Vec); + +impl> From for Types { + // TODO: Possibly bad. Check out in rust playground. + fn from(value: T) -> Self { Self(value.as_ref().to_owned()) } +} + +impl Display for Types { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (idx, t) in self.0.iter().enumerate() { + write!(f, "{t}")?; + match idx { + i if i < self.0.len() - 2 => write!(f, ", ")?, + i if i < self.0.len() - 1 => write!(f, " or ")?, + _ => (), + } + } + Ok(()) + } +} diff --git a/valid.asm b/valid.asm new file mode 100755 index 0000000..c819644 --- /dev/null +++ b/valid.asm @@ -0,0 +1,260 @@ +;© 2023 John Breaux +; examples of valid assembly +; + +_register_mode: +.define numbered r1 +mov r0, r1 +mov r1, r2 +mov r2, r3 +mov r3, r4 +mov r4, r5 +mov r5, r6 +mov r6, r7 +mov r7, r8 +mov r8, r9 +mov r9, r10 +mov r10, r11 +mov r11, r12 +mov r12, r13 +mov r13, r14 +mov r14, r15 + +.define special r2 +mov pc, r15 +mov sp, r15 +mov sr, r15 +mov cg, r15 + + +indirect_mode: +.define numbered r3 +mov @r0, r1 +mov @r1, r2 +;mov @r2, r3 +;mov @r3, r4 +mov @r4, r5 +mov @r5, r6 +mov @r6, r7 +mov @r7, r8 +mov @r8, r9 +mov @r9, r10 +mov @r10, r11 +mov @r11, r12 +mov @r12, r13 +mov @r13, r14 +mov @r14, r15 + +.define special r4 +mov @pc, r15 +mov @sp, r15 +;mov @sr, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] +;mov @cg, r15 + 
+indirect_pi_mode: +.define numbered r5 +;mov @r0+, r1 +mov @r1+, r2 +;mov @r2+, r3 +;mov @r3+, r4 +mov @r4+, r5 +mov @r5+, r6 +mov @r6+, r7 +mov @r7+, r8 +mov @r8+, r9 +mov @r9+, r10 +mov @r10+, r11 +mov @r11+, r12 +mov @r12+, r13 +mov @r13+, r14 +mov @r14+, r15 + +.define special r6 +;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid +;mov @sp+, r15 ; pop r15 +;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] +;mov @cg+, r15 + +indexed_mode: +.define numbered r7 +mov.b 10(r0), r1 +mov 10(r1), r2 +;mov 10(r2), r3 ; Invalid: cannot index relative to sr +;mov 10(r3), r4 ; Invalid: cannot index relative to cg +mov 10(r4), r5 +mov 10(r5), r6 +mov 10(r6), r7 +mov 10(r7), r8 +mov 10(r8), r9 +mov 10(r9), r10 +mov 10(r10), r11 +mov 10(r11), r12 +mov 10(r12), r13 +mov 10(r13), r14 +mov 10(r14), r15 + +.define special r8 +mov 10(pc), r15 +mov 10(sp), r15 +;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] +;mov 10(cg), r15 + +_immediate_mode: +.define numbered r9 +mov #beef, r0 +mov #beef, r1 +mov #beef, r2 +mov #beef, r3 +mov #beef, r4 +mov #beef, r5 +mov #beef, r6 +mov #beef, r7 +mov #beef, r8 +mov #beef, r9 +mov #beef, r10 +mov #beef, r11 +mov #beef, r12 +mov #beef, r13 +mov #beef, r14 +mov #beef, r15 + +.define special r10 +mov #beef, pc +mov #beef, sp +mov #beef, sr +mov #beef, cg + +; jmp _register_mode ; TODO: msp430_asm currently has no support for jump labels. 
+jmp 3fe +jmp -3fc +ret + +; Funky encodings +mov r6, r4 +mov @r6, r4 +mov @r6+, r4 +mov 0(r6), r4 +mov 4141(r6), r4 +mov #-1, r4 +mov #ffff, r4 +mov #0, r4 +mov #1, r4 +mov #2, r4 +mov #4, r4 +mov #8, r4 +mov r6, 0(r4) +mov @r6, 0(r4) +mov @r6+, 0(r4) +mov 0(r6), 0(r4) +mov 4141(r6), 0(r4) +mov #-1, 0(r4) +mov #ffff, 0(r4) +mov #0, 0(r4) +mov #1, 0(r4) +mov #2, 0(r4) +mov #4, 0(r4) +mov #8, 0(r4) +mov r6, 4141(r4) +mov @r6, 4141(r4) +mov @r6+, 4141(r4) +mov 0(r6), 4141(r4) +mov 4141(r6), 4141(r4) +mov #-1, 4141(r4) +mov #ffff, 4141(r4) +mov #0, 4141(r4) +mov #1, 4141(r4) +mov #2, 4141(r4) +mov #4, 4141(r4) +mov #8, 4141(r4) +mov r6, #0 +mov @r6, #0 +mov @r6+, #0 +mov 0(r6), #0 +mov 4141(r6), #0 +mov #-1, #0 +mov #ffff, #0 +mov #0, #0 +mov #1, #0 +mov #2, #0 +mov #4, #0 +mov #8, #0 +mov r6, #1 +mov @r6, #1 +mov @r6+, #1 +mov 0(r6), #1 +mov 4141(r6), #1 +mov #-1, #1 +mov #ffff, #1 +mov #0, #1 +mov #1, #1 +mov #2, #1 +mov #4, #1 +mov #8, #1 + +; Instruction exercise +; Jumps +jne 10 +jeq 10 +jlo 10 +jhs 10 +jn 10 +jge 10 +jl 10 +jmp 10 + +; Two-ops +mov r14, r15 +add r14, r15 +addc r14, r15 +subc r14, r15 +sub r14, r15 +cmp r14, r15 +dadd r14, r15 +bit r14, r15 +bic r14, r15 +bis r14, r15 +xor r14, r15 +and r14, 10(r15) + +; One-ops +rrc r15 +swpb r15 +rra r15 +sxt r15 +push r15 +call r15 +reti r15 + +; Jump aliases +jnc 10 +jnz 10 +jc 10 +jz 10 + +; "emulated" no-op instructions +ret +clrc +setc +clrz +setz +clrn +setn +dint +eint +nop + +; "emulated" one-op instructions +br r15 +pop r15 +rla r15 +rlc r15 +inv r15 +clr r15 +tst r15 +dec r15 +decd r15 +inc r15 +incd r15 +adc r15 +dadc r15 +sbc r15