diff --git a/Cargo.toml b/Cargo.toml index 81a52c6..c8246e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "msp430-asm" -version = "0.1.0" +version = "0.2.0" edition = "2021" authors = ["John Breaux"] publish = false diff --git a/src/assembler.rs b/src/assembler.rs new file mode 100644 index 0000000..e9273b9 --- /dev/null +++ b/src/assembler.rs @@ -0,0 +1,197 @@ +// © 2023 John Breaux +//! Traverses an AST, assembling instructions. +//! +//! [Assembler] carries *some* state + +use crate::parser::preamble::*; +use error::AssemblyError; +use std::collections::HashMap; +use std::path::Path; + +pub mod error; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum IdentType { + Word, + Jump, +} + +/// Takes in an AST's [Root], and outputs a sequence of bytes +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct Assembler { + out: Vec, + /// A map from Labels' [Identifier]s to their location in the binary + labels: HashMap, + /// A list of all referenced [Identifier]s in the binary, and their locations + identifiers: Vec<(usize, Identifier, IdentType)>, +} + +impl Assembler { + pub fn assemble(r: &Root) -> Result, AssemblyError> { + let mut out = Self::default(); + out.visit_root(r)?; + Ok(out.out) + } + pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) } + pub fn out(self) -> Vec { self.out } + + fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) } + fn push_default(&mut self) -> usize { + self.out.push(Default::default()); + self.out.len() - 1 + } +} + +impl Assembler { + /// Visits the [Root] node of a parse tree + fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> { + // Visit the entire tree + for (num, line) in r.lines() { + self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?; + } + // Link identifiers + for (idx, id, id_type) in self.identifiers.iter() { + let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) }; + let offset = (num as isize - *idx as isize) * 2; + *self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type { + IdentType::Word => offset as u16, + IdentType::Jump => JumpTarget::squish(offset)?, + }; + } + Ok(()) + } + + /// visit a [Line] + fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> { + match line { + Line::Insn(insn) => self.visit_instruction(insn), + Line::Label(label) => self.visit_label(label), + Line::Directive(d) => self.visit_directive(d), + _ => Ok(()), + } + } + + /// Visits a [Directive] + fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> { + match node { + Directive::Org(_) => todo!("{node}"), + Directive::Define(..) => (), + Directive::Include(r) => self.visit_root(r)?, + Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()), + Directive::Bytes(words) | Directive::Words(words) => { + for word in words { + self.out.push((*word).into()); + } + } + Directive::String(s) => self.visit_string(s)?, + Directive::Strings(strs) => { + for s in strs { + self.visit_string(s)?; + } + } + }; + Ok(()) + } + + /// Visits a [Label] + fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> { + // Register the label + match self.labels.insert(node.0.to_owned(), self.out.len()) { + Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())), + _ => Ok(()), + } + } + + /// Visits an [Instruction] + fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> { + self.push_default(); + self.visit_opcode(insn.opcode())?; + self.visit_encoding(insn.encoding())?; + Ok(()) + } + + /// Visits an [Opcode] + fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> { + *self.last_mut()? |= *node as u16; + Ok(()) + } + + /// Visits an [Encoding] + fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> { + *self.last_mut()? |= node.word(); + match node { + Encoding::Single { dst, .. } => { + self.visit_primary_operand(dst)?; + } + Encoding::Jump { target } => { + self.visit_jump_target(target)?; + } + Encoding::Double { src, dst, .. } => { + self.visit_primary_operand(src)?; + self.visit_secondary_operand(dst)?; + } + } + Ok(()) + } + + /// Visits a [JumpTarget] + fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> { + match node { + JumpTarget::Number(num) => self.visit_number(num), + JumpTarget::Identifier(id) => { + self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?; + Ok(()) + } + } + } + + /// Visits a [SecondaryOperand] + fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> { + use SecondaryOperand as O; + if let O::Indexed(_, num) | O::Absolute(num) = node { + self.push_default(); + self.visit_number(num)?; + } + Ok(()) + } + + /// Visits a [PrimaryOperand] + fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> { + use PrimaryOperand as O; + match node { + O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => { + self.push_default(); + self.visit_number(num)?; + } + O::Relative(id) => { + let addr = self.push_default(); + self.visit_identifier(id, addr, IdentType::Word)?; + } + _ => (), + } + Ok(()) + } + + /// Visits a number and writes it into the last index + fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> { + *self.last_mut()? |= u16::from(*node); + Ok(()) + } + + /// Visits a number and appends it to the output buffer + fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> { + for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() { + if idx % 2 == 0 { + self.push_default(); + } + *self.last_mut()? |= (byte as u16) << (8 * (idx % 2)); + } + Ok(()) + } + + /// Visits an [Identifier], and registers it to the identifier list + fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> { + self.identifiers.push((addr, node.clone(), ty)); + Ok(()) + } +} diff --git a/src/assembler/error.rs b/src/assembler/error.rs new file mode 100644 index 0000000..08c4a34 --- /dev/null +++ b/src/assembler/error.rs @@ -0,0 +1,56 @@ +// © 2023 John Breauxs +use crate::parser::{error::ParseError, preamble::*}; +use std::{ + fmt::Display, + path::{Path, PathBuf}, +}; + +#[derive(Debug)] +pub enum AssemblyError { + UnresolvedIdentifier(Identifier), + RedefinedLabel(Identifier), + JumpedTooFar(Identifier, isize), + ParseError(ParseError), + // TODO: This, better' + Context(Box, PathBuf, usize), + EmptyBuffer, +} + +impl AssemblyError { + pub(super) fn ctx + ?Sized>(self, file: &P, line: usize) -> Self { + Self::Context(self.into(), file.as_ref().into(), line) + } +} + +impl From for AssemblyError { + fn from(value: ParseError) -> Self { Self::ParseError(value) } +} + +impl Display for AssemblyError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnresolvedIdentifier(id) => { + write!(f, "Identifier {id} is undefined, but referenced anyway.") + } + Self::RedefinedLabel(id) => { + write!(f, "Redefined label '{id}'.") + } + Self::JumpedTooFar(id, num) => { + write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)") + } + Self::ParseError(e) => Display::fmt(e, f), + Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()), + Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f), + } + } +} + +impl std::error::Error for AssemblyError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::ParseError(e) => Some(e), + Self::Context(e, ..) => Some(e), + _ => None, + } + } +} diff --git a/src/bin/msp430-asm/main.rs b/src/bin/msp430-asm/main.rs new file mode 100644 index 0000000..cf1f111 --- /dev/null +++ b/src/bin/msp430-asm/main.rs @@ -0,0 +1,61 @@ +//! Simple frontend for the assembler + +use msp430_asm::preamble::*; +use std::error::Error; +use std::io::Read; + +fn main() -> Result<(), Box> { + let mut repl = true; + for arg in std::env::args() { + match arg.as_str() { + "-" | "-f" | "--file" => repl = false, + _ => (), + } + } + + let mut buf = String::new(); + if repl { + let mut line = String::new(); + while let Ok(len) = std::io::stdin().read_line(&mut line) { + match len { + 0 => break, // No newline (reached EOF) + 1 => continue, // Line is empty + _ => { + // Try to parse this line in isolation (this restricts preprocessing) + match Parser::default().parse(&line) { + Ok(_) => { + buf += &line; + } + Err(error) => println!("{error}"), + } + line.clear(); + } + } + } + match Assembler::assemble(&Parser::default().parse(&buf)?) { + Err(error) => println!("{error}"), + Ok(out) => { + for word in out { + print!("{:04x} ", word.swap_bytes()) + } + } + } + + println!(); + } else { + std::io::stdin().lock().read_to_string(&mut buf)?; + let tree = Parser::default().parse(&buf); + match &tree { + Ok(tree) => { + //msp430_asm::linker::Printer::default().visit_root(tree); + for insn in msp430_asm::assembler::Assembler::assemble(tree)? { + print!("{:04x} ", insn.swap_bytes()) + } + println!(); + } + Err(error) => eprintln!("{error}"), + } + } + + Ok(()) +} diff --git a/src/error.rs b/src/error.rs index a024e8d..c6b46ee 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,126 +1,39 @@ // © 2023 John Breauxs //! Common error type for [msp430-asm](crate) errors +use super::*; use std::fmt::Display; -use super::{ - lexer::token::{OwnedToken, Types}, - *, -}; - -// TODO: Store more error context in error. for example: -// Error {ExpectationFailed{...}, WhileParsing(Register)} - #[derive(Debug)] pub enum Error { - /// Produced by [Parser](crate::parser::Parser::parse()) - ParseError(parser::root::Root, Box), - /// Any other error, tagged with [Context]. Created by [`Error::context()`] - Contextual(Context, Box), - /// Produced by [Token] when the input is entirely unexpected. - UnexpectedSymbol(String), - /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] - UnexpectedToken { - expected: Type, - got: OwnedToken, - }, - /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the expected - /// [Types](Type) - AllExpectationsFailed { - expected: Types, - got: OwnedToken, - }, - /// Produced by - /// [Number](parser::preamble::Number)[::parse()](parser::parsable::Parsable::parse()) - /// when the parsed number contains digits too high for the specified radix - UnexpectedDigits(String, u32), - /// Produced by - /// [Opcode](parser::preamble::Opcode)[::parse()](parser::parsable::Parsable::parse()) - /// when the opcode passed lexing but did not match recognized opcodes. - /// - /// This should be interpreted as a failure in lexing. - UnrecognizedOpcode(String), - /// Produced by [Register](parser::preamble::Register) - /// when attempting to convert from a [str] that isn't a register (pc, sp, sr, cg, or r{number}) - NotARegister(String), - /// Produced by [Register](parser::preamble::Register) - /// when attempting to convert from a [u16] that isn't in the range 0-15 - RegisterTooHigh(u16), - /// Produced by - /// [SecondaryOperand](parser::preamble::SecondaryOperand) - /// when the joke "secondary immediate" form is out of range 0..=1 - FatSecondaryImmediate(isize), - /// Produced by [Number](parser::preamble::Number) when the number is too - /// wide to fit in 16 bits (outside the range `(-2^15) .. (2^16-1)` ) - NumberTooWide(isize), - /// Produced by [JumpTarget](parser::preamble::JumpTarget) - /// when the jump offset is outside the range (-0x3ff..0x3fc) - JumpedTooFar(isize), - /// Produced by [JumpTarget](parser::preamble::JumpTarget) - JumpedOdd(isize), - EndOfFile, + /// Produced by [lexer] + LexError(lexer::error::LexError), + /// Produced by [parser] + ParseError(parser::error::ParseError), + /// Produced by [assembler] + AssemblyError(assembler::error::AssemblyError), } -impl Error { - pub fn context(self, c: Context) -> Self { - match self { - Self::Contextual(..) => self, - _ => Self::Contextual(c, Box::new(self)), - } - } +impl Error {} - // Extracts the root of the error tree - pub fn bare(self) -> Self { - match self { - Self::Contextual(_, bare) => bare.bare(), - _ => self, - } - } +impl From for Error { + fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) } +} - pub fn swap(mut self, other: Self) -> Self { - if let Self::Contextual(_, err) = &mut self { - _ = std::mem::replace(err.as_mut(), other) - } - self - } +impl From for Error { + fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) } +} - pub fn expected, T: Into>(expected: E, got: T) -> Self { - match expected.as_ref().len() { - 1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() }, - _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() }, - } - } - - pub fn mask_expectation(mut self, expected: Type) -> Self { - match self { - Error::UnexpectedToken { got, .. } => self = Error::UnexpectedToken { expected, got }, - Error::AllExpectationsFailed { got, .. } => self = Error::UnexpectedToken { expected, got }, - Error::Contextual(context, err) => { - self = Error::Contextual(context, Box::new(err.mask_expectation(expected))) - } - _ => (), - } - self - } +impl From for Error { + fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) } } impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Error::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), - Error::ParseError(_, error) => write!(f, "{error}"), - Error::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""), - Error::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), - Error::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."), - Error::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), - Error::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), - Error::NotARegister(reg) => write!(f, "{reg} is not a register"), - Error::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"), - Error::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"), - Error::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"), - Error::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"), - Error::JumpedOdd(num) => write!(f, "Jump targets only encode even numbers: {num} must not be odd."), - Error::EndOfFile => write!(f, "Unexpected end of file"), + Error::LexError(e) => Display::fmt(e, f), + Error::ParseError(e) => Display::fmt(e, f), + Error::AssemblyError(e) => Display::fmt(e, f), } } } @@ -128,8 +41,9 @@ impl Display for Error { impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - Error::ParseError(_, e) => Some(e.as_ref()), - _ => None, + Error::LexError(e) => Some(e), + Error::ParseError(e) => Some(e), + Error::AssemblyError(e) => Some(e), } } } diff --git a/src/hash.rs b/src/hash.rs index 2ca7dce..2c0dcdd 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,14 +1,19 @@ // © 2023 John Breaux //! Convenience functions and traits for dealing with hashable data pub type Hash = u64; + +/// Calculates a hash using Rust hashmap's default hasher. +pub fn hash(hashable: T) -> Hash { + use std::hash::Hasher; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + hashable.hash(&mut hasher); + hasher.finish() +} + pub trait FromHash: From { - /// Hashes anything that implements [type@Hash] using the [DefaultHasher](std::collections::hash_map::DefaultHasher) - fn hash(hashable: T) -> Hash { - use std::hash::Hasher; - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - hashable.hash(&mut hasher); - hasher.finish() - } + /// Hashes anything that implements [type@Hash] using the + /// [DefaultHasher](std::collections::hash_map::DefaultHasher) + fn hash(hashable: T) -> Hash { hash(hashable) } fn from_hash(hashable: T) -> Self where Self: Sized { Self::from(Self::hash(hashable)) diff --git a/src/lexer.rs b/src/lexer.rs index 568f272..318b833 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,28 +1,15 @@ // © 2023 John Breaux //! Iterates over [`&str`](str), producing [`Token`s](Token) -// Things we need: -// ✔ 1. Lexer/Tokenizer -// ✔ 1. Instructions -// ✔ 1. Instruction mnemonics /ad.../ -// ✔ 2. Byte/Word Mode Marker /(.\[bw\])?/ -// ✔ 2. Operands -// ✔ 1. Registers /(r1[0-5]|r[0-9])/ -// ✔ 2. Immediate Values /#/ -// ✔ 3. Absolute addresses /&/ -// ✔ 4. Numbers /[0-9A-Fa-f]+ -// ✔ 5. Jump Offsets: basically numbers /$?([+-]?[0-9A-Fa-f]{1,4})/ -// ✔ 3. Label definitions /(^.*):/ -// ✔ 4. Comments (may be useful for debugging) - pub mod context; +pub mod error; pub mod ignore; pub mod preprocessed; pub mod token; pub mod token_stream; -use crate::Error; use context::Context; +use error::LexError; use token::{Token, Type}; use token_stream::TokenStream; @@ -70,13 +57,13 @@ impl<'text> TokenStream<'text> for Tokenizer<'text> { // Tokenizer has access to the source buffer, and can implement expect and peek without cloning // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and // registers are valid identifiers. - fn expect(&mut self, expected: Type) -> Result { + fn expect(&mut self, expected: Type) -> Result { let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?; self.count(&token); Ok(token) } fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) } - fn peek_expect(&mut self, expected: Type) -> Result { - Token::expect(&self.text[self.idx..], expected) + fn peek_expect(&mut self, expected: Type) -> Result { + Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context())) } } diff --git a/src/lexer/error.rs b/src/lexer/error.rs new file mode 100644 index 0000000..652dcf2 --- /dev/null +++ b/src/lexer/error.rs @@ -0,0 +1,68 @@ +// © 2023 John Breauxs +use super::{ + context::Context, + token::{OwnedToken, *}, +}; +use std::fmt::Display; + +#[derive(Debug)] +pub enum LexError { + /// Any other error, tagged with [Context]. Created by [`Error::context()`] + Contextual(Context, Box), + /// Produced by [Token] when the input is entirely unexpected. + UnexpectedSymbol(String), + /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] + UnexpectedToken { expected: Type, got: OwnedToken }, + /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the + /// expected [Types](Type) + AllExpectationsFailed { expected: Types, got: OwnedToken }, +} + +impl LexError { + pub fn context(self, c: Context) -> Self { + match self { + Self::Contextual(..) => self, + _ => Self::Contextual(c, Box::new(self)), + } + } + + // Extracts the root of the error tree + pub fn bare(self) -> Self { + match self { + Self::Contextual(_, bare) => bare.bare(), + _ => self, + } + } + + pub fn expected, T: Into>(expected: E, got: T) -> Self { + match expected.as_ref().len() { + 1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() }, + _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() }, + } + } + + pub fn mask_expectation(mut self, expected: Type) -> Self { + match self { + LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got }, + LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got }, + LexError::Contextual(context, err) => { + self = LexError::Contextual(context, Box::new(err.mask_expectation(expected))) + } + _ => (), + } + self + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), + LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""), + LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), + LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."), + } + } +} + +impl std::error::Error for LexError {} diff --git a/src/lexer/ignore.rs b/src/lexer/ignore.rs index f9b4eb7..a9f89b5 100644 --- a/src/lexer/ignore.rs +++ b/src/lexer/ignore.rs @@ -38,7 +38,7 @@ impl<'t, T> TokenStream<'t> for Ignore<'t, T> where T: TokenStream<'t> { fn context(&self) -> Context { self.inner.context() } - fn expect(&mut self, expected: Type) -> Result { + fn expect(&mut self, expected: Type) -> Result { self.inner.allow(self.ignore); self.inner.expect(expected) } @@ -48,7 +48,7 @@ where T: TokenStream<'t> self.inner.peek() } - fn peek_expect(&mut self, expected: Type) -> Result { + fn peek_expect(&mut self, expected: Type) -> Result { self.inner.allow(self.ignore); self.inner.peek_expect(expected) } diff --git a/src/lexer/preprocessed.rs b/src/lexer/preprocessed.rs index d97ec96..4d4ab2e 100644 --- a/src/lexer/preprocessed.rs +++ b/src/lexer/preprocessed.rs @@ -1,6 +1,6 @@ // © 2023 John Breaux -//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band ".define" -//! rules +//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band +//! ".define" rules use super::*; use std::collections::{HashMap, VecDeque}; @@ -47,21 +47,27 @@ impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> { /// Gets a mutable reference to the inner [TokenStream] pub fn inner_mut(&mut self) -> &mut T { self.inner } - fn define(&mut self, token: Token<'t>) -> Result<(), Error> { + /// Preserve the next token in the queue + fn enqueue(&mut self, token: Token<'t>) -> Token<'t> { + self.queue.push_back(token); + token + } + + /// Process .define directives in the preprocessor + fn define(&mut self, token: Token<'t>) -> Result<(), LexError> { if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) { return Ok(()); } // Tokenize the subdocument self.allow(Type::Directive); - - self.require(Type::Space).map_err(|e| e.context(self.context()))?; + self.allow(Type::Space); let Some(k) = self.inner.next() else { return Ok(()) }; if !self.sub_types.contains(&k.variant()) { self.sub_types.push(k.variant()); }; - self.require(Type::Space).map_err(|e| e.context(self.context()))?; + self.allow(Type::Space); let mut replacement = vec![]; loop { @@ -71,7 +77,10 @@ impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> { // ignore comments self.inner.next(); } - _ => replacement.push(self.inner.next().unwrap()), + _ => { + let next = self.inner.next().unwrap(); + replacement.push(self.enqueue(next)); + } } } self.sub_table.insert(k, replacement); @@ -92,10 +101,10 @@ where T: TokenStream<'t> { fn context(&self) -> Context { self.inner.context() } - fn expect(&mut self, expected: Type) -> Result { + fn expect(&mut self, expected: Type) -> Result { match self.queue.front() { Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()), - Some(&token) => Err(Error::expected([expected], token).context(self.context())), + Some(&token) => Err(LexError::expected([expected], token).context(self.context())), None => { // Only resolve defines when expecting, otherwise you'll run into issues. if let Ok(next) = self.inner.expect(expected) { @@ -109,10 +118,9 @@ where T: TokenStream<'t> } return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) }; } - Err(Error::expected([expected], self.inner.peek())) + Err(LexError::expected([expected], self.inner.peek()).context(self.context())) } } - // TODO: preprocessor step } fn peek(&mut self) -> Self::Item { @@ -130,10 +138,10 @@ where T: TokenStream<'t> } } - fn peek_expect(&mut self, expected: Type) -> Result { + fn peek_expect(&mut self, expected: Type) -> Result { match self.queue.front() { Some(&token) if token.is_variant(expected) => Ok(token), - Some(&token) => Err(Error::expected([expected], token).context(self.context())), + Some(&token) => Err(LexError::expected([expected], token).context(self.context())), None => { if let Ok(next) = self.inner.peek_expect(expected) { return Ok(next); @@ -146,7 +154,7 @@ where T: TokenStream<'t> self.peek_expect(expected) }; } - Err(Error::expected([expected], self.inner.peek())) + Err(LexError::expected([expected], self.inner.peek())) } } } diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 60934b4..f3e2dbe 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,7 +1,9 @@ // © 2023 John Breaux -//! A [Token] is a [semantically tagged](Type) sequence of characters +//! A [Token] is a [semantically tagged](Type) sequence of characters. +//! +//! Token, and the tokenizer, intend to copy as little as possible. -use crate::Error; +use super::error::LexError; use regex::Regex; use std::{ fmt::{Debug, Display}, @@ -21,10 +23,10 @@ impl<$t> $type { /// Lexes a token only for the expected `variant` /// /// Warning: This bypasses precedence rules. Only use for specific patterns. - pub fn expect(text: &$t str, expected: Type) -> Result { + pub fn expect(text: &$t str, expected: Type) -> Result { match expected {$( $out => Self::$func(text), - )*}.ok_or(Error::UnexpectedToken { + )*}.ok_or(LexError::UnexpectedToken { expected, got: Self::from(text).into(), }) @@ -137,6 +139,10 @@ pub enum Type { LParen, /// Close-Indexed-Mode marker RParen, + /// Open Square Bracket + LBracket, + /// Closed Square Bracket + RBracket, /// Indirect mode marker Indirect, /// absolute address marker @@ -145,6 +151,8 @@ pub enum Type { Immediate, /// Valid identifier. Identifiers must start with a Latin alphabetic character or underline Identifier, + /// A string, encased in "quotes" + String, /// Assembler directive Directive, /// Separator (comma) @@ -209,6 +217,12 @@ regex_impl! {<'text> Token<'text> { pub fn expect_r_paren(text: &str) -> Option { regex!(Type::RParen = r"^\)") } + pub fn expect_l_bracket(text: &str) -> Option { + regex!(Type::LBracket = r"^\[") + } + pub fn expect_r_bracket(text: &str) -> Option { + regex!(Type::RBracket = r"^]") + } pub fn expect_indrect(text: &str) -> Option { regex!(Type::Indirect = r"^@") } @@ -218,8 +232,11 @@ regex_impl! {<'text> Token<'text> { pub fn expect_immediate(text: &str) -> Option { regex!(Type::Immediate = r"^#") } + pub fn expect_string(text: &str) -> Option { + regex!(Type::String = r#"^"[^"]*""#) + } pub fn expect_directive(text: &str) -> Option { - regex!(Type::Directive = r"^\.\S+") + regex!(Type::Directive = r"^\.\w+") } pub fn expect_identifier(text: &str) -> Option { regex!(Type::Identifier = r"^[A-Za-z_]\w*") @@ -255,10 +272,13 @@ impl Display for Type { Self::Plus => Display::fmt("plus sign", f), Self::LParen => Display::fmt("left parenthesis", f), Self::RParen => Display::fmt("right parenthesis", f), + Self::LBracket => Display::fmt("left bracket", f), + Self::RBracket => Display::fmt("right bracket", f), Self::Indirect => Display::fmt("indirect", f), Self::Absolute => Display::fmt("absolute", f), Self::Immediate => Display::fmt("immediate", f), Self::Identifier => Display::fmt("identifier", f), + Self::String => Display::fmt("string", f), Self::Directive => Display::fmt("directive", f), Self::Separator => Display::fmt("comma", f), Self::EndOfFile => Display::fmt("EOF", f), diff --git a/src/lexer/token_stream.rs b/src/lexer/token_stream.rs index 1039eee..7fe2df0 100644 --- a/src/lexer/token_stream.rs +++ b/src/lexer/token_stream.rs @@ -11,12 +11,14 @@ pub trait TokenStream<'text>: Iterator> + std::fmt::Debug { fn context(&self) -> Context; /// Creates an iterator that skips [Type::Space] in the input + #[inline] fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self> where Self: Sized { Ignore::new(variant, self) } /// Creates a [TokenStream] that performs live substitution of the input + #[inline] fn preprocessed(&'text mut self) -> Preprocessed<'text, Self> where Self: Sized { Preprocessed::new(self) @@ -26,51 +28,57 @@ pub trait TokenStream<'text>: Iterator> + std::fmt::Debug { fn peek(&mut self) -> Self::Item; /// Returns the next [Token] if it is of the expected [Type], without advancing - fn peek_expect(&mut self, expected: Type) -> Result; + fn peek_expect(&mut self, expected: Type) -> Result; /// Consumes and returns a [Token] if it is the expected [Type] /// /// Otherwise, does not consume a [Token] - fn expect(&mut self, expected: Type) -> Result; + fn expect(&mut self, expected: Type) -> Result; /// Ignores a [Token] of the expected [Type], propegating errors. - fn require(&mut self, expected: Type) -> Result<(), Error> { self.expect(expected).map(|_| ()) } + #[inline] + fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) } /// Ignores a [Token] of the expected [Type], discarding errors. + #[inline] fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } /// Runs a function on each - fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result + fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result where T: AsRef<[Type]> { for &expected in expected.as_ref() { match f(self, expected).map_err(|e| e.bare()) { Ok(t) => return Ok(t), - Err(Error::UnexpectedToken { .. }) => continue, + Err(LexError::UnexpectedToken { .. }) => continue, Err(e) => return Err(e.context(self.context())), } } - Err(Error::expected(expected, self.peek()).context(self.context())) + Err(LexError::expected(expected, self.peek()).context(self.context())) } /// Returns the next [Token] if it is of the expected [Types](Type), without advancing - fn peek_expect_any_of(&mut self, expected: T) -> Result + #[inline] + fn peek_expect_any_of(&mut self, expected: T) -> Result where T: AsRef<[Type]> { self.any_of(Self::peek_expect, expected) } /// Consumes and returns a [Token] if it matches any of the expected [Types](Type) /// /// Otherwise, does not consume a [Token] - fn expect_any_of(&mut self, expected: T) -> Result + #[inline] + fn expect_any_of(&mut self, expected: T) -> Result where T: AsRef<[Type]> { self.any_of(Self::expect, expected) } /// Ignores a [Token] of any expected [Type], discarding errors. + #[inline] fn allow_any_of(&mut self, expected: T) where T: AsRef<[Type]> { let _ = self.expect_any_of(expected); } /// Ignores a [Token] of any expected [Type], propegating errors. - fn require_any_of(&mut self, expected: T) -> Result<(), Error> + #[inline] + fn require_any_of(&mut self, expected: T) -> Result<(), LexError> where T: AsRef<[Type]> { self.any_of(Self::require, expected) } diff --git a/src/lib.rs b/src/lib.rs index 3784dce..e879dd1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ //! │ └─ Encoding::Single //! │ ├─ Width //! │ └─ PrimaryOperand +//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ └─ Number // Index, absolute address or immediate value. //! ├─ Line @@ -35,9 +36,11 @@ //! │ └─ Encoding::Double //! │ ├─ Width //! │ ├─ PrimaryOperand +//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ │ └─ Number // Index, absolute address or immediate value. //! │ └─ SecondaryOperand +//! │ ├─ Identifier // Label, for relative-addressed data/code //! │ ├─ Register // Direct or indexed register //! │ └─ Number // Index or absolute address //! ├─ Line @@ -45,6 +48,7 @@ //! │ ├─ Opcode //! │ └─ Encoding::Jump //! │ └─ JumpTarget +//! │ ├─ Identifier // Label //! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) //! └─ Line //! └─ EndOfFile @@ -53,21 +57,20 @@ pub mod preamble { //! Common imports for msp430-asm use super::*; + pub use assembler::Assembler; pub use error::Error; - pub use hash::{FromHash, Hash}; pub use lexer::{ context::Context, token::{Token, Type}, token_stream::TokenStream, Tokenizer, }; - pub use linker::{Linker, Visitor}; pub use parser::Parser; } use preamble::*; pub mod error; -pub mod hash; + +pub mod assembler; pub mod lexer; -pub mod linker; pub mod parser; diff --git a/src/linker.rs b/src/linker.rs deleted file mode 100644 index 8dab54c..0000000 --- a/src/linker.rs +++ /dev/null @@ -1,20 +0,0 @@ -// © 2023 John Breaux -/// TODO: tree traversal and label resolution -use crate::parser::preamble::*; -pub trait Visitor { - // visit_node for all nodes - fn visit_register(&mut self, r: &Register) -> T; - fn visit_number(&mut self, n: &Number) -> T; - fn visit_width(&mut self, w: &Width) -> T; - fn visit_primary_operand(&mut self, p: &PrimaryOperand) -> T; - fn visit_secondary_operand(&mut self, d: &SecondaryOperand) -> T; - fn visit_jump_target(&mut self, t: &JumpTarget) -> T; - fn visit_encoding(&mut self, e: &Encoding) -> T; - fn visit_opcode(&mut self, o: &Opcode) -> T; - fn visit_instruction(&mut self, i: &Instruction) -> T; - fn visit_directive(&mut self, d: &Directive) -> T; - // the most important one: resolve identifiers - fn visit_identifier(&mut self, i: &Identifier) -> T; -} -/// TODO: [Linker] -pub struct Linker; diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 2a16f8a..0000000 --- a/src/main.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Simple frontend for the assembler - -use msp430_asm::preamble::*; -use std::io::Read; - -fn main() -> Result<(), Error> { - let mut repl = true; - for arg in std::env::args() { - match arg.as_str() { - "-" | "-f" | "--file" => repl = false, - _ => (), - } - } - - let mut buf = String::new(); - if repl { - while let Ok(len) = std::io::stdin().read_line(&mut buf) { - match len { - 0 => break, // No newline (reached EOF) - 1 => { - // create a token steam - match Parser::default().parse(&buf) { - Ok(tree) => println!("{tree:x}"), - Err(error) => println!("{error}"), - } - buf.clear(); // Reuse buf's allocation - continue; - } // Line is empty - _ => (), - } - } - } else { - std::io::stdin().lock().read_to_string(&mut buf).map_err(|_| Error::EndOfFile)?; - let tree = Parser::default().parse(&buf); - match &tree { - Ok(tree) => println!("{tree:x}"), - Err(error) => eprintln!("{error}"), - } - } - - Ok(()) -} diff --git a/src/parser.rs b/src/parser.rs index 4fc8e14..6fe8293 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,15 +1,20 @@ // © 2023 John Breaux //! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root) -use crate::{Error, Hash, TokenStream, Type}; -use std::fmt::{Debug, Display, LowerHex}; +use crate::{TokenStream, Type}; +use error::ParseError; +use preamble::*; +use std::{ + fmt::{Debug, Display}, + path::Path, +}; pub mod preamble { //! All the different AST node types use super::*; // Traits pub use parsable::Parsable; - + // Nodes pub use comment::Comment; pub use directive::Directive; pub use identifier::Identifier; @@ -24,173 +29,38 @@ pub mod preamble { pub use label::Label; pub use line::Line; pub use root::Root; + // Error + pub use error::ParseError; } -use preamble::*; pub mod parsable; pub mod comment; pub mod directive; +pub mod error; pub mod identifier; pub mod instruction; pub mod label; - -pub mod line { - // © 2023 John Breaux - //! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same document line will be treated as if they took up multiple [`Line`s](Line). - //! - //! A line contains one of: - //! - [`Label`] - //! - [`Instruction`] - //! - [`Directive`] - //! - [`Comment`] - //! - [Nothing](Line::Empty) - use super::*; - - /// A line contains any one of: - /// - [`Label`] (definition) - /// - [`Instruction`] - /// - [`Directive`] - /// - [`Comment`] - /// - Nothing at all - #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub enum Line { - Empty, - Insn(Instruction), - Comment(Comment), - Directive(Directive), - Label(Label), // TODO: Label resolution - EndOfFile, // Expected end of file - } - - impl Parsable for Line { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - Ok( - match stream - .peek_expect_any_of([ - Type::Endl, - Type::Insn, - Type::Comment, - Type::Directive, - Type::Identifier, - Type::EndOfFile, - ])? - .variant() - { - Type::Endl => { - stream.next(); - Self::Empty - } - Type::Insn => Self::Insn(Instruction::parse(p, stream)?), - Type::Comment => Self::Comment(Comment::parse(p, stream)?), - Type::Directive => Self::Directive(Directive::parse(p, stream)?), - Type::Identifier => Self::Label(Label::parse(p, stream)?), - Type::EndOfFile => { - stream.next(); - Self::EndOfFile - } - _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"), - }, - ) - } - } - impl Display for Line { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Empty => writeln!(f, "\n"), - Self::Label(arg0) => Display::fmt(arg0, f), - Self::Insn(arg0) => Display::fmt(arg0, f), - Self::Directive(arg0) => Display::fmt(arg0, f), - Self::Comment(arg0) => Display::fmt(arg0, f), - Self::EndOfFile => write!(f, "; End of file."), - } - } - } - impl LowerHex for Line { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Line::Insn(arg0) => LowerHex::fmt(arg0, f), - _ => Ok(()), - } - } - } -} - -pub mod root { - // © 2023 John Breaux - use super::*; - - /// Contains the entire AST - #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct Root(pub Vec); - - // TODO: Get data out of ParseTree - // TODO: Maybe implement some sort of follower - impl Parsable for Root { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - let mut lines = vec![]; - loop { - match Line::parse(p, stream) { - Ok(Line::EndOfFile) => break, - Ok(line) => lines.push(line), - Err(e) => { - return Err(Error::ParseError(Self(lines), Box::new(e))); - } - } - } - Ok(Root(lines)) - } - } - - impl Display for Root { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for line in self.0.iter() { - f.pad(&format!("{line} "))?; - } - Ok(()) - } - } - impl LowerHex for Root { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for line in self.0.iter() { - LowerHex::fmt(line, f)?; - } - Ok(()) - } - } - impl Debug for Root { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for line in self.0.iter() { - Display::fmt(line, f)?; - Debug::fmt(line, f)?; - } - Ok(()) - } - } -} - -/// The type for [Parser] callbacks -pub type EmitComment = Box; -pub type DefineLabel = Box Result<(), Error>>; +pub mod line; +pub mod root; pub struct Parser { radix: u32, - // TODO: callbacks for emitted token sequences?! - on_label: Option, - on_comment: Option, } impl Parser { - pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result { + pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result { Root::parse(&self, &mut stream.ignore(Type::Space)) } - pub fn parse(self, input: &T) -> Result + pub fn parse(self, input: &T) -> Result where T: AsRef + ?Sized { Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) } - pub fn parse_one(self, input: &T) -> Result + pub fn parse_file

(self, path: &P) -> Result + where P: AsRef + ?Sized { + self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into())) + } + pub fn parse_one(self, input: &T) -> Result where T: AsRef + ?Sized { Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) } @@ -198,24 +68,10 @@ impl Parser { /// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number] /// conversion pub fn radix(mut self, radix: u32) { self.radix = radix; } - - /// Inform the caller of a new identifier definition - pub fn define_label(&mut self, l: &Identifier) -> Result<(), Error> { - match self.on_label.as_mut() { - Some(f) => f(l), - _ => Ok(()), - } - } - /// Inform the caller of an identifier being used - pub fn emit_comment(&mut self, d: &str) { - if let Some(f) = self.on_comment.as_mut() { - f(d) - } - } } impl Default for Parser { - fn default() -> Self { Self { radix: 16, on_label: None, on_comment: None } } + fn default() -> Self { Self { radix: 16 } } } impl Debug for Parser { diff --git a/src/parser/comment.rs b/src/parser/comment.rs index edefaa8..c3fc86c 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -5,10 +5,9 @@ use super::*; pub struct Comment(pub String); impl Parsable for Comment { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { - let token = stream.expect(Type::Comment)?; - Ok(Self(token.lexeme().to_string())) + Ok(Self(stream.expect(Type::Comment)?.lexeme().to_string())) } } impl Display for Comment { diff --git a/src/parser/directive.rs b/src/parser/directive.rs index 9f4d169..24fb9e5 100644 --- a/src/parser/directive.rs +++ b/src/parser/directive.rs @@ -1,32 +1,90 @@ // © 2023 John Breaux //! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and //! [Linker](crate::Linker) + +use std::path::PathBuf; + use super::*; -use crate::hash::FromHash; +use crate::lexer::token::OwnedToken; + +// TODO: Parse each kind of *postprocessor* directive into an AST node +// - .org 8000: Directive::Org { base: Number } +// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this +// in the AST? +// - .include "" Directive::Include { Root } ; should this include an entire AST in +// the AST? +// - .word 8000 Directive::Word(Number) +// - .words dead beef Directive::Words(Vec|Vec) +// - .byte ff Directive::Byte(Number) +// - .bytes de, ad, be, ef Directive::Bytes(Vec) +// - .string "string" Directive::String(String) +// - .ascii "string" Directive::Ascii(Vec) #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Directive(pub Hash, pub String); - -impl Directive { - fn str(mut self, s: S) -> Self { - self.1 = s.to_string(); - self - } +pub enum Directive { + Org(Number), + Define(Vec), + Include(Root), // TODO: create and parse an entire AST, and stick it in Include + Byte(Number), + Bytes(Vec), + Word(Number), + Words(Vec), + String(String), + Strings(Vec), } -impl From for Directive { - fn from(value: Hash) -> Self { Self(value, String::new()) } -} +impl Directive {} impl Parsable for Directive { - fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { - // expect a directive let d = stream.expect(Type::Directive)?; - // send the directive to the listener - Ok(Self::from_hash(d.lexeme()).str(d.lexeme())) + // match on the directive + Ok(match d.lexeme() { + ".org" => Self::Org(Number::parse(p, stream)?), + ".define" => { + let mut tokens = vec![]; + loop { + match stream.peek().variant() { + Type::Endl | Type::EndOfFile => break, + _ => tokens.push(stream.next().unwrap_or_default().into()), + } + } + Self::Define(tokens) + } + ".include" => { + // Try to get path + Self::Include(Parser::default().parse_file(&PathBuf::parse(p, stream)?)?) + } + ".byte" => Self::Byte(Number::parse(p, stream)?), + ".bytes" => Self::Bytes(Vec::::parse(p, stream)?), + ".word" => Self::Word(Number::parse(p, stream)?), + ".words" => Self::Words(Vec::::parse(p, stream)?), + ".string" => Self::String(String::parse(p, stream)?), + ".strings" => Self::Strings(Vec::::parse(p, stream)?), + e => Err(ParseError::UnrecognizedDirective(e.into()))?, + }) } } + impl Display for Directive { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.1) } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Directive::Org(num) => write!(f, ".org {num}"), + Directive::Define(rep) => { + write!(f, ".define")?; + for t in rep { + write!(f, " {t}")?; + } + Ok(()) + } + Directive::Include(r) => Display::fmt(r, f), + Directive::Byte(num) => write!(f, ".org {num}"), + Directive::Bytes(v) => write!(f, ".bytes {v:?}"), + Directive::Word(num) => write!(f, ".org {num}"), + Directive::Words(v) => write!(f, ".bytes {v:?}"), + Directive::String(s) => write!(f, ".string \"{s}\""), + Directive::Strings(s) => write!(f, ".string \"{s:?}\""), + } + } } diff --git a/src/parser/error.rs b/src/parser/error.rs new file mode 100644 index 0000000..0b3bf88 --- /dev/null +++ b/src/parser/error.rs @@ -0,0 +1,74 @@ +// © 2023 John Breauxs +use super::*; +use crate::lexer::error::LexError; + +#[derive(Debug)] +pub enum ParseError { + /// Produced by [lexer](crate::lexer) + LexError(LexError), + /// Produced by [std::io] + IoError(std::io::Error), + /// Produced by [Number](Number)[::parse()](Parsable::parse()) + /// when the parsed number contains digits too high for the specified radix + UnexpectedDigits(String, u32), + /// Produced by [Opcode](Opcode)[::parse()](Parsable::parse()) + /// when the opcode passed lexing but did not match recognized opcodes. + /// + /// This is always a lexer bug. + UnrecognizedOpcode(String), + /// Produced by [Directive](Directive)[::parse()](Parsable::parse()) + /// when an unknown or unimplemented directive is used + UnrecognizedDirective(String), + /// Produced by [Register] when attempting to convert from a [str] + /// that isn't a register (pc, sp, sr, cg, or r{number}) + NotARegister(String), + /// Produced by [Register] when the r{number} is outside the range 0-15 + RegisterTooHigh(u16), + /// Produced by [SecondaryOperand] when the joke "secondary immediate" form + /// is out of range 0..=1 + FatSecondaryImmediate(isize), + /// Produced by a [Number] too wide to fit in 16 bits + /// (outside the range `(-2^15) .. (2^16-1)` ) + NumberTooWide(isize), + /// Produced by [JumpTarget](parser::preamble::JumpTarget) + /// when the jump offset is outside the range (-0x3ff..0x3fc) + JumpedTooFar(isize), + /// Produced by [JumpTarget](parser::preamble::JumpTarget) + JumpedOdd(isize), +} + +impl From for ParseError { + fn from(value: LexError) -> Self { Self::LexError(value) } +} +impl From for ParseError { + fn from(value: std::io::Error) -> Self { Self::IoError(value) } +} + +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::LexError(error) => Display::fmt(error, f), + Self::IoError(error) => Display::fmt(error, f), + Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), + Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), + Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."), + Self::NotARegister(reg) => write!(f, "{reg} is not a register"), + Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"), + Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"), + Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"), + Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"), + Self::JumpedOdd(num) => { + write!(f, "Jump targets only encode even numbers: {num} must not be odd.") + } + } + } +} +impl std::error::Error for ParseError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::LexError(e) => Some(e), + Self::IoError(e) => Some(e), + _ => None, + } + } +} diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs index dd88815..3caefbb 100644 --- a/src/parser/identifier.rs +++ b/src/parser/identifier.rs @@ -1,22 +1,18 @@ // © 2023 John Breaux -//! An [Identifier] stores the name of an identifier +//! An [Identifier] stores the hash of an identifier use super::*; +use std::rc::Rc; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Identifier { - Hash(Hash), - Str(String), +pub struct Identifier { + str: Rc, } impl Identifier { - fn str>(s: T) -> Self { Self::Str(s.as_ref().into()) } -} - -impl From for Identifier { - fn from(value: Hash) -> Self { Self::Hash(value) } + fn str>(s: T) -> Self { Self { str: s.as_ref().to_owned().into() } } } impl Parsable for Identifier { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { let token = stream.expect(Type::Identifier)?; match token.variant() { @@ -26,10 +22,5 @@ impl Parsable for Identifier { } } impl Display for Identifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Identifier::Hash(_) => Display::fmt("Unresolved", f), - Identifier::Str(s) => Display::fmt(s, f), - } - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.str, f) } } diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs index a61828b..38bfaa5 100644 --- a/src/parser/instruction.rs +++ b/src/parser/instruction.rs @@ -15,7 +15,7 @@ pub mod encoding; pub mod opcode; /// Contains the [Opcode] and [Encoding] information for a single msp430 instruction -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Instruction(Opcode, Encoding); impl Instruction { @@ -24,11 +24,11 @@ impl Instruction { /// Gets the Instruction as a [u16] pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() } /// Gets the [extension words] - pub fn ext_words(&self) -> (Option, Option) { self.1.extwords() } + pub fn ext_words(&self) -> [Option; 2] { self.1.extwords() } } impl Parsable for Instruction { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where Self: Sized, T: crate::TokenStream<'text>, @@ -50,17 +50,3 @@ impl From for u16 { impl Display for Instruction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) } } - -impl LowerHex for Instruction { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let (word, (ext_src, ext_dst)) = (self.word(), self.ext_words()); - write!(f, "{:04x} ", word.swap_bytes())?; - if let Some(e) = ext_src { - write!(f, "{:04x} ", e.swap_bytes())? - } - if let Some(e) = ext_dst { - write!(f, "{:04x} ", e.swap_bytes())? - } - Ok(()) - } -} diff --git a/src/parser/instruction/encoding.rs b/src/parser/instruction/encoding.rs index 1b65f95..1a44938 100644 --- a/src/parser/instruction/encoding.rs +++ b/src/parser/instruction/encoding.rs @@ -32,7 +32,7 @@ use encoding_parser::EncodingParser; /// // Print the Encoding /// println!("{encoding}"); /// ``` -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Encoding { Single { width: Width, dst: PrimaryOperand }, Jump { target: JumpTarget }, @@ -52,20 +52,20 @@ impl Encoding { pub fn reflexive() -> ReflexiveBuilder { Default::default() } /// pub fn word(&self) -> u16 { - match *self { - Encoding::Single { width, dst } => u16::from(width) | dst.mode() | dst.register() as u16, - Encoding::Jump { target } => target.word(), + match self { + Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16, + Encoding::Jump { target } => target.word().unwrap_or_default(), Encoding::Double { width, src, dst } => { - u16::from(width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8) + u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8) } } } /// Returns extwords for instruction - pub fn extwords(&self) -> (Option, Option) { + pub fn extwords(&self) -> [Option; 2] { match self { - Encoding::Double { src, dst, .. } => (src.ext_word(), dst.ext_word()), - Encoding::Single { dst, .. } => (dst.ext_word(), None), - Encoding::Jump { .. } => (None, None), + Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()], + Encoding::Single { dst, .. } => [dst.ext_word(), None], + Encoding::Jump { .. } => [None, None], } } } diff --git a/src/parser/instruction/encoding/builder.rs b/src/parser/instruction/encoding/builder.rs index 4c62910..63080d5 100644 --- a/src/parser/instruction/encoding/builder.rs +++ b/src/parser/instruction/encoding/builder.rs @@ -17,7 +17,7 @@ impl SingleBuilder { self } /// Build - pub fn end(&self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } } + pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } } } #[derive(Debug, Default)] @@ -29,7 +29,7 @@ impl JumpBuilder { self.target = Some(target); self } - pub fn end(&self) -> EncodingParser { EncodingParser::Jump { target: self.target } } + pub fn end(self) -> EncodingParser { EncodingParser::Jump { target: self.target } } } #[derive(Debug, Default)] @@ -54,7 +54,7 @@ impl DoubleBuilder { self.dst = Some(dst); self } - pub fn end(&self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } } + pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } } } #[derive(Debug, Default)] @@ -72,5 +72,5 @@ impl ReflexiveBuilder { self.reg = Some(reg); self } - pub fn end(&self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } } + pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } } } diff --git a/src/parser/instruction/encoding/encoding_parser.rs b/src/parser/instruction/encoding/encoding_parser.rs index 71b57e1..c59515d 100644 --- a/src/parser/instruction/encoding/encoding_parser.rs +++ b/src/parser/instruction/encoding/encoding_parser.rs @@ -2,7 +2,7 @@ //! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`] use super::*; -#[derive(Debug)] +#[derive(Clone, Debug)] /// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream] pub enum EncodingParser { Single { width: Option, dst: Option }, @@ -10,29 +10,27 @@ pub enum EncodingParser { Double { width: Option, src: Option, dst: Option }, Reflexive { width: Option, reg: Option }, } + impl EncodingParser { /// Constructs an [Encoding] from this [EncodingParser], filling holes /// with the tokenstream - pub fn parse<'text, T>(&self, p: &Parser, stream: &mut T) -> Result + pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result where T: crate::TokenStream<'text> { Ok(match self { - Self::Single { width, dst } => { - let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream)); - let dst = if let Some(dst) = dst { *dst } else { PrimaryOperand::parse(p, stream)? }; - Encoding::Single { width, dst } - } + Self::Single { width, dst } => Encoding::Single { + width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), + dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? }, + }, Self::Jump { target } => Encoding::Jump { target: target.unwrap_or(JumpTarget::parse(p, stream)?) }, - Self::Double { width, src, dst } => { - let width = width.unwrap_or_else(|| Width::parse_or_default(p, stream)); - let src = if let Some(src) = src { *src } else { PrimaryOperand::parse(p, stream)? }; - let dst = if let Some(dst) = dst { *dst } else { SecondaryOperand::parse(p, stream)? }; - - Encoding::Double { width, src, dst } - } + Self::Double { width, src, dst } => Encoding::Double { + width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), + src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? }, + dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? }, + }, Self::Reflexive { width, reg } => { let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default()); - let reg = if let Some(reg) = reg { *reg } else { SecondaryOperand::parse(p, stream)? }; - Encoding::Double { width, src: reg.into(), dst: reg } + let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? }; + Encoding::Double { width, src: reg.clone().into(), dst: reg } } }) } diff --git a/src/parser/instruction/encoding/jump_target.rs b/src/parser/instruction/encoding/jump_target.rs index dcb729d..2d9b731 100644 --- a/src/parser/instruction/encoding/jump_target.rs +++ b/src/parser/instruction/encoding/jump_target.rs @@ -5,37 +5,54 @@ use super::*; /// Contains the [pc-relative offset](Number) or [label](Identifier) /// for a [Jump](Encoding::Jump) [Instruction] -// TODO: Allow identifiers in JumpTarget -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct JumpTarget(Number); +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum JumpTarget { + Number(Number), + Identifier(Identifier), +} impl JumpTarget { - pub fn word(&self) -> u16 { u16::from(self.0) & 0x3ff } + pub fn word(&self) -> Option { + match self { + JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff), + JumpTarget::Identifier(_) => None, + } + } + pub fn squish(value: isize) -> Result { + match value { + i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?, + i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff), + i => Err(ParseError::JumpedTooFar(i))?, + } + } + pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 } } impl Parsable for JumpTarget { - /// - Identifier - /// - Number - /// - Negative - /// - Number - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + // - Identifier + // - Number + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: crate::TokenStream<'text> { // Try to parse a number - let target = Number::parse(p, stream)?; - match target.into() { - i if i % 2 != 0 => Err(Error::JumpedOdd(i).context(stream.context()))?, - i if (-1024..=1022).contains(&(i - 2)) => Ok(Self((target - 2) >> 1)), - i => Err(Error::JumpedTooFar(i).context(stream.context()))?, + if let Some(num) = Number::try_parse(p, stream)? { + Self::try_from(num) + } else { + // if that fails, try to parse an identifier instead + Ok(Self::Identifier(Identifier::parse(p, stream)?)) } } } -impl From for u16 { - fn from(value: JumpTarget) -> Self { value.0.into() } +impl TryFrom for JumpTarget { + type Error = ParseError; + fn try_from(value: Number) -> Result { Ok(Self::Number(Self::squish(value.into())?.into())) } } impl Display for JumpTarget { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", (1 + isize::from(self.0)) << 1) + match self { + Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))), + Self::Identifier(id) => write!(f, "{id}"), + } } } diff --git a/src/parser/instruction/encoding/number.rs b/src/parser/instruction/encoding/number.rs index ed4c3f3..849e0f9 100644 --- a/src/parser/instruction/encoding/number.rs +++ b/src/parser/instruction/encoding/number.rs @@ -2,56 +2,54 @@ //! A [`Number`] represents a 16-bit signed or unsigned word use super::*; -// TODO: Allow identifiers/expressions in place of numbers -// - Dependency inversion in TokenStream to allow swapping the parser mid-parse? -// - Oh my god, not relying on std::iter::Iterator allows for so many more parsing options - #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Number(isize, u32); // (value, radix) impl Parsable for Number { // A number is: - // RadixMarker[Hex|Oct|Bin]? - // - Number - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + // [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { - use Type::*; + use Type as Ty; // The number is negative when it begins with a Minus, but Plus is also acceptable. - let negative = stream.expect_any_of([Minus, Plus]).map_or(false, |t| t.is_variant(Minus)); + let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus)); let radix = match stream - .expect_any_of([RadixMarkerHex, RadixMarkerDec, RadixMarkerOct, RadixMarkerBin]) + .expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin]) .ok() .map(|t| t.variant()) { - Some(RadixMarkerHex) => 16, - Some(RadixMarkerDec) => 10, - Some(RadixMarkerOct) => 8, - Some(RadixMarkerBin) => 2, + Some(Ty::RadixMarkerHex) => 16, + Some(Ty::RadixMarkerDec) => 10, + Some(Ty::RadixMarkerOct) => 8, + Some(Ty::RadixMarkerBin) => 2, _ => p.radix, }; - let number = stream.expect(Number)?; + let number = stream.expect(Ty::Number)?; + // TODO: Reintroduce error context let number = isize::from_str_radix(number.lexeme(), radix) - .map_err(|_| Error::UnexpectedDigits(number.lexeme().into(), radix).context(stream.context()))? + .map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))? * if negative { -1 } else { 1 }; // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit) Ok(Self( - if (-0x8000..0x10000).contains(&number) { - number - } else { - Err(Error::NumberTooWide(number).context(stream.context()))? - }, + if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? }, radix, )) } } + +impl From for Number { + fn from(value: isize) -> Self { Self(value, 16) } +} + impl From for isize { fn from(value: Number) -> Self { value.0 as Self } } -impl From for i32 { - fn from(value: Number) -> Self { value.0 as Self } + +impl From for Number { + fn from(value: u16) -> Self { Self(value as isize, 16) } } + impl From for u16 { - /// Converts this type from the input type. fn from(value: Number) -> Self { value.0 as Self } } @@ -72,5 +70,12 @@ impl std::ops::Shr for Number { } impl std::fmt::Display for Number { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:x}", self.0) } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.1 { + 2 => std::fmt::Binary::fmt(&self.0, f), + 8 => std::fmt::Octal::fmt(&self.0, f), + 16 => std::fmt::LowerHex::fmt(&self.0, f), + _ => std::fmt::Display::fmt(&self.0, f), + } + } } diff --git a/src/parser/instruction/encoding/primary_operand.rs b/src/parser/instruction/encoding/primary_operand.rs index 02009a0..8742e92 100644 --- a/src/parser/instruction/encoding/primary_operand.rs +++ b/src/parser/instruction/encoding/primary_operand.rs @@ -5,12 +5,13 @@ use super::*; /// Contains the first [Register], addressing mode, and Extension Word for a /// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PrimaryOperand { Direct(Register), Indirect(Register), PostInc(Register), Indexed(Register, Number), + Relative(Identifier), Absolute(Number), Immediate(Number), Four, @@ -27,7 +28,7 @@ impl PrimaryOperand { use PrimaryOperand::*; match self { Direct(_) | Zero => 0, - Indexed(_, _) | Absolute(_) | One => 1 << 4, + Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4, Indirect(_) | Two | Four => 2 << 4, PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4, } @@ -37,7 +38,7 @@ impl PrimaryOperand { use PrimaryOperand::*; match self { Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r, - Immediate(_) => Register::pc, + Immediate(_) | Relative(_) => Register::pc, Absolute(_) | Four | Eight => Register::sr, Zero | One | Two | MinusOne => Register::cg, } @@ -53,21 +54,8 @@ impl PrimaryOperand { } impl Parsable for PrimaryOperand { - // - Register - // - Indirect - // - Register - // - PostInc? - // - Number - // - OpenIdx - // - Register - // - CloseIdx - // - Absolute - // - Number - // - Immediate - // - Number - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: crate::TokenStream<'text> { - use PrimaryOperand::*; // Try parsing as Register (Direct) if let Some(r) = Register::try_parse(p, stream)? { return Ok(Self::Direct(r)); @@ -79,33 +67,43 @@ impl Parsable for PrimaryOperand { stream.expect(Type::RParen)?; return Ok(Self::Indexed(reg, idx)); } + // Try parsing as Identifier (Relative, label mode) + if let Some(id) = Identifier::try_parse(p, stream)? { + return Ok(Self::Relative(id)); + } // Or directly match any of the valid prefix markers - // Type::Register and Type::Number are included here to make error messages clearer. + // Register, Number, and Identifier are included here to make error messages clearer. // their inclusion will cause a negligible slowdown when the next token is not a prefix marker // (a failure condition) - let token = - stream.expect_any_of([Type::Indirect, Type::Absolute, Type::Immediate, Type::Register, Type::Number])?; + let token = stream.expect_any_of([ + Type::Indirect, + Type::Absolute, + Type::Immediate, + Type::Register, + Type::Number, + Type::Identifier, + ])?; Ok(match token.variant() { Type::Indirect => { let reg = Register::parse(p, stream)?; match stream.expect(Type::Plus) { - Ok(_) => PostInc(reg), - Err(_) => Indirect(reg), + Ok(_) => Self::PostInc(reg), + Err(_) => Self::Indirect(reg), } } - Type::Absolute => Absolute(Number::parse(p, stream)?), + Type::Absolute => Self::Absolute(Number::parse(p, stream)?), Type::Immediate => { let number = Number::parse(p, stream)?; match number.into() { // There are two representations for the all-ones constant, since Number preserves // signedness. - -1 | 0xffff => MinusOne, - 0 => Zero, - 1 => One, - 2 => Two, - 4 => Four, - 8 => Eight, - _ => Immediate(number), + -1_isize | 0xffff => Self::MinusOne, + 0 => Self::Zero, + 1 => Self::One, + 2 => Self::Two, + 4 => Self::Four, + 8 => Self::Eight, + _ => Self::Immediate(number), } } _ => unreachable!("Token {token:?} passed expectation but failed match!"), @@ -119,6 +117,7 @@ impl From for PrimaryOperand { SecondaryOperand::Direct(r) => Self::Direct(r), SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n), SecondaryOperand::Absolute(n) => Self::Absolute(n), + SecondaryOperand::Relative(id) => Self::Relative(id), SecondaryOperand::Zero => Self::Zero, SecondaryOperand::One => Self::One, } @@ -133,6 +132,7 @@ impl Display for PrimaryOperand { Self::Indirect(r) => write!(f, "@{r}"), Self::PostInc(r) => write!(f, "@{r}+"), Self::Indexed(r, idx) => write!(f, "{idx}({r})"), + Self::Relative(id) => Display::fmt(id, f), Self::Absolute(n) => write!(f, "&{n}"), Self::Immediate(n) => write!(f, "#{n}"), Self::Four => Display::fmt("#4", f), diff --git a/src/parser/instruction/encoding/register.rs b/src/parser/instruction/encoding/register.rs index e2e1715..7c4c1aa 100644 --- a/src/parser/instruction/encoding/register.rs +++ b/src/parser/instruction/encoding/register.rs @@ -30,14 +30,9 @@ pub enum Register { } impl Parsable for Register { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result where T: crate::TokenStream<'text> { - stream - .expect(Type::Register) - .map_err(|e: Error| e.context(stream.context()))? - .lexeme() - .parse() - .map_err(|e: Error| e.context(stream.context())) + stream.expect(Type::Register)?.lexeme().parse() } } @@ -46,7 +41,7 @@ impl From for u16 { } impl TryFrom for Register { - type Error = Error; + type Error = ParseError; fn try_from(value: u16) -> Result { use Register::*; Ok(match value { @@ -66,13 +61,13 @@ impl TryFrom for Register { 13 => r13, 14 => r14, 15 => r15, - _ => return Err(Error::RegisterTooHigh(value)), + _ => return Err(ParseError::RegisterTooHigh(value)), }) } } impl FromStr for Register { - type Err = Error; + type Err = ParseError; fn from_str(s: &str) -> Result { use Register::*; @@ -81,7 +76,9 @@ impl FromStr for Register { "sp" => Ok(sp), "sr" => Ok(sr), "cg" => Ok(cg), - _ => str::parse::(&s[1..]).map_err(|_| -> Self::Err { Error::NotARegister(s.into()) })?.try_into(), + _ => { + str::parse::(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into() + } } } } diff --git a/src/parser/instruction/encoding/secondary_operand.rs b/src/parser/instruction/encoding/secondary_operand.rs index f0aab77..59ca3b1 100644 --- a/src/parser/instruction/encoding/secondary_operand.rs +++ b/src/parser/instruction/encoding/secondary_operand.rs @@ -4,28 +4,31 @@ use super::*; /// The destination of a [Double](Encoding::Double) -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum SecondaryOperand { Direct(Register), Indexed(Register, Number), + Relative(Identifier), Absolute(Number), // Joke encodings? Zero, One, } +use SecondaryOperand as So; + impl SecondaryOperand { pub fn mode(&self) -> u16 { - use SecondaryOperand::*; match self { - Direct(_) | Zero => 0, - Indexed(_, _) | Absolute(_) | One => 1 << 7, + So::Direct(_) | So::Zero => 0, + So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7, } } pub fn register(&self) -> Register { use SecondaryOperand::*; match self { Direct(r) | Indexed(r, _) => *r, + Relative(_) => Register::pc, Absolute(_) => Register::sr, Zero | One => Register::cg, } @@ -51,7 +54,7 @@ impl Parsable for SecondaryOperand { // - Number // - Immediate // - Number == 0, 1 - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: crate::TokenStream<'text> { use SecondaryOperand::*; stream.allow(Type::Separator); @@ -66,16 +69,22 @@ impl Parsable for SecondaryOperand { stream.expect(Type::RParen)?; return Ok(Self::Indexed(reg, idx)); } - // Type::Register and Type::Number are included here to make error messages clearer. + // Try parsing as Identifier (Relative, label mode) + if let Some(id) = Identifier::try_parse(p, stream)? { + return Ok(Self::Relative(id)); + } + // Register, Number, and Identifier are included here to make error messages clearer. // their inclusion will cause a negligible slowdown when the next token is not a prefix marker // (a failure condition) but should not match a token - let token = stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number])?; + let token = + stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?; Ok(match token.variant() { Type::Absolute => Absolute(Number::parse(p, stream)?), + // TODO: Reintroduce error context Type::Immediate => match Number::parse(p, stream)?.into() { 0 => Zero, 1 => One, - n => Err(Error::FatSecondaryImmediate(n as isize).context(stream.context()))?, + n => Err(ParseError::FatSecondaryImmediate(n))?, }, _ => unreachable!("Token {token:?} passed expectation but failed match!"), }) @@ -87,6 +96,7 @@ impl Display for SecondaryOperand { match self { Self::Direct(r) => Display::fmt(r, f), Self::Indexed(r, idx) => write!(f, "{idx}({r})"), + Self::Relative(id) => Display::fmt(id, f), Self::Absolute(n) => write!(f, "&{n}"), Self::Zero => Display::fmt("#0", f), Self::One => Display::fmt("#1", f), diff --git a/src/parser/instruction/encoding/width.rs b/src/parser/instruction/encoding/width.rs index 0fd5974..3a4924e 100644 --- a/src/parser/instruction/encoding/width.rs +++ b/src/parser/instruction/encoding/width.rs @@ -10,7 +10,7 @@ use super::*; pub struct Width(bool); impl Parsable for Width { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else { return Ok(Self(false)); diff --git a/src/parser/instruction/opcode.rs b/src/parser/instruction/opcode.rs index 4790db7..8100e60 100644 --- a/src/parser/instruction/opcode.rs +++ b/src/parser/instruction/opcode.rs @@ -71,195 +71,189 @@ pub enum Opcode { } impl Opcode { - pub fn takes_width(&self) -> bool { - use Opcode::*; - match self { - Rrc => true, - Swpb => false, - Rra => true, - Sxt => false, - Push => true, - Call | Reti => false, - Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => false, - Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => true, - Nop | Pop | Br | Ret | Clrc | Setc | Clrz | Setz | Clrn | Setn | Dint | Eint | Rla | Rlc | Inv | Clr - | Tst | Dec | Decd | Inc | Incd | Adc | Dadc | Sbc => true, - } - } /// Resolve an Opcode into an [Opcode] and an [EncodingParser] pub fn resolve(self) -> (Opcode, EncodingParser) { use super::Encoding as Enc; - use Opcode::*; - use Register::*; + use Register as Reg; use {PrimaryOperand as Src, SecondaryOperand as Dst}; match self { - Rrc | Swpb | Rra | Sxt | Push | Call | Reti => (self, Enc::single().end()), - Jnz | Jz | Jnc | Jc | Jn | Jge | Jl | Jmp => (self, Enc::jump().end()), - Mov | Add | Addc | Subc | Sub | Cmp | Dadd | Bit | Bic | Bis | Xor | And => (self, Enc::double().end()), - Nop => (Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()), - Pop => (Mov, Enc::double().src(Src::PostInc(sp)).end()), - Br => (Mov, Enc::double().dst(Dst::Direct(pc)).end()), - Ret => (Mov, Enc::double().src(Src::PostInc(sp)).dst(Dst::Direct(pc)).end()), - Clrc => (Bic, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()), - Setc => (Bis, Enc::double().src(Src::One).dst(Dst::Direct(sr)).end()), - Clrz => (Bic, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()), - Setz => (Bis, Enc::double().src(Src::Two).dst(Dst::Direct(sr)).end()), - Clrn => (Bic, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()), - Setn => (Bis, Enc::double().src(Src::Four).dst(Dst::Direct(sr)).end()), - Dint => (Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()), - Eint => (Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(sr)).end()), - Rla => (Add, Enc::reflexive().end()), - Rlc => (Addc, Enc::reflexive().end()), - Inv => (Xor, Enc::double().src(Src::MinusOne).end()), - Clr => (Mov, Enc::double().src(Src::Zero).end()), - Tst => (Cmp, Enc::double().src(Src::Zero).end()), - Dec => (Sub, Enc::double().src(Src::One).end()), - Decd => (Sub, Enc::double().src(Src::Two).end()), - Inc => (Add, Enc::double().src(Src::One).end()), - Incd => (Add, Enc::double().src(Src::Two).end()), - Adc => (Addc, Enc::double().src(Src::Zero).end()), - Dadc => (Dadd, Enc::double().src(Src::Zero).end()), - Sbc => (Subc, Enc::double().src(Src::Zero).end()), + Self::Rrc | Self::Swpb | Self::Rra | Self::Sxt | Self::Push | Self::Call | Self::Reti => { + (self, Enc::single().end()) + } + Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => { + (self, Enc::jump().end()) + } + Self::Mov + | Self::Add + | Self::Addc + | Self::Subc + | Self::Sub + | Self::Cmp + | Self::Dadd + | Self::Bit + | Self::Bic + | Self::Bis + | Self::Xor + | Self::And => (self, Enc::double().end()), + Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()), + Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()), + Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()), + Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()), + Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), + Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), + Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), + Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), + Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), + Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), + Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), + Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), + Self::Rla => (Self::Add, Enc::reflexive().end()), + Self::Rlc => (Self::Addc, Enc::reflexive().end()), + Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()), + Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()), + Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()), + Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()), + Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()), + Self::Inc => (Self::Add, Enc::double().src(Src::One).end()), + Self::Incd => (Self::Add, Enc::double().src(Src::Two).end()), + Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()), + Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()), + Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()), } } } impl Parsable for Opcode { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { - stream.expect(Type::Insn)?.parse().map_err(|e: Error| e.context(stream.context())) + // TODO: Reintroduce error context + stream.expect(Type::Insn)?.parse() } } impl FromStr for Opcode { - type Err = Error; + type Err = ParseError; fn from_str(s: &str) -> Result { - use Opcode::*; //TODO: Reduce allocations here? let s = s.to_ascii_lowercase(); Ok(match s.as_str() { - "rrc" => Rrc, - "swpb" => Swpb, - "rra" => Rra, - "sxt" => Sxt, - "push" => Push, - "call" => Call, - "reti" => Reti, + "rrc" => Self::Rrc, + "swpb" => Self::Swpb, + "rra" => Self::Rra, + "sxt" => Self::Sxt, + "push" => Self::Push, + "call" => Self::Call, + "reti" => Self::Reti, - "jne" | "jnz" => Jnz, - "jeq" | "jz" => Jz, - "jnc" | "jlo" => Jnc, - "jc" | "jhs" => Jc, - "jn" => Jn, - "jge" => Jge, - "jl" => Jl, - "jmp" => Jmp, + "jne" | "jnz" => Self::Jnz, + "jeq" | "jz" => Self::Jz, + "jnc" | "jlo" => Self::Jnc, + "jc" | "jhs" => Self::Jc, + "jn" => Self::Jn, + "jge" => Self::Jge, + "jl" => Self::Jl, + "jmp" => Self::Jmp, - "mov" => Mov, - "add" => Add, - "addc" => Addc, - "subc" => Subc, - "sub" => Sub, - "cmp" => Cmp, - "dadd" => Dadd, - "bit" => Bit, - "bic" => Bic, - "bis" => Bis, - "xor" => Xor, - "and" => And, + "mov" => Self::Mov, + "add" => Self::Add, + "addc" => Self::Addc, + "subc" => Self::Subc, + "sub" => Self::Sub, + "cmp" => Self::Cmp, + "dadd" => Self::Dadd, + "bit" => Self::Bit, + "bic" => Self::Bic, + "bis" => Self::Bis, + "xor" => Self::Xor, + "and" => Self::And, - "nop" => Nop, - "pop" => Pop, - "br" => Br, - "ret" => Ret, - "clrc" => Clrc, - "setc" => Setc, - "clrz" => Clrz, - "setz" => Setz, - "clrn" => Clrn, - "setn" => Setn, - "dint" => Dint, - "eint" => Eint, - "rla" => Rla, - "rlc" => Rlc, - "inv" => Inv, - "clr" => Clr, - "tst" => Tst, - "dec" => Dec, - "decd" => Decd, - "inc" => Inc, - "incd" => Incd, - "adc" => Adc, - "dadc" => Dadc, - "sbc" => Sbc, - _ => Err(Error::UnrecognizedOpcode(s))?, + "nop" => Self::Nop, + "pop" => Self::Pop, + "br" => Self::Br, + "ret" => Self::Ret, + "clrc" => Self::Clrc, + "setc" => Self::Setc, + "clrz" => Self::Clrz, + "setz" => Self::Setz, + "clrn" => Self::Clrn, + "setn" => Self::Setn, + "dint" => Self::Dint, + "eint" => Self::Eint, + "rla" => Self::Rla, + "rlc" => Self::Rlc, + "inv" => Self::Inv, + "clr" => Self::Clr, + "tst" => Self::Tst, + "dec" => Self::Dec, + "decd" => Self::Decd, + "inc" => Self::Inc, + "incd" => Self::Incd, + "adc" => Self::Adc, + "dadc" => Self::Dadc, + "sbc" => Self::Sbc, + _ => Err(ParseError::UnrecognizedOpcode(s))?, }) } } impl Display for Opcode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Opcode::*; write!( f, "{}", match self { - Nop => "nop", - Pop => "pop", - Br => "br", - Ret => "ret", - Clrc => "clrc", - Setc => "setc", - Clrz => "clrz", - Setz => "setz", - Clrn => "clrn", - Setn => "setn", - Dint => "dint", - Eint => "eint", - Rla => "rla", - Rlc => "rlc", - Inv => "inv", - Clr => "clr", - Tst => "tst", - Dec => "dec", - Decd => "decd", - Inc => "inc", - Incd => "incd", - Adc => "adc", - Dadc => "dadc", - Sbc => "sbc", - Rrc => "rrc", - Swpb => "swpb", - Rra => "rra", - Sxt => "sxt", - Push => "push", - Call => "call", - Reti => "reti", - Jnz => "jnz", - Jz => "jz", - Jnc => "jnc", - Jc => "jc", - Jn => "jn", - Jge => "jge", - Jl => "jl", - Jmp => "jmp", - Mov => "mov", - Add => "add", - Addc => "addc", - Subc => "subc", - Sub => "sub", - Cmp => "cmp", - Dadd => "dadd", - Bit => "bit", - Bic => "bic", - Bis => "bis", - Xor => "xor", - And => "and", + Self::Nop => "nop", + Self::Pop => "pop", + Self::Br => "br", + Self::Ret => "ret", + Self::Clrc => "clrc", + Self::Setc => "setc", + Self::Clrz => "clrz", + Self::Setz => "setz", + Self::Clrn => "clrn", + Self::Setn => "setn", + Self::Dint => "dint", + Self::Eint => "eint", + Self::Rla => "rla", + Self::Rlc => "rlc", + Self::Inv => "inv", + Self::Clr => "clr", + Self::Tst => "tst", + Self::Dec => "dec", + Self::Decd => "decd", + Self::Inc => "inc", + Self::Incd => "incd", + Self::Adc => "adc", + Self::Dadc => "dadc", + Self::Sbc => "sbc", + Self::Rrc => "rrc", + Self::Swpb => "swpb", + Self::Rra => "rra", + Self::Sxt => "sxt", + Self::Push => "push", + Self::Call => "call", + Self::Reti => "reti", + Self::Jnz => "jnz", + Self::Jz => "jz", + Self::Jnc => "jnc", + Self::Jc => "jc", + Self::Jn => "jn", + Self::Jge => "jge", + Self::Jl => "jl", + Self::Jmp => "jmp", + Self::Mov => "mov", + Self::Add => "add", + Self::Addc => "addc", + Self::Subc => "subc", + Self::Sub => "sub", + Self::Cmp => "cmp", + Self::Dadd => "dadd", + Self::Bit => "bit", + Self::Bic => "bic", + Self::Bis => "bis", + Self::Xor => "xor", + Self::And => "and", } ) } } - -impl LowerHex for Opcode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:04x}", *self as u16) } -} diff --git a/src/parser/label.rs b/src/parser/label.rs index f7e50e4..0b4c484 100644 --- a/src/parser/label.rs +++ b/src/parser/label.rs @@ -7,13 +7,12 @@ use super::*; pub struct Label(pub Identifier); impl Parsable for Label { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where T: TokenStream<'text> { - Ok(Self( - Identifier::parse(p, stream) - .and_then(|t| stream.require(Type::Label).and(Ok(t))) - .map_err(|e| e.context(stream.context()))?, - )) + Ok(Self(Identifier::parse(p, stream).and_then(|t| { + stream.require(Type::Label)?; + Ok(t) + })?)) } } diff --git a/src/parser/line.rs b/src/parser/line.rs new file mode 100644 index 0000000..3e1f7e2 --- /dev/null +++ b/src/parser/line.rs @@ -0,0 +1,72 @@ +// © 2023 John Breaux +//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same +//! document line will be treated as if they took up multiple [`Line`s](Line). +//! +//! A line contains one of: +//! - [`Label`] +//! - [`Instruction`] +//! - [`Directive`] +//! - [`Comment`] +//! - [Nothing](Line::Empty) +use super::*; + +/// A line contains any one of: +/// - [`Label`] (definition) +/// - [`Instruction`] +/// - [`Directive`] +/// - [`Comment`] +/// - Nothing at all +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Line { + Empty, + Insn(Instruction), + Comment(Comment), + Directive(Directive), + Label(Label), + EndOfFile, // Expected end of file +} + +impl Parsable for Line { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + Ok( + match stream + .peek_expect_any_of([ + Type::Endl, + Type::Insn, + Type::Comment, + Type::Directive, + Type::Identifier, + Type::EndOfFile, + ])? + .variant() + { + Type::Endl => { + stream.next(); + Self::Empty + } + Type::Insn => Self::Insn(Instruction::parse(p, stream)?), + Type::Comment => Self::Comment(Comment::parse(p, stream)?), + Type::Directive => Self::Directive(Directive::parse(p, stream)?), + Type::Identifier => Self::Label(Label::parse(p, stream)?), + Type::EndOfFile => { + stream.next(); + Self::EndOfFile + } + _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"), + }, + ) + } +} +impl Display for Line { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Empty => writeln!(f, "\n"), + Self::Label(arg0) => Display::fmt(arg0, f), + Self::Insn(arg0) => Display::fmt(arg0, f), + Self::Directive(arg0) => Display::fmt(arg0, f), + Self::Comment(arg0) => Display::fmt(arg0, f), + Self::EndOfFile => write!(f, "; End of file."), + } + } +} diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs index 9aaa434..3216ad2 100644 --- a/src/parser/parsable.rs +++ b/src/parser/parsable.rs @@ -4,7 +4,7 @@ use super::*; /// Parses tokens from [stream](TokenStream) into Self node pub trait Parsable { /// Parses tokens from [TokenStream](TokenStream) into Self nodes - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result where Self: Sized, T: TokenStream<'text>; @@ -12,19 +12,23 @@ pub trait Parsable { /// Attempts to parse tokens from [stream](TokenStream) into Self nodes. /// /// Masks failed expectations. - fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result, Error> + fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result, ParseError> where Self: Sized, T: TokenStream<'text>, { - match Self::parse(p, stream).map_err(|e| e.bare()) { - Ok(tt) => Ok(Some(tt)), - Err(Error::UnexpectedToken { .. }) | Err(Error::AllExpectationsFailed { .. }) => Ok(None), - Err(e) => Err(e.context(stream.context())), + match Self::parse(p, stream) { + Ok(some) => Ok(Some(some)), + Err(ParseError::LexError(_)) => Ok(None), + Err(e) => Err(e), } } - fn parse_and<'text, T, R>(p: &Parser, stream: &mut T, f: fn(p: &Parser, &mut T) -> R) -> Result<(Self, R), Error> + fn parse_and<'text, T, R>( + p: &Parser, + stream: &mut T, + f: fn(p: &Parser, &mut T) -> R, + ) -> Result<(Self, R), ParseError> where Self: Sized, T: TokenStream<'text>, @@ -43,3 +47,39 @@ pub trait Parsable { Self::parse(p, stream).unwrap_or_default() } } + +macro_rules! parsable_str_types { + ($($t:ty),*$(,)?) => {$( + impl Parsable for $t { + fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into()) + } + } + )*}; +} +use std::{path::PathBuf, rc::Rc}; +parsable_str_types![String, Rc, Box, PathBuf]; + +/// Vectors of arbitrary parsables are cool +impl Parsable for Vec

{ + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + // [dead beef] + // [A, B,] + // [c d e f] + // [ something + // else ] + + stream.require(Type::LBracket)?; + stream.allow(Type::Endl); + let mut out = vec![]; + while let Some(t) = P::try_parse(p, stream)? { + out.push(t); + stream.allow(Type::Separator); + stream.allow(Type::Endl); + } + stream.require(Type::RBracket)?; + Ok(out) + } +} diff --git a/src/parser/root.rs b/src/parser/root.rs new file mode 100644 index 0000000..62341f3 --- /dev/null +++ b/src/parser/root.rs @@ -0,0 +1,51 @@ +use std::path::{Path, PathBuf}; + +// © 2023 John Breaux +use super::*; + +/// Contains the entire AST +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Root(Option, Vec<(usize, Line)>); +// pub struct Root { pub path: PathBuf, pub lines: Vec } + +impl Root { + pub fn file(&self) -> Option<&Path> { self.0.as_deref() } + pub(crate) fn set_file(mut self, path: PathBuf) -> Self { + self.0 = Some(path); + self + } + pub fn lines(&self) -> &[(usize, Line)] { &self.1 } +} + +impl Parsable for Root { + fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result + where T: TokenStream<'text> { + let mut lines = vec![]; + loop { + let number = stream.context().line(); + match Line::parse(p, stream)? { + Line::EndOfFile => break, + line => lines.push((number, line)), + } + } + Ok(Root(None, lines)) + } +} + +impl Display for Root { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (num, line) in &self.1 { + f.pad(&format!("{num:3}: {line} "))?; + } + Ok(()) + } +} + +impl Debug for Root { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for line in self.0.iter() { + Debug::fmt(line, f)?; + } + Ok(()) + } +} diff --git a/valid.asm b/valid.asm index 5dc88b6..c32ac7b 100755 --- a/valid.asm +++ b/valid.asm @@ -2,6 +2,16 @@ ; examples of valid assembly ; +; testing labels +jmp main + +; testing directives +.string "ABA" +.string "ABAB" +.word 0b0101101001011010 +.words [dead beef] + +main: ; testing defines .define asdfgh #1000 .define qwerty @sp+ @@ -132,7 +142,7 @@ mov #beef, sp mov #beef, sr mov #beef, cg -; jmp _register_mode ; TODO: msp430_asm currently has no support for jump labels. +jmp _register_mode jmp 3fe jmp -3fc ret