From fc8f8b962297aee529d6c27fad188916b0d359fd Mon Sep 17 00:00:00 2001 From: John Breaux Date: Tue, 30 Jan 2024 05:27:12 -0600 Subject: [PATCH 01/12] v0.3.0: Total overhaul - Everything has been rewritten - Modularity is improved somewhat - No dependency injection in preprocessor/parser, though - There are now early and late constant evaluation engines - This engine allows for by-value access to already-assembled code - Performs basic math operations, remainder, bitwise logic, bit shifts, negation, and bit inversion - Also allows for indexing into already-generated code using pointer-arithmetic syntax: `*(&main + 10)`. This is subject to change? It's clunky, and only allows word-aligned access. However, this rewrite is taking far too long, so I'll call the bikeshedding here. - Pretty sure this constant evaluation is computationally equivalent to Deadfish? --- .rustfmt.toml | 4 +- Cargo.toml | 33 +- LICENSE.md | 9 + examples/msp430-help/data.rs | 422 +++++----- examples/msp430-help/main.rs | 108 +-- msp430-asm/Cargo.toml | 14 + msp430-asm/src/lib.rs | 116 +++ .../msp430-asm => msp430-asm/src}/main.rs | 71 +- valid.asm => sample-asm/valid.asm | 158 ++-- src/assembler.rs | 574 +++++++++---- src/assembler/error.rs | 56 -- src/error.rs | 49 -- src/hash.rs | 22 - src/lexer.rs | 311 +++++-- src/lexer/context.rs | 38 - src/lexer/error.rs | 68 -- src/lexer/ignore.rs | 55 -- src/lexer/preprocessed.rs | 174 ---- src/lexer/tests.rs | 66 ++ src/lexer/token.rs | 790 +++++++++++------- src/lexer/token_stream.rs | 85 -- src/lib.rs | 62 +- src/parser.rs | 648 ++++++++++++-- src/parser/ast.rs | 679 +++++++++++++++ src/parser/comment.rs | 15 - src/parser/directive.rs | 90 -- src/parser/error.rs | 74 -- src/parser/identifier.rs | 26 - src/parser/instruction.rs | 52 -- src/parser/instruction/encoding.rs | 81 -- src/parser/instruction/encoding/builder.rs | 76 -- .../instruction/encoding/encoding_parser.rs | 37 - .../instruction/encoding/jump_target.rs | 58 -- 
src/parser/instruction/encoding/number.rs | 81 -- .../instruction/encoding/primary_operand.rs | 146 ---- src/parser/instruction/encoding/register.rs | 112 --- .../instruction/encoding/secondary_operand.rs | 105 --- src/parser/instruction/encoding/width.rs | 32 - src/parser/instruction/opcode.rs | 261 ------ src/parser/label.rs | 21 - src/parser/line.rs | 72 -- src/parser/parsable.rs | 85 -- src/parser/root.rs | 51 -- src/preprocessor.rs | 87 ++ 44 files changed, 3119 insertions(+), 3055 deletions(-) create mode 100644 LICENSE.md create mode 100644 msp430-asm/Cargo.toml create mode 100644 msp430-asm/src/lib.rs rename {examples/msp430-asm => msp430-asm/src}/main.rs (58%) rename valid.asm => sample-asm/valid.asm (64%) delete mode 100644 src/assembler/error.rs delete mode 100644 src/error.rs delete mode 100644 src/hash.rs delete mode 100644 src/lexer/context.rs delete mode 100644 src/lexer/error.rs delete mode 100644 src/lexer/ignore.rs delete mode 100644 src/lexer/preprocessed.rs create mode 100644 src/lexer/tests.rs delete mode 100644 src/lexer/token_stream.rs create mode 100644 src/parser/ast.rs delete mode 100644 src/parser/comment.rs delete mode 100644 src/parser/directive.rs delete mode 100644 src/parser/error.rs delete mode 100644 src/parser/identifier.rs delete mode 100644 src/parser/instruction.rs delete mode 100644 src/parser/instruction/encoding.rs delete mode 100644 src/parser/instruction/encoding/builder.rs delete mode 100644 src/parser/instruction/encoding/encoding_parser.rs delete mode 100644 src/parser/instruction/encoding/jump_target.rs delete mode 100644 src/parser/instruction/encoding/number.rs delete mode 100644 src/parser/instruction/encoding/primary_operand.rs delete mode 100644 src/parser/instruction/encoding/register.rs delete mode 100644 src/parser/instruction/encoding/secondary_operand.rs delete mode 100644 src/parser/instruction/encoding/width.rs delete mode 100644 src/parser/instruction/opcode.rs delete mode 100644 src/parser/label.rs delete 
mode 100644 src/parser/line.rs delete mode 100644 src/parser/parsable.rs delete mode 100644 src/parser/root.rs create mode 100644 src/preprocessor.rs diff --git a/.rustfmt.toml b/.rustfmt.toml index 6d174c4..276ed8a 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,12 +1,12 @@ unstable_features = true -max_width = 120 +max_width = 100 wrap_comments = true comment_width = 100 # Allow structs to fill an entire line use_small_heuristics = "Max" # Allow small functions on single line -fn_single_line = true +# fn_single_line = true # Alignment enum_discrim_align_threshold = 12 diff --git a/Cargo.toml b/Cargo.toml index c45bf58..954ceed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,23 +1,24 @@ -[package] -name = "msp430-asm" -version = "0.2.0" -edition = "2021" -rust-version = "1.70" +[workspace] +members = ["msp430-asm"] +# default-members = ["msp430-asm"] + +[workspace.package] authors = ["John Breaux "] +version = "0.3.0" +license = "MIT" +edition = "2021" publish = false -[features] -default = [] +[package] +name = "libmsp430" +authors.workspace = true +version.workspace = true +license.workspace = true +edition.workspace = true +publish.workspace = true -[[example]] -name = "msp430-asm" -path = "examples/msp430-asm/main.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -regex = "1.9.3" -# TODO: Remove dependency on regex - -[dev-dependencies] -anes = { version = "0.1.6" } -argp = { version = "0.3.0" } +# Provides very quick boolean tests for XID_START and XID_CONTINUE +unicode-ident = "1.0.12" diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..279fb64 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,9 @@ + The MIT License (MIT) + +Copyright © 2023-2024 John Breaux + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without 
limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/examples/msp430-help/data.rs b/examples/msp430-help/data.rs index 26e2a96..3de6fdb 100644 --- a/examples/msp430-help/data.rs +++ b/examples/msp430-help/data.rs @@ -1,222 +1,224 @@ +//! TODO: rewrite for v0.3.0 use super::*; -/// Creates a [Parsable] implementation for an enum whose variants -/// are named after other [Parsable] items -macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) { - $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* } - impl ::msp430_asm::parser::parsable::Parsable for $id { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )* - { Err(ParseError::UnrecognizedDirective("".into())) } - } - } - impl TryFrom<&str> for $id { - type Error = ParseError; - fn try_from(value: &str) -> Result { - Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed()) - } - } -} -make_parsable! 
{ - #[derive(Debug)] - pub enum SyntaxFragment { - Opcode, - PrimaryOperand, - Number, - } -} +// /// Creates a [Parsable] implementation for an enum whose variants +// /// are named after other [Parsable] items +// macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) { +// $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* } +// impl ::msp430_asm::parser::parsable::Parsable for $id { +// fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result +// where T: TokenStream<'text> { +// $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )* +// { Err(ParseError::UnrecognizedDirective("".into())) } +// } +// } +// impl TryFrom<&str> for $id { +// type Error = ParseError; +// fn try_from(value: &str) -> Result { +// Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed()) +// } +// } +// } -impl SyntaxFragment { - pub fn info(&self) { - match self { - SyntaxFragment::Opcode(o) => Self::opcode_info(o), - SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o), - SyntaxFragment::Number(n) => println!("The number {n}"), - } - } - fn opcode_info(o: &Opcode) { - let (desc, as_rust) = usage(o); - println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o)); - footer!("https://mspgcc.sourceforge.net/manual/x223.html"); - } - // TODO: re-enable full instruction decoding - // fn encoding_info(e: &Encoding) { - // match e { - // Encoding::Single { dst, .. } => Self::operand_info(dst), - // Encoding::Jump { target } => println!("Jumps to (pc + {target})"), - // Encoding::Double { src, dst, .. 
} => { - // Self::operand_info(src); - // Self::operand_info(&dst.clone().into()) - // } - // } - // } - fn operand_info(o: &PrimaryOperand) { - match o { - PrimaryOperand::Direct(r) => Self::register_info(r), - PrimaryOperand::Indirect(r) => { - Self::register_info(r); - println!("Indirect addressing mode: use data pointed to by {r}"); - } - PrimaryOperand::PostInc(r) => { - Self::register_info(r); - println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}"); - } - PrimaryOperand::Indexed(r, n) => { - Self::register_info(r); - println!("Indexed mode: use the data at {r}[{n}]"); - } - PrimaryOperand::Relative(_) => return, - PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"), - PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"), - PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"), - PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"), - PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"), - PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"), - PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"), - PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"), - } - footer!("https://mspgcc.sourceforge.net/manual/x82.html"); - } - fn register_info(r: &Register) { - use Register as Re; - match r { - Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."), - Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."), - Re::sr => println!( - "sr (r2) is the Status Register. 
It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC", - ), - Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."), - Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => { - println!("{r} is a callee-saved general purpose register.") - } - Re::r12 | Re::r13 | Re::r14 | Re::r15 => { - println!("{r} is a caller-saved general purpose register, allowed for return values.") - } - } - } -} +// make_parsable! { +// #[derive(Debug)] +// pub enum SyntaxFragment { +// Opcode, +// PrimaryOperand, +// Number, +// } +// } -// Gets parameter usage information from the opcode's EncodingParser -pub fn params(opcode: &Opcode) -> &'static str { - match opcode.resolve().1 { - EncodingParser::Jump { target: None } => " target (relative address or label)", - EncodingParser::Single { width: None, dst: None } => "[.b] dst", - EncodingParser::Single { dst: None, .. } => " dst", - EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst", - EncodingParser::Double { src: None, .. } => "[.b] src", - EncodingParser::Double { dst: None, .. } => "[.b] dst", - EncodingParser::Double { .. } => "[.b]", - EncodingParser::Reflexive { reg: None, .. } => "[.b] dst", - _ => "", - } -} +// impl SyntaxFragment { +// pub fn info(&self) { +// match self { +// SyntaxFragment::Opcode(o) => Self::opcode_info(o), +// SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o), +// SyntaxFragment::Number(n) => println!("The number {n}"), +// } +// } +// fn opcode_info(o: &Opcode) { +// let (desc, as_rust) = usage(o); +// println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o)); +// footer!("https://mspgcc.sourceforge.net/manual/x223.html"); +// } +// // TODO: re-enable full instruction decoding +// // fn encoding_info(e: &Encoding) { +// // match e { +// // Encoding::Single { dst, .. 
} => Self::operand_info(dst), +// // Encoding::Jump { target } => println!("Jumps to (pc + {target})"), +// // Encoding::Double { src, dst, .. } => { +// // Self::operand_info(src); +// // Self::operand_info(&dst.clone().into()) +// // } +// // } +// // } +// fn operand_info(o: &PrimaryOperand) { +// match o { +// PrimaryOperand::Direct(r) => Self::register_info(r), +// PrimaryOperand::Indirect(r) => { +// Self::register_info(r); +// println!("Indirect addressing mode: use data pointed to by {r}"); +// } +// PrimaryOperand::PostInc(r) => { +// Self::register_info(r); +// println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}"); +// } +// PrimaryOperand::Indexed(r, n) => { +// Self::register_info(r); +// println!("Indexed mode: use the data at {r}[{n}]"); +// } +// PrimaryOperand::Relative(_) => return, +// PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"), +// PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"), +// PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"), +// PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"), +// PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"), +// PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"), +// PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"), +// PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"), +// } +// footer!("https://mspgcc.sourceforge.net/manual/x82.html"); +// } +// fn register_info(r: &Register) { +// use Register as Re; +// match r { +// Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."), +// Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."), +// Re::sr => println!( +// "sr (r2) is the Status Register. 
It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC", +// ), +// Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."), +// Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => { +// println!("{r} is a callee-saved general purpose register.") +// } +// Re::r12 | Re::r13 | Re::r14 | Re::r15 => { +// println!("{r} is a caller-saved general purpose register, allowed for return values.") +// } +// } +// } +// } -pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) { - match opcode { - // Single - Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"), - Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"), - Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"), - Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"), - Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"), - Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"), - Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"), - // Jump - Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"), - Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"), - Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"), - Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"), - Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"), - Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"), - Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"), - Opcode::Jmp => ("Jump unconditionally", "pc += target"), - // Double - Opcode::Mov => 
("Copy src into dst", "dst = src"), - Opcode::Add => ("Add src to dst", "dst += src"), - Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"), - Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"), - Opcode::Sub => ("Subtract src from dst", "dst -= src"), - Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"), - Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"), - Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"), - Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"), - Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"), - Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"), - Opcode::And => ("Bitwise And src into dst", "dst &= src"), - // Emulated - Opcode::Nop => ("Does nothing", "{}"), - Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"), - Opcode::Br => ("Branches to the absolute address in src", "pc = src"), - Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"), - Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"), - Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"), - Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"), - Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"), - Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"), - Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"), - Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"), - Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"), - Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"), - Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"), - Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"), - Opcode::Clr => ("Sets dst to 0", "dst = 0"), - Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""), - Opcode::Dec => ("Decrements dst", 
"dst -= 1"), - Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"), - Opcode::Inc => ("Increments dst", "dst += 1"), - Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"), - Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"), - Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"), - Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"), - } -} +// // Gets parameter usage information from the opcode's EncodingParser +// pub fn params(opcode: &Opcode) -> &'static str { +// match opcode.resolve().1 { +// EncodingParser::Jump { target: None } => " target (relative address or label)", +// EncodingParser::Single { width: None, dst: None } => "[.b] dst", +// EncodingParser::Single { dst: None, .. } => " dst", +// EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst", +// EncodingParser::Double { src: None, .. } => "[.b] src", +// EncodingParser::Double { dst: None, .. } => "[.b] dst", +// EncodingParser::Double { .. } => "[.b]", +// EncodingParser::Reflexive { reg: None, .. 
} => "[.b] dst", +// _ => "", +// } +// } -const SINGLE: [Opcode; 7] = - [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti]; +// pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) { +// match opcode { +// // Single +// Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"), +// Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"), +// Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"), +// Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"), +// Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"), +// Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"), +// Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"), +// // Jump +// Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"), +// Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"), +// Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"), +// Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"), +// Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"), +// Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"), +// Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"), +// Opcode::Jmp => ("Jump unconditionally", "pc += target"), +// // Double +// Opcode::Mov => ("Copy src into dst", "dst = src"), +// Opcode::Add => ("Add src to dst", "dst += src"), +// Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"), +// Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"), +// Opcode::Sub => ("Subtract src from dst", "dst -= src"), +// Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - 
src"), +// Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"), +// Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"), +// Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"), +// Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"), +// Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"), +// Opcode::And => ("Bitwise And src into dst", "dst &= src"), +// // Emulated +// Opcode::Nop => ("Does nothing", "{}"), +// Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"), +// Opcode::Br => ("Branches to the absolute address in src", "pc = src"), +// Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"), +// Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"), +// Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"), +// Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"), +// Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"), +// Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"), +// Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"), +// Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"), +// Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"), +// Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"), +// Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"), +// Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"), +// Opcode::Clr => ("Sets dst to 0", "dst = 0"), +// Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""), +// Opcode::Dec => ("Decrements dst", "dst -= 1"), +// Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"), +// Opcode::Inc => ("Increments dst", "dst += 1"), +// Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"), +// Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"), +// Opcode::Dadc => ("Adds the carry bit to dst, 
in Binary Coded Decimal", "dst as BCD = sr[C]"), +// Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"), +// } +// } -const JUMP: [Opcode; 8] = - [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp]; +// const SINGLE: [Opcode; 7] = +// [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti]; -#[rustfmt::skip] -const DOUBLE: [Opcode; 12] = [ - Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp, - Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And, -]; -#[rustfmt::skip] -const SIMULATED: [Opcode; 24] = [ - Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc, - Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint, - Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec, - Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc, -]; +// const JUMP: [Opcode; 8] = +// [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp]; -pub fn list_opcodes() { - let mut stdout = std::io::stdout().lock(); - header!(stdout, "Single-operand instructions:"); - let _ = write_opcode_list(&mut stdout, &SINGLE); - header!(stdout, "Relative Jump instructions:"); - let _ = write_opcode_list(&mut stdout, &JUMP); - header!(stdout, "Double-operand instructions:"); - let _ = write_opcode_list(&mut stdout, &DOUBLE); - header!(stdout, "Simulated instructions:"); - let _ = write_opcode_list(&mut stdout, &SIMULATED); -} +// #[rustfmt::skip] +// const DOUBLE: [Opcode; 12] = [ +// Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp, +// Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And, +// ]; +// #[rustfmt::skip] +// const SIMULATED: [Opcode; 24] = [ +// Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc, +// 
Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint, +// Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec, +// Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc, +// ]; -fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> { - for (idx, opcode) in list.iter().enumerate() { - write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?; - } - if list.len() % 6 != 0 { - writeln!(f)?; - } - Ok(()) -} +// pub fn list_opcodes() { +// let mut stdout = std::io::stdout().lock(); +// header!(stdout, "Single-operand instructions:"); +// let _ = write_opcode_list(&mut stdout, &SINGLE); +// header!(stdout, "Relative Jump instructions:"); +// let _ = write_opcode_list(&mut stdout, &JUMP); +// header!(stdout, "Double-operand instructions:"); +// let _ = write_opcode_list(&mut stdout, &DOUBLE); +// header!(stdout, "Simulated instructions:"); +// let _ = write_opcode_list(&mut stdout, &SIMULATED); +// } -macro header ($f:ident, $($x: expr),+) { - {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();} -} -macro footer ($($x: expr),+) { - {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);} -} +// fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> { +// for (idx, opcode) in list.iter().enumerate() { +// write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?; +// } +// if list.len() % 6 != 0 { +// writeln!(f)?; +// } +// Ok(()) +// } + +// macro header ($f:ident, $($x: expr),+) { +// {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();} +// } +// macro footer ($($x: expr),+) { +// {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);} +// } diff --git a/examples/msp430-help/main.rs 
b/examples/msp430-help/main.rs index bf9271e..02b60c8 100644 --- a/examples/msp430-help/main.rs +++ b/examples/msp430-help/main.rs @@ -3,59 +3,63 @@ // https://mspgcc.sourceforge.net/manual/ln16.html #![feature(decl_macro)] -use anes::{Color, ResetAttributes, SetForegroundColor}; -use msp430_asm::parser::preamble::*; -use msp430_asm::preamble::*; -use std::{ - error::Error, - io::{stdin, IsTerminal, Write}, -}; - -type AsmResult = Result>; - -mod data; - -fn main() -> AsmResult<()> { - if stdin().is_terminal() { - hello(); - } - repl() +fn main() { + println!("Hello, world!") } -fn hello() { - println!( - "{}{} v{} -This software contains instruction and register descriptions adapted from -the mspgcc project's fantastic documentation, which is licensed under the GPL. -https://mspgcc.sourceforge.net/manual/book1.html{}\n", - SetForegroundColor(Color::DarkGray), - env!("CARGO_BIN_NAME"), - env!("CARGO_PKG_VERSION"), - ResetAttributes - ); -} +// use anes::{Color, ResetAttributes, SetForegroundColor}; +// use msp430_asm::parser::preamble::*; +// use msp430_asm::preamble::*; +// use std::{ +// error::Error, +// io::{stdin, IsTerminal, Write}, +// }; -fn repl() -> AsmResult<()> { - printflush!("> "); - let mut line = String::new(); - while let Ok(len) = stdin().read_line(&mut line) { - match len { - 0 => break, // No newline (reached EOF) - 1 => (), // Line is empty - _ => { - if line.starts_with('?') || line.starts_with("help") { - data::list_opcodes() - } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) { - sf.info(); - } - } - } - printflush!("> "); - line.clear(); - } - Ok(()) -} +// type AsmResult = Result>; -macro printflush ($($x: expr),+) { - {print!($($x),+); let _ = ::std::io::stdout().flush();} -} +// mod data; + +// fn main() -> AsmResult<()> { +// if stdin().is_terminal() { +// hello(); +// } +// repl() +// } + +// fn hello() { +// println!( +// "{}{} v{} +// This software contains instruction and register descriptions adapted from +// the 
mspgcc project's fantastic documentation, which is licensed under the GPL. +// https://mspgcc.sourceforge.net/manual/book1.html{}\n", +// SetForegroundColor(Color::DarkGray), +// env!("CARGO_BIN_NAME"), +// env!("CARGO_PKG_VERSION"), +// ResetAttributes +// ); +// } + +// fn repl() -> AsmResult<()> { +// printflush!("> "); +// let mut line = String::new(); +// while let Ok(len) = stdin().read_line(&mut line) { +// match len { +// 0 => break, // No newline (reached EOF) +// 1 => (), // Line is empty +// _ => { +// if line.starts_with('?') || line.starts_with("help") { +// data::list_opcodes() +// } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) { +// sf.info(); +// } +// } +// } +// printflush!("> "); +// line.clear(); +// } +// Ok(()) +// } + +// macro printflush ($($x: expr),+) { +// {print!($($x),+); let _ = ::std::io::stdout().flush();} +// } diff --git a/msp430-asm/Cargo.toml b/msp430-asm/Cargo.toml new file mode 100644 index 0000000..6192e9d --- /dev/null +++ b/msp430-asm/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "msp430-asm" +authors.workspace = true +version.workspace = true +license.workspace = true +edition.workspace = true +publish.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libmsp430 = { path = ".." } +anes = { version = "0.2.0" } +argp = { version = "0.3.0" } diff --git a/msp430-asm/src/lib.rs b/msp430-asm/src/lib.rs new file mode 100644 index 0000000..715377c --- /dev/null +++ b/msp430-asm/src/lib.rs @@ -0,0 +1,116 @@ +//! 
/// Splitting a slice or string into three consecutive parts around two indices.
pub mod split_twice {
    /// Slices a collection into a beginning, middle, and end, based on two unordered indices
    pub trait SplitTwice<'t> {
        /// The borrowed piece type returned for each of the three parts
        type Slice;
        /// The type used to name the two split points
        type Idx;
        /// Splits a collection into a beginning, middle, and end slice,
        /// based on two unordered indices
        ///
        /// The indices may be given in either order. An index past the end of the
        /// collection is treated as the end of the collection, so the three parts
        /// always concatenate back to the whole collection, in order.
        ///
        /// # Examples
        /// ```rust
        /// # use msp430_asm::split_twice::SplitTwice;
        /// let string = "foo,bar,baz";
        /// let (foo, bar, baz) = string.split_twice(4, 8);
        /// assert_eq!(foo, "foo,");
        /// assert_eq!(bar, "bar,");
        /// assert_eq!(baz, "baz");
        /// ```
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice);
    }

    /// Orders a pair of split points and clamps both of them to `len`
    fn split_points(a: usize, b: usize, len: usize) -> (usize, usize) {
        let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
        (lo.min(len), hi.min(len))
    }

    impl<'t, T: 't> SplitTwice<'t> for [T] {
        type Slice = &'t [T];
        type Idx = usize;
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (lo, hi) = split_points(a, b, self.len());
            // Cut the tail off first, then cut the head at the lower index. Splitting the
            // *head* (never `self`) keeps the parts disjoint even when `lo == hi`.
            let (head, end) = self.split_at(hi);
            let (start, mid) = head.split_at(lo);
            (start, mid, end)
        }
    }

    impl<'t> SplitTwice<'t> for str {
        type Slice = &'t str;
        type Idx = usize;
        /// Indices are byte offsets.
        ///
        /// # Panics
        /// Panics if an in-range index does not lie on a UTF-8 character boundary
        /// (this is the behavior of [`str::split_at`]).
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (lo, hi) = split_points(a, b, self.len());
            // Clamped indices equal `len`, which is always a character boundary.
            let (head, end) = self.split_at(hi);
            let (start, mid) = head.split_at(lo);
            (start, mid, end)
        }
    }
}
{ + #[default] + Black = 30, + Red, + Green, + Yellow, + Blue, + Magenta, + Cyan, + Gray, + DarkGray = 90, + Pink, + Lime, + Sunflower, + SkyBlue, + HotPink, + Turquoise, + White, + } + + #[derive(Clone, Copy, Debug)] + pub struct Colorized<'args> { + fg: Option, + bg: Option, + args: Arguments<'args>, + } + + impl<'t> Colorized<'t> { + pub fn new(fg: Option, bg: Option, args: Arguments<'t>) -> Self { + Self { fg, bg, args } + } + } + impl<'t> Display for Colorized<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { fg, bg, args } = self; + if let Some(fg) = fg { + write!(f, "{}", csi!("{}m", fg as u8))?; + } + if let Some(bg) = bg { + write!(f, "{}", csi!("{}m", bg as u8 + 10))?; + } + write!(f, "{args}{}", csi!("0m")) + } + } +} diff --git a/examples/msp430-asm/main.rs b/msp430-asm/src/main.rs similarity index 58% rename from examples/msp430-asm/main.rs rename to msp430-asm/src/main.rs index 7cf8870..a40c4d0 100644 --- a/examples/msp430-asm/main.rs +++ b/msp430-asm/src/main.rs @@ -1,7 +1,15 @@ //! 
Simple frontend for the assembler #![feature(decl_macro)] use argp::parse_args_or_exit; -use msp430_asm::preamble::*; +use libmsp430::{ + assembler::Assemble, + parser::ast::{canonical::Canonicalize, *}, + parser::{error::Error as PError, Parser}, +}; +use msp430_asm::{ + cursor::{color, Color::*}, + split_twice::SplitTwice, +}; use std::{ error::Error, io::{stdin, IsTerminal, Read}, @@ -36,21 +44,18 @@ mod args { } mod repl { use super::*; - use anes::{Color, MoveCursorToPreviousLine, ResetAttributes, SetForegroundColor}; - use msp430_asm::{ - assembler::error::AssemblyError, error::Error as MspError, lexer::error::LexError, parser::error::ParseError, - }; + use anes::MoveCursorToPreviousLine; use std::io::{stderr, Write}; - macro color ($color: expr, $fmt: literal, $($str: expr),*) { - format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* ::anes::ResetAttributes) - } + // macro color ($color: expr, $fmt: literal, $($str: expr),*) { + // format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* + // ::anes::ResetAttributes) } macro linenr($n: expr) { format_args!("{:4}: ", $n) } - macro printflush ($($x: expr),+) { + macro printfl ($($x: expr),+) { {print!($($x),+); let _ = ::std::io::stdout().flush();} } @@ -62,13 +67,10 @@ mod repl { let mut line = String::new(); let mut linenr = 1; println!( - "{}{} v{}{}", - SetForegroundColor(Color::DarkGray), - env!("CARGO_BIN_NAME"), - env!("CARGO_PKG_VERSION"), - ResetAttributes + "{}", + color!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION")) ); - printflush!("{}", linenr!(linenr)); + printfl!("{}", linenr!(linenr)); while let Ok(len) = stdin().read_line(&mut line) { match len { 0 => break, // No newline (reached EOF) @@ -76,8 +78,8 @@ mod repl { _ => (), } // Try to parse this line in isolation (this restricts preprocessing) - match Parser::default().parse(&line) { - Err(error) => errpp(&line, linenr, &error.into()), + match 
Parser::new(&line).parse::() { + Err(error) => errpp(&line, linenr, &error), Ok(_) => { okpp(&line, linenr); *buf += &line; @@ -85,36 +87,29 @@ mod repl { } } line.clear(); - printflush!("{}", linenr!(linenr)); + printfl!("{}", linenr!(linenr)); } - println!(); + println!("{}", color!(Gray, "[EOF]")); Ok(()) } fn okpp(line: &str, linenr: i32) { - println!("{}{}{}", move_cursor!(1, 5), color!(Color::Green, "{:4}", linenr!(linenr)), line.trim_end(),); + println!( + "{}{}{}", + move_cursor!(1, 5), + color!(Green, "{:4}", linenr!(linenr)), + line.trim_end(), + ); } /// Pretty-prints a line error - fn errpp(line: &str, linenr: i32, err: &msp430_asm::error::Error) { + fn errpp(line: &str, linenr: i32, err: &PError) { + let loc = err.loc; if stderr().is_terminal() { let line = line.trim_end(); - eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Color::Red, "{}", linenr!(linenr))); - match err { - // TODO: use a recursive enum to store all valid states - MspError::LexError(LexError::Contextual(c, e)) - | MspError::ParseError(ParseError::LexError(LexError::Contextual(c, e))) - | MspError::AssemblyError(AssemblyError::ParseError(ParseError::LexError(LexError::Contextual( - c, - e, - )))) => { - let (start, end) = line.split_at(c.position() - 1); - eprintln!("{start}{} ({e})", color!(Color::Red, "{}", end)); - } - _ => { - eprintln!("{} ({err})", color!(Color::Red, "{}", line)); - } - } + eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Red, "{}", linenr!(linenr))); + let (start, mid, end) = line.split_twice(loc.start, loc.end); + eprintln!("{start}{}{end} {}", color!(Red, "{}", mid), color!(DarkGray, "; {}", err)); } else { eprintln!("{} ({err})", line.trim()) } @@ -123,7 +118,7 @@ mod repl { // Parses and assembles a buffer, then prints it in hex to stdout fn asm(buf: &str) -> Result<(), Box> { - match Assembler::assemble(&Parser::default().parse(&buf)?) 
{ + match Parser::new(buf).parse::()?.to_canonical().assemble() { Err(error) => println!("{error}"), Ok(out) => { for word in out { diff --git a/valid.asm b/sample-asm/valid.asm similarity index 64% rename from valid.asm rename to sample-asm/valid.asm index c32ac7b..5e29ec4 100755 --- a/valid.asm +++ b/sample-asm/valid.asm @@ -9,11 +9,11 @@ jmp main .string "ABA" .string "ABAB" .word 0b0101101001011010 -.words [dead beef] +.words [0xdead 0xbeef 0x0000] main: ; testing defines -.define asdfgh #1000 +.define asdfgh #0x1000 .define qwerty @sp+ br asdfgh mov qwerty, r15 @@ -88,73 +88,74 @@ mov @r13+, r14 mov @r14+, r15 .define special r6 -;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid -;mov @sp+, r15 ; pop r15 -;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] -;mov @cg+, r15 +; mov , r14 +; mov @pc+, r15 ; This is a mov-immediate, and may corrupt your output +mov @sp+, r15 ; pop r15 +mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] +mov @cg+, r15 indexed_mode: .define numbered r7 -mov.b 10(r0), r1 -mov 10(r1), r2 +mov.b 0x10(r0), r1 +mov 0x10(r1), r2 ;mov 10(r2), r3 ; Invalid: cannot index relative to sr ;mov 10(r3), r4 ; Invalid: cannot index relative to cg -mov 10(r4), r5 -mov 10(r5), r6 -mov 10(r6), r7 -mov 10(r7), r8 -mov 10(r8), r9 -mov 10(r9), r10 -mov 10(r10), r11 -mov 10(r11), r12 -mov 10(r12), r13 -mov 10(r13), r14 -mov 10(r14), r15 +mov 0x10(r4), r5 +mov 0x10(r5), r6 +mov 0x10(r6), r7 +mov 0x10(r7), r8 +mov 0x10(r8), r9 +mov 0x10(r9), r10 +mov 0x10(r10), r11 +mov 0x10(r11), r12 +mov 0x10(r12), r13 +mov 0x10(r13), r14 +mov 0x10(r14), r15 .define special r8 -mov 10(pc), r15 -mov 10(sp), r15 +mov 0x10(pc), r15 +mov 0x10(sp), r15 ;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] ;mov 10(cg), r15 _immediate_mode: .define numbered r9 -mov #beef, r0 -mov #beef, r1 -mov #beef, r2 -mov #beef, r3 -mov #beef, r4 -mov #beef, r5 -mov 
#beef, r6 -mov #beef, r7 -mov #beef, r8 -mov #beef, r9 -mov #beef, r10 -mov #beef, r11 -mov #beef, r12 -mov #beef, r13 -mov #beef, r14 -mov #beef, r15 +mov #0xbeef, r0 +mov #0xbeef, r1 +mov #0xbeef, r2 +mov #0xbeef, r3 +mov #0xbeef, r4 +mov #0xbeef, r5 +mov #0xbeef, r6 +mov #0xbeef, r7 +mov #0xbeef, r8 +mov #0xbeef, r9 +mov #0xbeef, r10 +mov #0xbeef, r11 +mov #0xbeef, r12 +mov #0xbeef, r13 +mov #0xbeef, r14 +mov #0xbeef, r15 .define special r10 -mov #beef, pc -mov #beef, sp -mov #beef, sr -mov #beef, cg +mov #0xbeef, pc +mov #0xbeef, sp +mov #0xbeef, sr +mov #0xbeef, cg jmp _register_mode -jmp 3fe -jmp -3fc +jmp 0x3fe +jmp -0x3fc ret ; Funky encodings mov r6, r4 mov @r6, r4 mov @r6+, r4 -mov 0(r6), r4 -mov 4141(r6), r4 +mov 0x0(r6), r4 +mov 0x4141(r6), r4 mov #-1, r4 -mov #ffff, r4 +mov #0xffff, r4 mov #0, r4 mov #1, r4 mov #2, r4 @@ -164,33 +165,33 @@ mov r6, 0(r4) mov @r6, 0(r4) mov @r6+, 0(r4) mov 0(r6), 0(r4) -mov 4141(r6), 0(r4) +mov 0x4141(r6), 0(r4) mov #-1, 0(r4) -mov #ffff, 0(r4) +mov #0xffff, 0(r4) mov #0, 0(r4) mov #1, 0(r4) mov #2, 0(r4) mov #4, 0(r4) mov #8, 0(r4) -mov r6, 4141(r4) -mov @r6, 4141(r4) -mov @r6+, 4141(r4) -mov 0(r6), 4141(r4) -mov 4141(r6), 4141(r4) -mov #-1, 4141(r4) -mov #ffff, 4141(r4) -mov #0, 4141(r4) -mov #1, 4141(r4) -mov #2, 4141(r4) -mov #4, 4141(r4) -mov #8, 4141(r4) +mov r6, 0x4141(r4) +mov @r6, 0x4141(r4) +mov @r6+, 0x4141(r4) +mov 0(r6), 0x4141(r4) +mov 0x4141(r6), 0x4141(r4) +mov #-1, 0x4141(r4) +mov #0xffff, 0x4141(r4) +mov #0, 0x4141(r4) +mov #1, 0x4141(r4) +mov #2, 0x4141(r4) +mov #4, 0x4141(r4) +mov #8, 0x4141(r4) mov r6, #0 mov @r6, #0 mov @r6+, #0 mov 0(r6), #0 -mov 4141(r6), #0 +mov 0x4141(r6), #0 mov #-1, #0 -mov #ffff, #0 +mov #0xffff, #0 mov #0, #0 mov #1, #0 mov #2, #0 @@ -200,9 +201,9 @@ mov r6, #1 mov @r6, #1 mov @r6+, #1 mov 0(r6), #1 -mov 4141(r6), #1 +mov 0x4141(r6), #1 mov #-1, #1 -mov #ffff, #1 +mov #0xffff, #1 mov #0, #1 mov #1, #1 mov #2, #1 @@ -211,14 +212,14 @@ mov #8, #1 ; Instruction exercise ; Jumps 
-jne 10 -jeq 10 -jlo 10 -jhs 10 -jn 10 -jge 10 -jl 10 -jmp 10 +jne 0x10 +jeq 0x10 +jlo 0x10 +jhs 0x10 +jn 0x10 +jge 0x10 +jl 0x10 +jmp 0x10 ; Two-ops mov r14, r15 @@ -232,7 +233,7 @@ bit r14, r15 bic r14, r15 bis r14, r15 xor r14, r15 -and r14, 10(r15) +and r14, 0x10(r15) ; One-ops rrc r15 @@ -241,13 +242,14 @@ rra r15 sxt r15 push r15 call r15 -reti r15 +; reti is special +reti ; Jump aliases -jnc 10 -jnz 10 -jc 10 -jz 10 +jnc 0x10 +jnz 0x10 +jc 0x10 +jz 0x10 ; "emulated" no-op instructions ret diff --git a/src/assembler.rs b/src/assembler.rs index e9273b9..22aef2f 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -1,197 +1,423 @@ -// © 2023 John Breaux -//! Traverses an AST, assembling instructions. -//! -//! [Assembler] carries *some* state +//! Assembles a binary using the given [AST](crate::parser::ast) -use crate::parser::preamble::*; -use error::AssemblyError; +use error::{AResult, ErrorKind::*}; use std::collections::HashMap; -use std::path::Path; -pub mod error; +use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span}; -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum IdentType { - Word, - Jump, -} +use self::error::{Error, ErrorKind}; -/// Takes in an AST's [Root], and outputs a sequence of bytes +/// Assembles a binary using the given [Assemble]-able item #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub struct Assembler { - out: Vec, - /// A map from Labels' [Identifier]s to their location in the binary - labels: HashMap, - /// A list of all referenced [Identifier]s in the binary, and their locations - identifiers: Vec<(usize, Identifier, IdentType)>, +pub struct Assembler<'t> { + /// The assembled output + output: Vec, + /// Table of labels, for backpatching + labels: HashMap<&'t str, usize>, + /// Backpatch table for jump instructions + jump_queue: Vec<(usize, &'t str)>, + /// Backpatch table for immediate values + expr_queue: Vec<(usize, Expr<'t>)>, + /// Base address from .org 
directives + org_base: usize, + /// Last seen index in input + loc: Span, } -impl Assembler { - pub fn assemble(r: &Root) -> Result, AssemblyError> { - let mut out = Self::default(); - out.visit_root(r)?; - Ok(out.out) +impl<'t> Assembler<'t> { + pub fn new() -> Self { + Default::default() } - pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) } - pub fn out(self) -> Vec { self.out } - - fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) } - fn push_default(&mut self) -> usize { - self.out.push(Default::default()); - self.out.len() - 1 + pub fn assemble>(&mut self, t: &T) -> AResult<&mut Self> { + t.assemble_in(self) } -} - -impl Assembler { - /// Visits the [Root] node of a parse tree - fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> { - // Visit the entire tree - for (num, line) in r.lines() { - self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?; + /// Gets the address of a label + pub fn addrof(&self, name: &str) -> Option { + self.labels.get(name).map(|v| *v as u16) + } + /// Gets the value at a label + pub fn valueof(&self, name: &str) -> Option { + self.output.get(self.addrof(name)? as usize).copied() + } + fn push(&mut self, word: u16) { + self.output.push(word) + } + fn error(&self, kind: ErrorKind) -> Error { + Error { span: self.loc, kind } + } + /// Backpatches everything, and yoinks the output buffer. 
+ pub fn out(&mut self) -> AResult> { + // Resolve jumps + for (idx, key) in &self.jump_queue { + // eprintln!("Patching jump at {idx} with key {key}"); + match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) { + None => Err(self.error(UndefinedLabel(key.to_string())))?, + Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff, + Some(value) => Err(self.error(LongJump(value)))?, + } } - // Link identifiers - for (idx, id, id_type) in self.identifiers.iter() { - let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) }; - let offset = (num as isize - *idx as isize) * 2; - *self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type { - IdentType::Word => offset as u16, - IdentType::Jump => JumpTarget::squish(offset)?, - }; + // Resolve immediates through late expression evaluation. + for (idx, expr) in &self.expr_queue { + // eprintln!("Patching immediate at {idx} with expression {expr:?}"); + self.output[*idx] = self.eval(expr)?; + } + let out = std::mem::take(&mut self.output); + *self = Default::default(); + Ok(out) + } + + pub fn add_label(&mut self, label: &'t str) -> AResult<()> { + if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() { + Err(self.error(RedefinedLabel(label.into())))? } Ok(()) } - /// visit a [Line] - fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> { - match line { - Line::Insn(insn) => self.visit_instruction(insn), - Line::Label(label) => self.visit_label(label), - Line::Directive(d) => self.visit_directive(d), - _ => Ok(()), + /// Appends an expr as an extword, deferring its calculation for later + pub fn defer_expr(&mut self, e: Expr<'t>) { + self.expr_queue.push((self.output.len(), e)); + self.push(0); + } + /// Defers resolution of a jump label until output time + /// The jump label will be later resolved to the NEXT word. 
+ pub fn defer_jump(&mut self, label: &'t str) { + self.jump_queue.push((self.output.len(), label)) + } +} + +pub trait Assemble<'t> { + fn assemble(&self) -> AResult> { + self.assemble_in(&mut Default::default())?.out() + } + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>; +} + +impl<'t> Assemble<'t> for Statements<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + for stmt in &self.stmts { + stmt.assemble_in(a)?; + } + Ok(a) + } +} +impl<'t> Assemble<'t> for Statement<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + match self { + Statement::Label(label) => a.add_label(label).map(|_| a), + Statement::Insn(i) => i.assemble_in(a), + Statement::Directive(d) => d.assemble_in(a), + Statement::Comment(_) => Ok(a), } } - - /// Visits a [Directive] - fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> { - match node { - Directive::Org(_) => todo!("{node}"), - Directive::Define(..) => (), - Directive::Include(r) => self.visit_root(r)?, - Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()), - Directive::Bytes(words) | Directive::Words(words) => { - for word in words { - self.out.push((*word).into()); +} +impl<'t> Assemble<'t> for Directive<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + match self { + Directive::Define(_) => {} + Directive::Org(base) => a.org_base = a.eval(base)? 
as usize, + Directive::Word(expr) => a.defer_expr(*expr.clone()), + Directive::Words(exprs) => { + for expr in exprs { + a.defer_expr(expr.clone()) } } - Directive::String(s) => self.visit_string(s)?, - Directive::Strings(strs) => { - for s in strs { - self.visit_string(s)?; - } - } - }; - Ok(()) - } - - /// Visits a [Label] - fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> { - // Register the label - match self.labels.insert(node.0.to_owned(), self.out.len()) { - Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())), - _ => Ok(()), - } - } - - /// Visits an [Instruction] - fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> { - self.push_default(); - self.visit_opcode(insn.opcode())?; - self.visit_encoding(insn.encoding())?; - Ok(()) - } - - /// Visits an [Opcode] - fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> { - *self.last_mut()? |= *node as u16; - Ok(()) - } - - /// Visits an [Encoding] - fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> { - *self.last_mut()? |= node.word(); - match node { - Encoding::Single { dst, .. } => { - self.visit_primary_operand(dst)?; - } - Encoding::Jump { target } => { - self.visit_jump_target(target)?; - } - Encoding::Double { src, dst, .. 
} => { - self.visit_primary_operand(src)?; - self.visit_secondary_operand(dst)?; + Directive::String(str) => { + str.assemble_in(a)?; + } + } + Ok(a) + } +} + +impl<'t> Assemble<'t> for &'t str { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + for chunk in self.as_bytes().chunks(2) { + match chunk.len() { + 0 => a.push(0), + 1 => { + a.push(chunk[0] as u16); + return Ok(a); + } + 2 => a.push((chunk[1] as u16) << 8 | chunk[0] as u16), + n => unreachable!("expected chunks of length 2, got length {n}"), + } + } + a.push(0); + Ok(a) + } +} + +impl<'t> Assemble<'t> for Instruction<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + let Self { span, kind } = self; + a.loc = *span; + kind.assemble_in(a) + } +} +impl<'t> Assemble<'t> for InstructionKind<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + match self { + InstructionKind::NoEm(v) => v.assemble_in(a), + InstructionKind::OneEm(v) => v.assemble_in(a), + InstructionKind::OneArg(v) => v.assemble_in(a), + InstructionKind::TwoArg(v) => v.assemble_in(a), + InstructionKind::Jump(v) => v.assemble_in(a), + InstructionKind::Reti(v) => v.assemble_in(a), + InstructionKind::Br(v) => v.assemble_in(a), + } + } +} +impl<'t> Assemble<'t> for NoEm { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + eprintln!( + "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" + ); + self.clone().to_canonical().assemble_in(a) + } +} +impl<'t> Assemble<'t> for OneEm<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + eprintln!( + "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" + ); + self.clone().to_canonical().assemble_in(a) + } +} +impl<'t> Assemble<'t> for OneArg<'t> { + /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] + /// [ 0 0 0 1 0 0 [op:3 ] bw 
[Ad ] [dst_reg:4] ] + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + let Self { opcode, width, src } = self; + let (src_reg, src_mode, src_ext) = source(src); + a.push( + 0b000100 << 10 | one_arg(*opcode) << 7 | (*width as u16) << 6 | src_mode << 4 | src_reg, + ); + if let Some(expr) = src_ext { + a.defer_expr(expr) + } + Ok(a) + } +} +impl<'t> Assemble<'t> for TwoArg<'t> { + /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] + /// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ] + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + let Self { opcode, width, src, dst } = self; + let (src_reg, src_mode, src_ext) = source(src); + let (dst_reg, dst_mode, dst_ext) = destination(dst); + a.push( + two_arg(*opcode) << 12 + | src_reg << 8 + | dst_mode << 7 + | (*width as u16) << 6 + | src_mode << 4 + | dst_reg, + ); + + if let Some(expr) = src_ext { + a.defer_expr(expr) + } + if let Some(expr) = dst_ext { + a.defer_expr(expr) + } + Ok(a) + } +} +impl<'t> Assemble<'t> for Jump<'t> { + /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] + /// [ 0 0 1 [cond:3] +- [word_offset:10 ] ] + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + let Self { opcode, dst } = self; + let word = 1 << 13 + | jump(*opcode) << 10 + | match *dst { + JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))), + JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => { + return Err(a.error(LongJump(value))) + } + JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff, + JumpDst::Label(label) => { + a.defer_jump(label); + 0 + } + } & 0x3ff; + a.push(word); + Ok(a) + } +} +impl<'t> Assemble<'t> for Reti { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { + a.output.push(0b0001_0011_0000_0000); + Ok(a) + } +} +impl<'t> Assemble<'t> for Br<'t> { + fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut 
Assembler<'t>> { + eprintln!( + "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" + ); + self.clone().to_canonical().assemble_in(a) + } +} + +pub fn one_arg(opcode: token::OneArg) -> u16 { + opcode as u16 +} + +pub fn two_arg(opcode: token::TwoArg) -> u16 { + opcode as u16 + 4 +} + +pub fn jump(opcode: token::Jump) -> u16 { + use token::Jump; + match opcode { + Jump::Jne | Jump::Jnz => 0, + Jump::Jeq | Jump::Jz => 1, + Jump::Jnc | Jump::Jlo => 2, + Jump::Jc | Jump::Jhs => 3, + Jump::Jn => 4, + Jump::Jge => 5, + Jump::Jl => 6, + Jump::Jmp => 7, + } +} + +/// Returns a tuple of (Reg, AddrMode, extword) +pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option>) { + use SrcSpecial::*; + match src { + Src::Special(Four) => (2, 2, None), + Src::Special(Eight) => (2, 3, None), + Src::Special(Zero) => (3, 0, None), + Src::Special(One) => (3, 1, None), + Src::Special(Two) => (3, 2, None), + Src::Special(NegOne) => (3, 3, None), + Src::Immediate(e) => (0, 3, Some(*e.clone())), + Src::Absolute(e) => (2, 1, Some(*e.clone())), + Src::Direct(r) => (*r as u16, 0, None), + Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())), + Src::Indirect(r) => (*r as u16, 2, None), + Src::PostInc(r) => (*r as u16, 3, None), + Src::BareExpr(e) => (0, 3, Some(*e.clone())), + } +} +/// Returns a tuple of (Reg, AddrMode, Extword) +pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option>) { + use DstSpecial::*; + match dst { + Dst::Special(Zero) => (3, 0, None), + Dst::Special(One) => (3, 1, None), + Dst::Absolute(e) => (2, 1, Some(*e.clone())), + Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())), + Dst::Direct(r) => (*r as u16, 0, None), + } +} + +impl<'t> Assembler<'t> { + /// Evaluates an [Expr] using labels and constants defined in the current program + fn eval(&self, expr: &Expr) -> AResult { + match expr { + Expr::Binary(head, tails) => { + let mut head = self.eval(head)?; + for (op, tail) in tails { + let tail = self.eval(tail)?; + head = match 
op { + BinOp::Mul => head.wrapping_mul(tail), + BinOp::Div => head.wrapping_div(tail), + BinOp::Rem => head.wrapping_rem(tail), + BinOp::Add => head.wrapping_add(tail), + BinOp::Sub => head.wrapping_sub(tail), + BinOp::Lsh => head.wrapping_shl(tail as u32), + BinOp::Rsh => head.wrapping_shr(tail as u32), + BinOp::And => head & tail, + BinOp::Xor => head ^ tail, + BinOp::Or => head | tail, + }; + } + Ok(head) + } + Expr::Unary(ops, tail) => { + let mut tail = self.eval(tail)?; + for op in ops { + tail = match op { + UnOp::Not => !tail, + UnOp::Neg => 0u16.wrapping_sub(tail), + UnOp::Deref => *self + .output + .get(tail.wrapping_sub(self.org_base as u16) as usize >> 1) + .ok_or_else(|| self.error(OobRead(tail)))?, + } + } + Ok(tail) + } + Expr::Group(e) => self.eval(e), + Expr::Number(n) => Ok(*n), + Expr::Ident(name) => { + self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string()))) + } + Expr::AddrOf(name) => self + .addrof(name) + .map(|p| (p << 1).wrapping_add(self.org_base as u16)) + .ok_or_else(|| self.error(UndefinedLabel(name.to_string()))), + } + } +} + +pub mod error { + use std::fmt::Display; + + use crate::util::Span; + + pub type AResult = Result; + + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct Error { + pub span: Span, + pub kind: ErrorKind, + } + impl std::error::Error for Error {} + + #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum ErrorKind { + #[default] + Todo, + /// A label was used, but not defined + UndefinedLabel(String), + RedefinedLabel(String), + OobRead(u16), + OddJump(i16), + LongJump(i16), + /// A plethora of [Error]s + Errors(Vec), + } + impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { kind, span } = self; + write!(f, "[{span}]: ")?; + write!(f, "Error: {kind}") + } + } + impl Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + 
ErrorKind::Todo => write!(f, "Not yet implemented"), + ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"), + ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"), + ErrorKind::OobRead(addr) => { + write!(f, "Out of bounds read in constant expression: {addr}") + } + ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"), + ErrorKind::LongJump(to) => { + write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe") + } + ErrorKind::Errors(errors) => { + writeln!(f, "Could not complete assembly:")?; + for error in errors { + writeln!(f, "{error}")?; + } + Ok(()) + } } } - Ok(()) - } - - /// Visits a [JumpTarget] - fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> { - match node { - JumpTarget::Number(num) => self.visit_number(num), - JumpTarget::Identifier(id) => { - self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?; - Ok(()) - } - } - } - - /// Visits a [SecondaryOperand] - fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> { - use SecondaryOperand as O; - if let O::Indexed(_, num) | O::Absolute(num) = node { - self.push_default(); - self.visit_number(num)?; - } - Ok(()) - } - - /// Visits a [PrimaryOperand] - fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> { - use PrimaryOperand as O; - match node { - O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => { - self.push_default(); - self.visit_number(num)?; - } - O::Relative(id) => { - let addr = self.push_default(); - self.visit_identifier(id, addr, IdentType::Word)?; - } - _ => (), - } - Ok(()) - } - - /// Visits a number and writes it into the last index - fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> { - *self.last_mut()? 
|= u16::from(*node); - Ok(()) - } - - /// Visits a number and appends it to the output buffer - fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> { - for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() { - if idx % 2 == 0 { - self.push_default(); - } - *self.last_mut()? |= (byte as u16) << (8 * (idx % 2)); - } - Ok(()) - } - - /// Visits an [Identifier], and registers it to the identifier list - fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> { - self.identifiers.push((addr, node.clone(), ty)); - Ok(()) } } diff --git a/src/assembler/error.rs b/src/assembler/error.rs deleted file mode 100644 index 08c4a34..0000000 --- a/src/assembler/error.rs +++ /dev/null @@ -1,56 +0,0 @@ -// © 2023 John Breauxs -use crate::parser::{error::ParseError, preamble::*}; -use std::{ - fmt::Display, - path::{Path, PathBuf}, -}; - -#[derive(Debug)] -pub enum AssemblyError { - UnresolvedIdentifier(Identifier), - RedefinedLabel(Identifier), - JumpedTooFar(Identifier, isize), - ParseError(ParseError), - // TODO: This, better' - Context(Box, PathBuf, usize), - EmptyBuffer, -} - -impl AssemblyError { - pub(super) fn ctx + ?Sized>(self, file: &P, line: usize) -> Self { - Self::Context(self.into(), file.as_ref().into(), line) - } -} - -impl From for AssemblyError { - fn from(value: ParseError) -> Self { Self::ParseError(value) } -} - -impl Display for AssemblyError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::UnresolvedIdentifier(id) => { - write!(f, "Identifier {id} is undefined, but referenced anyway.") - } - Self::RedefinedLabel(id) => { - write!(f, "Redefined label '{id}'.") - } - Self::JumpedTooFar(id, num) => { - write!(f, "Label '{id}' is too far away. 
({num} is outside range -0x400..=0x3fe)") - } - Self::ParseError(e) => Display::fmt(e, f), - Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()), - Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f), - } - } -} - -impl std::error::Error for AssemblyError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Self::ParseError(e) => Some(e), - Self::Context(e, ..) => Some(e), - _ => None, - } - } -} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index c6b46ee..0000000 --- a/src/error.rs +++ /dev/null @@ -1,49 +0,0 @@ -// © 2023 John Breauxs -//! Common error type for [msp430-asm](crate) errors - -use super::*; -use std::fmt::Display; - -#[derive(Debug)] -pub enum Error { - /// Produced by [lexer] - LexError(lexer::error::LexError), - /// Produced by [parser] - ParseError(parser::error::ParseError), - /// Produced by [assembler] - AssemblyError(assembler::error::AssemblyError), -} - -impl Error {} - -impl From for Error { - fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) } -} - -impl From for Error { - fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) } -} - -impl From for Error { - fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) } -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::LexError(e) => Display::fmt(e, f), - Error::ParseError(e) => Display::fmt(e, f), - Error::AssemblyError(e) => Display::fmt(e, f), - } - } -} - -impl std::error::Error for Error { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Error::LexError(e) => Some(e), - Error::ParseError(e) => Some(e), - Error::AssemblyError(e) => Some(e), - } - } -} diff --git a/src/hash.rs b/src/hash.rs deleted file mode 100644 index 2c0dcdd..0000000 --- a/src/hash.rs +++ /dev/null @@ -1,22 
+0,0 @@ -// © 2023 John Breaux -//! Convenience functions and traits for dealing with hashable data -pub type Hash = u64; - -/// Calculates a hash using Rust hashmap's default hasher. -pub fn hash(hashable: T) -> Hash { - use std::hash::Hasher; - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - hashable.hash(&mut hasher); - hasher.finish() -} - -pub trait FromHash: From { - /// Hashes anything that implements [type@Hash] using the - /// [DefaultHasher](std::collections::hash_map::DefaultHasher) - fn hash(hashable: T) -> Hash { hash(hashable) } - fn from_hash(hashable: T) -> Self - where Self: Sized { - Self::from(Self::hash(hashable)) - } -} -impl> FromHash for T {} diff --git a/src/lexer.rs b/src/lexer.rs index 318b833..992d785 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,69 +1,268 @@ -// © 2023 John Breaux -//! Iterates over [`&str`](str), producing [`Token`s](Token) +// © 2023-2024 John Breaux +//! The [Lexer] turns a [sequence of characters](str) into a stream of +//! [lexically-tagged tokens](token) -pub mod context; -pub mod error; -pub mod ignore; -pub mod preprocessed; pub mod token; -pub mod token_stream; -use context::Context; -use error::LexError; -use token::{Token, Type}; -use token_stream::TokenStream; +use self::token::{Special, TokenKind, *}; +use crate::util::Span; +use std::{ + iter::Peekable, + str::{CharIndices, FromStr}, +}; +use unicode_ident::*; -/// Iterates over &[str], producing [Token]s -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Tokenizer<'t> { +const DEFAULT_BASE: u32 = 10; + +/// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token). 
+/// +/// # Examples +/// ```rust +/// # use libmsp430::lexer::{Lexer, token::*}; +/// let text = "mov r14, r15"; +/// let mut lexer = Lexer::new(text); +/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov)); +/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14)); +/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma); +/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15)); +/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof); +/// ``` +#[derive(Clone, Debug)] +pub struct Lexer<'t> { + /// Keeps track of the byte offset into the string + iter: Peekable>, text: &'t str, - idx: usize, - context: Context, + start: usize, + index: usize, } -impl<'t> Tokenizer<'t> { - /// Produces a new [Tokenizer] from a [str]ing slice - pub fn new(text: &'t T) -> Self - where T: AsRef + ?Sized { - Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() } +impl<'t> Lexer<'t> { + /// Creates a new [Lexer] over some [text](str) + pub fn new(text: &'t str) -> Self { + Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 } } - - fn count(&mut self, token: &Token) { - // update the context - self.context.count(token); - // advance the index - self.idx += token.len(); + /// Gets the current byte-position + pub fn location(&self) -> usize { + self.start } -} - -impl<'text> Iterator for Tokenizer<'text> { - type Item = Token<'text>; - - fn next(&mut self) -> Option { - if self.idx >= self.text.len() { - return None; + /// Internal: Emits a token with the provided [TokenKind], providing its extents. 
+ fn emit(&mut self, kind: TokenKind) -> Option> { + let out = + Some(Token::new(self.next_lexeme(), kind, Span { start: self.start, end: self.index })); + self.start = self.index; + out + } + fn next_lexeme(&self) -> &'t str { + &self.text[self.start..self.index] + } + fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self { + while let Some(&c) = self.peek() { + if !f(c) { + break; + } + self.next(); } - let token = Token::from(&self.text[self.idx..]); - // Process [Type::Directive]s - // Count the token - self.count(&token); - Some(token) + self + } + fn space(&mut self) -> Option<&mut Self> { + while self.peek()?.is_whitespace() && *self.peek()? != '\n' { + self.next(); + } + self.start = self.index; + Some(self) + } + /// Consumes a [char] without checking, for ergonomic chaining + fn then(&mut self) -> &mut Self { + self.next(); + self + } + fn peek(&mut self) -> Option<&char> { + self.iter.peek().map(|(_, c)| c) + } + fn next(&mut self) -> Option { + let (index, c) = self.iter.next()?; + self.index = index + c.len_utf8(); + Some(c) + } + + /// Scans for the next [Token] in the stream + pub fn scan(&mut self) -> Option> { + if self.space().is_none() { + return self.emit(TokenKind::Eof); + } + let Some(c) = self.peek() else { + return self.emit(TokenKind::Eof); + }; + match c { + '\n' => self.then().emit(TokenKind::Newline), + '!' => self.then().emit(TokenKind::Bang), + '#' => self.then().emit(TokenKind::Hash), + '$' => self.then().emit(TokenKind::Dollar), + '%' => self.then().emit(TokenKind::Percent), + '&' => self.then().emit(TokenKind::Amp), + '\'' => self.then().char(), + '"' => self.then().string(), + '(' => self.then().emit(TokenKind::OpenParen), + ')' => self.then().emit(TokenKind::CloseParen), + '*' => self.then().emit(TokenKind::Star), + '+' => self.then().emit(TokenKind::Plus), + ',' => self.then().emit(TokenKind::Comma), + '-' => self.then().emit(TokenKind::Minus), + '.' 
=> self.then().directive_or_bw(), + '/' => self.then().comment_or_slash(), + '0' => self.then().number_with_base(), + ':' => self.then().emit(TokenKind::Colon), + ';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment), + '<' => self.then().less(), + '>' => self.then().greater(), + '@' => self.then().emit(TokenKind::At), + '[' => self.then().emit(TokenKind::OpenBrace), + ']' => self.then().emit(TokenKind::CloseBrace), + '^' => self.then().emit(TokenKind::Caret), + '_' => self.then().identifier(), + '{' => self.then().emit(TokenKind::OpenCurly), + '|' => self.then().emit(TokenKind::Bar), + '}' => self.then().emit(TokenKind::CloseCurly), + c if c.is_numeric() => self.number::(), + &c if is_xid_start(c) => self.then().identifier(), + c => todo!("Unrecognized character: {c}"), + } + } + fn number_with_base(&mut self) -> Option> { + match self.peek() { + Some('x') => self.then().number::<16>(), + Some('d') => self.then().number::<10>(), + Some('o') => self.then().number::<8>(), + Some('b') => self.then().number::<2>(), + Some(c) if c.is_ascii_digit() => self.number::(), + _ => self.emit(TokenKind::Number(0, 10)), + } + } + fn number(&mut self) -> Option> { + let mut num = self.digit::()?; + while let Some(digit) = self.digit::() { + num = num.saturating_mul(B).saturating_add(digit); // saturate so an over-long literal is rejected below instead of overflowing u32 + } + if num > u16::MAX as u32 { + None + } else { + self.emit(TokenKind::Number(num as u16, B as u8)) + } + } + fn digit(&mut self) -> Option { + let digit = self.peek()?.to_digit(B)?; + self.then(); + Some(digit) + } + + fn comment_or_slash(&mut self) -> Option> { + match self.peek() { + Some('/') => self.repeat(|c| c != '\n').emit(TokenKind::Comment), + _ => self.emit(TokenKind::Slash), + } + } + fn less(&mut self) -> Option> { + match self.peek() { + Some('<') => self.then().emit(TokenKind::Lsh), + _ => todo!("less"), + } + } + fn greater(&mut self) -> Option> { + match self.peek() { + Some('>') => self.then().emit(TokenKind::Rsh), // `>>` is a right shift, not a left shift + _ => todo!("greater"), + } + } + fn identifier(&mut self) -> Option> { + while
let Some(c) = self.peek() { + if !is_xid_continue(*c) || self.next().is_none() { // peek first: consume a character only once it is known to continue the identifier + break; + } + } + let lexeme = self.next_lexeme(); + if let Ok(op) = Reg::from_str(lexeme) { + self.emit(TokenKind::Reg(op)) + } else if let Ok(op) = NoEm::from_str(lexeme) { + self.emit(TokenKind::NoEm(op)) + } else if let Ok(op) = OneEm::from_str(lexeme) { + self.emit(TokenKind::OneEm(op)) + } else if let Ok(op) = Special::from_str(lexeme) { + self.emit(TokenKind::Special(op)) + } else if let Ok(op) = OneArg::from_str(lexeme) { + self.emit(TokenKind::OneArg(op)) + } else if let Ok(op) = TwoArg::from_str(lexeme) { + self.emit(TokenKind::TwoArg(op)) + } else if let Ok(op) = Jump::from_str(lexeme) { + self.emit(TokenKind::Jump(op)) + } else { + self.emit(TokenKind::Identifier) + } + } + fn directive_or_bw(&mut self) -> Option> { + while let Some(c) = self.peek() { + if !is_xid_continue(*c) || self.next().is_none() { // peek first: consume only characters that continue the directive name + break; + } + } + match self.next_lexeme() { + ".b" => self.emit(TokenKind::Byte), + ".w" => self.emit(TokenKind::Word), + _ => self.emit(TokenKind::Directive), + } + } + + /// Todo: Character unescaping in Lexer::string + fn string(&mut self) -> Option> { + while '"' != self.next()? {} + self.emit(TokenKind::String) + } + fn char(&mut self) -> Option> { + let out = self.unescape()?; + self.next().filter(|c| *c == '\'').and_then(|_| self.emit(TokenKind::Char(out))) + } + /// Unescape a single character + fn unescape(&mut self) -> Option { + match self.next() { + Some('\\') => (), + other => return other, + } + Some(match self.next()? { + 'a' => '\x07', + 'b' => '\x08', + 'f' => '\x0c', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'x' => self.hex_escape()?, + 'u' => self.unicode_escape()?, + '0' => '\0', + chr => chr, + }) + } + /// unescape a single 2-digit hex escape + fn hex_escape(&mut self) -> Option { + let out = (self.digit::<16>()?
<< 4) + self.digit::<16>()?; + char::from_u32(out) //.ok_or(Error::bad_unicode(out, self.line(), self.col())) + } + /// unescape a single \u{} unicode escape + fn unicode_escape(&mut self) -> Option { + let mut out = 0; + let Some('{') = self.peek() else { + return None; //Err(Error::invalid_escape('u', self.line(), self.col())); + }; + self.then(); + while let Some(c) = self.peek() { + match c { + '}' => { + self.then(); + return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col())); + } + _ => out = (out << 4) + self.digit::<16>()?, + } + } + None //Err(Error::invalid_escape('u', self.line(), self.col())) } } -impl<'text> TokenStream<'text> for Tokenizer<'text> { - fn context(&self) -> Context { self.context } - // Tokenizer has access to the source buffer, and can implement expect and peek without cloning - // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and - // registers are valid identifiers. - fn expect(&mut self, expected: Type) -> Result { - let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?; - self.count(&token); - Ok(token) - } - fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) } - fn peek_expect(&mut self, expected: Type) -> Result { - Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context())) - } -} +#[cfg(test)] +mod tests; diff --git a/src/lexer/context.rs b/src/lexer/context.rs deleted file mode 100644 index 9791fcc..0000000 --- a/src/lexer/context.rs +++ /dev/null @@ -1,38 +0,0 @@ -// © 2023 John Breaux -//! A [Context] stores contextual information about the current tokenizer state -//! -//! This data is trivially copyable and can be provided in error messages using the -//! 
[Error::Contextual] specialization) -use super::*; -/// Stores contextual information about the current tokenizer state, useful for printing errors -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Context { - line: usize, - position: usize, - tokens: usize, -} - -impl Context { - pub fn new() -> Self { Default::default() } - pub fn line(&self) -> usize { self.line } - pub fn tokens(&self) -> usize { self.tokens } - pub fn position(&self) -> usize { self.position } - pub(super) fn count(&mut self, t: &Token) { - match t.variant() { - Type::EndOfFile => return, - Type::Endl => { - self.line += 1; - self.position = 1; - } - _ => self.position += t.len(), - } - self.tokens += 1; - } -} -impl Default for Context { - fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } } -} - -impl std::fmt::Display for Context { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) } -} diff --git a/src/lexer/error.rs b/src/lexer/error.rs deleted file mode 100644 index 652dcf2..0000000 --- a/src/lexer/error.rs +++ /dev/null @@ -1,68 +0,0 @@ -// © 2023 John Breauxs -use super::{ - context::Context, - token::{OwnedToken, *}, -}; -use std::fmt::Display; - -#[derive(Debug)] -pub enum LexError { - /// Any other error, tagged with [Context]. Created by [`Error::context()`] - Contextual(Context, Box), - /// Produced by [Token] when the input is entirely unexpected. - UnexpectedSymbol(String), - /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] - UnexpectedToken { expected: Type, got: OwnedToken }, - /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the - /// expected [Types](Type) - AllExpectationsFailed { expected: Types, got: OwnedToken }, -} - -impl LexError { - pub fn context(self, c: Context) -> Self { - match self { - Self::Contextual(..) 
=> self, - _ => Self::Contextual(c, Box::new(self)), - } - } - - // Extracts the root of the error tree - pub fn bare(self) -> Self { - match self { - Self::Contextual(_, bare) => bare.bare(), - _ => self, - } - } - - pub fn expected, T: Into>(expected: E, got: T) -> Self { - match expected.as_ref().len() { - 1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() }, - _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() }, - } - } - - pub fn mask_expectation(mut self, expected: Type) -> Self { - match self { - LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got }, - LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got }, - LexError::Contextual(context, err) => { - self = LexError::Contextual(context, Box::new(err.mask_expectation(expected))) - } - _ => (), - } - self - } -} - -impl Display for LexError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), - LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""), - LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), - LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."), - } - } -} - -impl std::error::Error for LexError {} diff --git a/src/lexer/ignore.rs b/src/lexer/ignore.rs deleted file mode 100644 index a9f89b5..0000000 --- a/src/lexer/ignore.rs +++ /dev/null @@ -1,55 +0,0 @@ -// © 2023 John Breaux -//! 
Removes a single [kind](Type) of [`Token`] from a [`TokenStream`] -use super::*; -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Ignore<'t, T> -where T: TokenStream<'t> -{ - ignore: Type, - inner: &'t mut T, -} - -impl<'t, T> Ignore<'t, T> -where T: TokenStream<'t> -{ - /// Creates a new [Ignore], which ignores the [ignore Type](Type) - pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } } - - /// Gets a mutable reference to the inner [Iterator] - pub fn inner_mut(&mut self) -> &mut T { self.inner } -} - -impl<'t, T> Iterator for Ignore<'t, T> -where T: TokenStream<'t> -{ - type Item = Token<'t>; - fn next(&mut self) -> Option { - let next = self.inner.next()?; - // Space tokens are greedy, so the next token shouldn't be a Space - match next.variant() { - Type::Space => self.next(), - _ => Some(next), - } - } -} - -impl<'t, T> TokenStream<'t> for Ignore<'t, T> -where T: TokenStream<'t> -{ - fn context(&self) -> Context { self.inner.context() } - fn expect(&mut self, expected: Type) -> Result { - self.inner.allow(self.ignore); - self.inner.expect(expected) - } - - fn peek(&mut self) -> Self::Item { - self.inner.allow(self.ignore); - self.inner.peek() - } - - fn peek_expect(&mut self, expected: Type) -> Result { - self.inner.allow(self.ignore); - self.inner.peek_expect(expected) - } -} diff --git a/src/lexer/preprocessed.rs b/src/lexer/preprocessed.rs deleted file mode 100644 index 4d4ab2e..0000000 --- a/src/lexer/preprocessed.rs +++ /dev/null @@ -1,174 +0,0 @@ -// © 2023 John Breaux -//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band -//! 
".define" rules -use super::*; -use std::collections::{HashMap, VecDeque}; - -// TODO: Clean this spaghetti mess up - -/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define" -/// rules -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[derive(PartialEq, Eq)] -pub struct Preprocessed<'t, T> -where T: TokenStream<'t> -{ - sub_table: HashMap, Vec>>, - sub_types: Vec, - queue: VecDeque>, - inner: &'t mut T, -} - -impl<'t, T> Iterator for Preprocessed<'t, T> -where T: TokenStream<'t> -{ - type Item = Token<'t>; - fn next(&mut self) -> Option { - match self.queue.pop_front() { - Some(token) => Some(token), - None => { - let next = self.inner.next()?; - if let Some(subs) = self.sub_table.get(&next) { - self.queue.extend(subs); - return self.next(); - } - Some(next) - } - } - } -} - -impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> { - /// Creates a new [Preprocessed] [TokenStream] - pub fn new(inner: &'t mut T) -> Self { - Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner } - } - - /// Gets a mutable reference to the inner [TokenStream] - pub fn inner_mut(&mut self) -> &mut T { self.inner } - - /// Preserve the next token in the queue - fn enqueue(&mut self, token: Token<'t>) -> Token<'t> { - self.queue.push_back(token); - token - } - - /// Process .define directives in the preprocessor - fn define(&mut self, token: Token<'t>) -> Result<(), LexError> { - if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) { - return Ok(()); - } - // Tokenize the subdocument - self.allow(Type::Directive); - self.allow(Type::Space); - - let Some(k) = self.inner.next() else { return Ok(()) }; - if !self.sub_types.contains(&k.variant()) { - self.sub_types.push(k.variant()); - }; - - self.allow(Type::Space); - - let mut replacement = vec![]; - loop { - match self.inner.peek().variant() { - Type::Endl | Type::EndOfFile => break, - Type::Comment | 
Type::Space => { - // ignore comments - self.inner.next(); - } - _ => { - let next = self.inner.next().unwrap(); - replacement.push(self.enqueue(next)); - } - } - } - self.sub_table.insert(k, replacement); - Ok(()) - } - - /// Does the preprocessing step - fn preprocess(&mut self, token: Token<'t>) { - if let Some(subs) = self.sub_table.get(&token) { - self.queue.extend(subs); - self.inner.next(); - } - } -} - -impl<'t, T> TokenStream<'t> for Preprocessed<'t, T> -where T: TokenStream<'t> -{ - fn context(&self) -> Context { self.inner.context() } - - fn expect(&mut self, expected: Type) -> Result { - match self.queue.front() { - Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()), - Some(&token) => Err(LexError::expected([expected], token).context(self.context())), - None => { - // Only resolve defines when expecting, otherwise you'll run into issues. - if let Ok(next) = self.inner.expect(expected) { - self.define(next)?; - return Ok(next); - } - if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) { - if let Some(subs) = self.sub_table.get(&next) { - self.inner.allow_any_of(&self.sub_types); - self.queue.extend(subs); - } - return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) }; - } - Err(LexError::expected([expected], self.inner.peek()).context(self.context())) - } - } - } - - fn peek(&mut self) -> Self::Item { - match self.queue.front() { - Some(token) => *token, - None => { - // Only allow substitution when the next token is unexpected - let old = self.inner.peek(); - self.preprocess(old); - match self.queue.front() { - Some(&new) => new, - None => old, - } - } - } - } - - fn peek_expect(&mut self, expected: Type) -> Result { - match self.queue.front() { - Some(&token) if token.is_variant(expected) => Ok(token), - Some(&token) => Err(LexError::expected([expected], token).context(self.context())), - None => { - if let Ok(next) = self.inner.peek_expect(expected) { - return 
Ok(next); - } - if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) { - self.preprocess(next); - return if self.queue.is_empty() { - self.inner.peek_expect(expected) - } else { - self.peek_expect(expected) - }; - } - Err(LexError::expected([expected], self.inner.peek())) - } - } - } -} - -impl<'t, T> std::fmt::Debug for Preprocessed<'t, T> -where T: TokenStream<'t> -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Preprocessed") - .field("sub_table", &self.sub_table) - .field("sub_types", &self.sub_types) - .field("queue", &self.queue) - .field("context", &self.context()) - .finish_non_exhaustive() - } -} diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs new file mode 100644 index 0000000..df3c0dc --- /dev/null +++ b/src/lexer/tests.rs @@ -0,0 +1,66 @@ +use super::*; +macro_rules! lex { + (type ($t:tt), $expected:expr) => { + let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token)); + assert_eq!(token.kind, $expected); + }; + ({ $($t:tt)* }) => { + Lexer::new(stringify!($($t)*)) + }; +} +#[test] +fn ascii_char() { + lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char + lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char +} +#[test] +fn unicode_escape_char() { + lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈 +} +#[test] +fn number_with_base() { + lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number + lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number + lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number + lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number + lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number + lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number +}
+#[test] +fn no_operand_emulated() { + lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm + lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm + lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm + lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm + lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm + lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm + lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm + lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm + lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm + lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm +} +#[test] +fn registers() { + lex!(type(pc), TokenKind::Reg(Reg::PC)); + lex!(type(sp), TokenKind::Reg(Reg::SP)); + lex!(type(sr), TokenKind::Reg(Reg::SR)); + lex!(type(cg), TokenKind::Reg(Reg::CG)); + lex!(type(r0), TokenKind::Reg(Reg::PC)); + lex!(type(r1), TokenKind::Reg(Reg::SP)); + lex!(type(r2), TokenKind::Reg(Reg::SR)); + lex!(type(r3), TokenKind::Reg(Reg::CG)); + lex!(type(r4), TokenKind::Reg(Reg::R4)); + lex!(type(r5), TokenKind::Reg(Reg::R5)); + lex!(type(r6), TokenKind::Reg(Reg::R6)); + lex!(type(r7), TokenKind::Reg(Reg::R7)); + lex!(type(r8), TokenKind::Reg(Reg::R8)); + lex!(type(r9), TokenKind::Reg(Reg::R9)); + lex!(type(r10), TokenKind::Reg(Reg::R10)); + lex!(type(r11), TokenKind::Reg(Reg::R11)); + lex!(type(r12), TokenKind::Reg(Reg::R12)); + lex!(type(r13), TokenKind::Reg(Reg::R13)); + lex!(type(r14), TokenKind::Reg(Reg::R14)); + lex!(type(r15), TokenKind::Reg(Reg::R15)); +} + +// TODO: opcode tests, misc. special character tests, etc. diff --git a/src/lexer/token.rs b/src/lexer/token.rs index f3e2dbe..a467f68 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,335 +1,479 @@ -// © 2023 John Breaux -//! 
A [Token] is a [semantically tagged](Type) sequence of characters. +// © 2023-2024 John Breaux +//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span] //! -//! Token, and the tokenizer, intend to copy as little as possible. - -use super::error::LexError; -use regex::Regex; -use std::{ - fmt::{Debug, Display}, - sync::OnceLock, -}; - -/// Implements regex matching functions on [`Token`] for each [`Type`], -/// and implements [`From<&str>`] for [`Token`] -macro_rules! regex_impl { -(<$t:lifetime> $type:ty {$( - $(#[$meta:meta])* - pub fn $func:ident (text: &str) -> Option { - regex!($out:path = $re:literal) +//! [Tokens](Token) are borrowed, and cannot outlive their source slice (lifetime `'t`) +use crate::util::Span; +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token<'t> { + pub lexeme: &'t str, + pub kind: TokenKind, + pub pos: Span, +} +impl<'t> Token<'t> { + pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span) -> Self { + Self { lexeme, kind, pos } } -)*}) => { -impl<$t> $type { - /// Lexes a token only for the expected `variant` - /// - /// Warning: This bypasses precedence rules. Only use for specific patterns.
- pub fn expect(text: &$t str, expected: Type) -> Result { - match expected {$( - $out => Self::$func(text), - )*}.ok_or(LexError::UnexpectedToken { - expected, - got: Self::from(text).into(), - }) - } - $( - $(#[$meta])* - /// Tries to read [` - #[doc = stringify!($out)] - /// `] from `text` - pub fn $func(text: &$t str) -> Option { - static RE: OnceLock = OnceLock::new(); - let lexeme = RE.get_or_init(|| Regex::new($re).unwrap()) - .find(text)?.into(); - Some(Self { variant: $out, lexeme }) - })* -} -impl<$t> From<&$t str> for $type { - fn from (value: &$t str) -> Self { - $( - if let Some(token) = Self::$func(value) { - token - } else - )* - {todo!("Unexpected input: {value:#?} (Tokenization failure)")} - } -} -}; -} - -/// A [Token] is a [semantically tagged](Type) sequence of characters -#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Token<'text> { - /// The type of this token - variant: Type, - /// The sub[str]ing corresponding to this token - lexeme: &'text str, -} - -impl<'text> Token<'text> { - /// Returns the [Type] of this [Token] - pub fn variant(&self) -> Type { self.variant } - - /// Returns the lexeme (originating string slice) of this token - pub fn lexeme(&self) -> &'text str { self.lexeme } - - /// Parses this [Token] into another type - pub fn parse(&self) -> Result::Err> - where F: std::str::FromStr { - self.lexeme.parse() - } - /// Returns whether the Lexeme is the expected [Type] - pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected } - - /// Returns the length of [Self::lexeme] in bytes. - pub fn len(&self) -> usize { self.lexeme.len() } - - /// Returns `true` if [Self::lexeme] has a length of zero bytes. 
- pub fn is_empty(&self) -> bool { self.lexeme.is_empty() } -} - -impl<'text> Debug for Token<'text> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_list().entry(&self.variant).entry(&self.lexeme).finish() + pub fn kind(&self) -> TokenKind { + self.kind } } -impl<'text> Display for Token<'text> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.variant { - Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f), - v => write!(f, "{v} \"{}\"", self.lexeme), +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TokenKind { + Eof, + Newline, // \n + OpenParen, // ( + CloseParen, // ) + OpenCurly, // { + CloseCurly, // } + OpenBrace, // [ + CloseBrace, // ] + Comma, // , + Colon, // : + Bang, // ! + At, // @ + Amp, // & + Bar, // | + Caret, // ^ + Star, // * + Hash, // # + Dollar, // $ + Percent, // % + Plus, // + + Minus, // - + Slash, // / + Lsh, // << + Rsh, // >> + + Comment, // (';' | '//') .* '\n' | + Directive, // '.' XID_CONTINUE* + Identifier, // XID_START XID_CONTINUE* + Number(u16, u8), // varies depending on base + Char(char), // '\'' ('\' Escape | .) 
'\'' + String, // '"' .* '"' + Reg(Reg), + NoEm(NoEm), + OneEm(OneEm), + Special(Special), + OneArg(OneArg), + TwoArg(TwoArg), + Jump(Jump), + + Byte, // .b + Word, // .w +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Reg { + PC, + SP, + SR, + CG, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +} +/// Fake instructions of the form `opcode` +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum NoEm { + Nop, + Ret, + Clrc, + Clrz, + Clrn, + Setc, + Setz, + Setn, + Dint, + Eint, +} +/// Fake instructions of the form `opcode dst` +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OneEm { + Pop, + Rla, + Rlc, + Inv, + Clr, + Tst, + Dec, + Decd, + Inc, + Incd, + Adc, + Dadc, + Sbc, +} +/// These opcodes have bespoke grammatical rules +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Special { + /// Br = "br" Src + Br, +} +/// Real instructions of the form `opcode src` +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OneArg { + Rrc, + Swpb, + Rra, + Sxt, + Push, + Call, + Reti, +} +/// Real instructions of the form `opcode src, dst` +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TwoArg { + Mov, + Add, + Addc, + Subc, + Sub, + Cmp, + Dadd, + Bit, + Bic, + Bis, + Xor, + And, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Jump { + Jne, + Jnz, + Jeq, + Jz, + Jnc, + Jlo, + Jc, + Jhs, + Jn, + Jge, + Jl, + Jmp, +} +mod convert { + //! Implementations of [FromStr] for [token](super) types. 
+ use super::*; + use std::str::FromStr; + + impl FromStr for Reg { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "pc" => Reg::PC, + "sp" => Reg::SP, + "sr" => Reg::SR, + "cg" => Reg::CG, + "r0" => Reg::PC, + "r1" => Reg::SP, + "r2" => Reg::SR, + "r3" => Reg::CG, + "r4" => Reg::R4, + "r5" => Reg::R5, + "r6" => Reg::R6, + "r7" => Reg::R7, + "r8" => Reg::R8, + "r9" => Reg::R9, + "r10" => Reg::R10, + "r11" => Reg::R11, + "r12" => Reg::R12, + "r13" => Reg::R13, + "r14" => Reg::R14, + "r15" => Reg::R15, + _ => Err(())?, + }) + } + } + impl FromStr for NoEm { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "nop" => NoEm::Nop, + "ret" => NoEm::Ret, + "clrc" => NoEm::Clrc, + "clrz" => NoEm::Clrz, + "clrn" => NoEm::Clrn, + "setc" => NoEm::Setc, + "setz" => NoEm::Setz, + "setn" => NoEm::Setn, + "dint" => NoEm::Dint, + "eint" => NoEm::Eint, + _ => Err(())?, + }) + } + } + impl FromStr for OneEm { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "pop" => OneEm::Pop, + "rla" => OneEm::Rla, + "rlc" => OneEm::Rlc, + "inv" => OneEm::Inv, + "clr" => OneEm::Clr, + "tst" => OneEm::Tst, + "dec" => OneEm::Dec, + "decd" => OneEm::Decd, + "inc" => OneEm::Inc, + "incd" => OneEm::Incd, + "adc" => OneEm::Adc, + "dadc" => OneEm::Dadc, + "sbc" => OneEm::Sbc, + _ => Err(())?, + }) + } + } + impl FromStr for Special { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "br" => Special::Br, + _ => Err(())?, + }) + } + } + impl FromStr for OneArg { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "rrc" => OneArg::Rrc, + "swpb" => OneArg::Swpb, + "rra" => OneArg::Rra, + "sxt" => OneArg::Sxt, + "push" => OneArg::Push, + "call" => OneArg::Call, + "reti" => OneArg::Reti, + _ => Err(())?, + }) + } + } + impl FromStr for TwoArg { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "mov" => TwoArg::Mov, + "add" => TwoArg::Add, + "addc" => TwoArg::Addc, + "subc" => TwoArg::Subc, + "sub" 
=> TwoArg::Sub, + "cmp" => TwoArg::Cmp, + "dadd" => TwoArg::Dadd, + "bit" => TwoArg::Bit, + "bic" => TwoArg::Bic, + "bis" => TwoArg::Bis, + "xor" => TwoArg::Xor, + "and" => TwoArg::And, + _ => Err(())?, + }) + } + } + impl FromStr for Jump { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(match s { + "jne" => Jump::Jne, + "jnz" => Jump::Jnz, + "jeq" => Jump::Jeq, + "jz" => Jump::Jz, + "jnc" => Jump::Jnc, + "jlo" => Jump::Jlo, + "jc" => Jump::Jc, + "jhs" => Jump::Jhs, + "jn" => Jump::Jn, + "jge" => Jump::Jge, + "jl" => Jump::Jl, + "jmp" => Jump::Jmp, + _ => Err(())?, + }) } } } - -/// A [token Type](Type) is a semantic tag for a sequence of characters -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Type { - /// contiguous whitespace, excluding newline - Space, - /// newline and contiguous whitespace - Endl, - /// A line-comment - Comment, - /// Jump label *definition* - Label, - /// Instructions - Insn, - /// Operand width is byte - ByteWidth, - /// Operand width is word - WordWidth, - /// Register mnemonic (i.e. `pc`, `r14`) - Register, - /// Marker for base-10 - RadixMarkerDec, - /// Marker for base-16 - RadixMarkerHex, - /// Marker for base-8 - RadixMarkerOct, - /// Marker for base-2 - RadixMarkerBin, - /// 1-4 hexadigit numbers only - Number, - /// Negative number marker - Minus, - /// post-increment mode marker - Plus, - /// Open-Indexed-Mode marker - LParen, - /// Close-Indexed-Mode marker - RParen, - /// Open Square Bracket - LBracket, - /// Closed Square Bracket - RBracket, - /// Indirect mode marker - Indirect, - /// absolute address marker - Absolute, - /// immediate value marker - Immediate, - /// Valid identifier. 
Identifiers must start with a Latin alphabetic character or underline - Identifier, - /// A string, encased in "quotes" - String, - /// Assembler directive - Directive, - /// Separator (comma) - Separator, - /// End of File marker - #[default] - EndOfFile, - /// Invalid token - Invalid, -} - -regex_impl! {<'text> Token<'text> { - pub fn expect_space(text: &str) -> Option { - regex!(Type::Space = r"^[\s--\n]+") - } - pub fn expect_endl(text: &str) -> Option { - regex!(Type::Endl = r"^\n[\s--\n]*") - } - pub fn expect_comment(text: &str) -> Option { - regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*") - } - pub fn expect_label(text: &str) -> Option { - regex!(Type::Label = r"^:") - } - pub fn expect_insn(text: &str) -> Option { - regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)") - } - pub fn expect_byte_width(text: &str) -> Option { - regex!(Type::ByteWidth = r"(?i)^\.b") - } - pub fn expect_word_width(text: &str) -> Option { - regex!(Type::WordWidth = r"(?i)^\.w") - } - pub fn expect_register(text: &str) -> Option { - // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)") - regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)") - } - pub fn expect_radix_marker_dec(text: &str) -> Option { - regex!(Type::RadixMarkerDec = r"(?i)^0d") - } - pub fn expect_radix_marker_hex(text: &str) -> Option { - regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)") - } - pub fn expect_radix_marker_oct(text: &str) -> Option { - regex!(Type::RadixMarkerOct = r"(?i)^0o") - } - pub fn expect_radix_marker_bin(text: &str) -> Option { - regex!(Type::RadixMarkerBin = r"(?i)^0b") - } - pub fn expect_number(text: &str) -> Option { - regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)") - } - pub fn expect_minus(text: &str) -> Option { - regex!(Type::Minus = r"^-") - } - pub fn expect_plus(text: &str) -> Option { - 
regex!(Type::Plus = r"^\+") - } - pub fn expect_l_paren(text: &str) -> Option { - regex!(Type::LParen = r"^\(") - } - pub fn expect_r_paren(text: &str) -> Option { - regex!(Type::RParen = r"^\)") - } - pub fn expect_l_bracket(text: &str) -> Option { - regex!(Type::LBracket = r"^\[") - } - pub fn expect_r_bracket(text: &str) -> Option { - regex!(Type::RBracket = r"^]") - } - pub fn expect_indrect(text: &str) -> Option { - regex!(Type::Indirect = r"^@") - } - pub fn expect_absolute(text: &str) -> Option { - regex!(Type::Absolute = r"^&") - } - pub fn expect_immediate(text: &str) -> Option { - regex!(Type::Immediate = r"^#") - } - pub fn expect_string(text: &str) -> Option { - regex!(Type::String = r#"^"[^"]*""#) - } - pub fn expect_directive(text: &str) -> Option { - regex!(Type::Directive = r"^\.\w+") - } - pub fn expect_identifier(text: &str) -> Option { - regex!(Type::Identifier = r"^[A-Za-z_]\w*") - } - pub fn expect_separator(text: &str) -> Option { - regex!(Type::Separator = r"^,") - } - pub fn expect_end_of_file(text: &str) -> Option { - regex!(Type::EndOfFile = r"^$") - } - pub fn expect_anything(text: &str) -> Option { - regex!(Type::Invalid = r"^.*") - } -}} - -impl Display for Type { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Space => Display::fmt("space", f), - Self::Endl => Display::fmt("newline", f), - Self::Comment => Display::fmt("comment", f), - Self::Label => Display::fmt("label definition", f), - Self::Insn => Display::fmt("opcode", f), - Self::ByteWidth => Display::fmt("byte-width", f), - Self::WordWidth => Display::fmt("word-width", f), - Self::Register => Display::fmt("register", f), - Self::RadixMarkerDec => Display::fmt("decimal marker", f), - Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f), - Self::RadixMarkerOct => Display::fmt("octal marker", f), - Self::RadixMarkerBin => Display::fmt("binary marker", f), - Self::Number => Display::fmt("number", f), - Self::Minus => 
Display::fmt("minus sign", f), - Self::Plus => Display::fmt("plus sign", f), - Self::LParen => Display::fmt("left parenthesis", f), - Self::RParen => Display::fmt("right parenthesis", f), - Self::LBracket => Display::fmt("left bracket", f), - Self::RBracket => Display::fmt("right bracket", f), - Self::Indirect => Display::fmt("indirect", f), - Self::Absolute => Display::fmt("absolute", f), - Self::Immediate => Display::fmt("immediate", f), - Self::Identifier => Display::fmt("identifier", f), - Self::String => Display::fmt("string", f), - Self::Directive => Display::fmt("directive", f), - Self::Separator => Display::fmt("comma", f), - Self::EndOfFile => Display::fmt("EOF", f), - Self::Invalid => Display::fmt("invalid token", f), - } - } -} - -/// A [Token] which can outlive its parent buffer -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct OwnedToken { - /// The type of this token - variant: Type, - /// The sub[String] corresponding to this token - lexeme: String, -} - -impl Display for OwnedToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) } -} - -impl<'t> From<&'t OwnedToken> for Token<'t> { - fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } } -} - -impl From> for OwnedToken { - fn from(value: Token<'_>) -> Self { - let Token { variant, lexeme } = value; - OwnedToken { variant, lexeme: lexeme.to_owned() } - } -} - -/// [Types] are an owned array of [types](Type), with a custom [Display] implementation -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Types(Vec); - -impl> From for Types { - // TODO: Possibly bad. Check out in rust playground. 
- fn from(value: T) -> Self { Self(value.as_ref().to_owned()) } -} - -impl Display for Types { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for (idx, t) in self.0.iter().enumerate() { - Display::fmt(t, f)?; - match idx { - i if i < self.0.len() - 2 => Display::fmt(", ", f)?, - i if i < self.0.len() - 1 => Display::fmt(" or ", f)?, - _ => (), +mod display { + //! Implementations of [Display] for [token](super) types. + use super::*; + use std::fmt::Display; + impl<'t> Display for Token<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { lexeme, kind, pos: _ } = self; + match kind { + TokenKind::Comment + | TokenKind::Directive + | TokenKind::Identifier + | TokenKind::String => { + write!(f, "{}", lexeme) + } + ty => ty.fmt(f), + } + } + } + impl Display for TokenKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TokenKind::Eof => write!(f, "[EOF]"), + TokenKind::Newline => writeln!(f), + TokenKind::OpenParen => write!(f, "("), + TokenKind::CloseParen => write!(f, ")"), + TokenKind::OpenCurly => write!(f, "{{"), + TokenKind::CloseCurly => write!(f, "}}"), + TokenKind::OpenBrace => write!(f, "["), + TokenKind::CloseBrace => write!(f, "]"), + TokenKind::Comma => write!(f, ","), + TokenKind::Colon => write!(f, ":"), + TokenKind::Bang => write!(f, "!"), + TokenKind::At => write!(f, "@"), + TokenKind::Amp => write!(f, "&"), + TokenKind::Bar => write!(f, "|"), + TokenKind::Caret => write!(f, "^"), + TokenKind::Star => write!(f, "*"), + TokenKind::Hash => write!(f, "#"), + TokenKind::Dollar => write!(f, "$"), + TokenKind::Percent => write!(f, "%"), + TokenKind::Plus => write!(f, "+"), + TokenKind::Minus => write!(f, "-"), + TokenKind::Slash => write!(f, "/"), + TokenKind::Lsh => write!(f, "<<"), + TokenKind::Rsh => write!(f, ">>"), + TokenKind::Comment => write!(f, "; "), + TokenKind::Directive => write!(f, "."), + TokenKind::Identifier => write!(f, "Identifier"), + 
TokenKind::Number(val, 2) => write!(f, "0b{val:b}"), + TokenKind::Number(val, 8) => write!(f, "0o{val:o}"), + TokenKind::Number(val, 16) => write!(f, "0x{val:x}"), + TokenKind::Number(val, _) => write!(f, "{val}"), + TokenKind::Char(c) => write!(f, "'{c}'"), + TokenKind::String => write!(f, "\"String\""), + TokenKind::Reg(kw) => write!(f, "{kw}"), + TokenKind::NoEm(kw) => write!(f, "{kw}"), + TokenKind::OneEm(kw) => write!(f, "{kw}"), + TokenKind::Special(kw) => write!(f, "{kw}"), + TokenKind::OneArg(kw) => write!(f, "{kw}"), + TokenKind::TwoArg(kw) => write!(f, "{kw}"), + TokenKind::Jump(kw) => write!(f, "{kw}"), + TokenKind::Byte => write!(f, ".b"), + TokenKind::Word => write!(f, ".w"), + } + } + } + impl Display for Reg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Reg::PC => "pc".fmt(f), + Reg::SP => "sp".fmt(f), + Reg::SR => "sr".fmt(f), + Reg::CG => "cg".fmt(f), + Reg::R4 => "r4".fmt(f), + Reg::R5 => "r5".fmt(f), + Reg::R6 => "r6".fmt(f), + Reg::R7 => "r7".fmt(f), + Reg::R8 => "r8".fmt(f), + Reg::R9 => "r9".fmt(f), + Reg::R10 => "r10".fmt(f), + Reg::R11 => "r11".fmt(f), + Reg::R12 => "r12".fmt(f), + Reg::R13 => "r13".fmt(f), + Reg::R14 => "r14".fmt(f), + Reg::R15 => "r15".fmt(f), + } + } + } + impl Display for NoEm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + NoEm::Nop => "nop".fmt(f), + NoEm::Ret => "ret".fmt(f), + NoEm::Clrc => "clrc".fmt(f), + NoEm::Clrz => "clrz".fmt(f), + NoEm::Clrn => "clrn".fmt(f), + NoEm::Setc => "setc".fmt(f), + NoEm::Setz => "setz".fmt(f), + NoEm::Setn => "setn".fmt(f), + NoEm::Dint => "dint".fmt(f), + NoEm::Eint => "eint".fmt(f), + } + } + } + impl Display for OneEm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OneEm::Pop => "pop".fmt(f), + OneEm::Rla => "rla".fmt(f), + OneEm::Rlc => "rlc".fmt(f), + OneEm::Inv => "inv".fmt(f), + OneEm::Clr => "clr".fmt(f), + OneEm::Tst => "tst".fmt(f), + OneEm::Dec => 
"dec".fmt(f), + OneEm::Decd => "decd".fmt(f), + OneEm::Inc => "inc".fmt(f), + OneEm::Incd => "incd".fmt(f), + OneEm::Adc => "adc".fmt(f), + OneEm::Dadc => "dadc".fmt(f), + OneEm::Sbc => "sbc".fmt(f), + } + } + } + impl Display for Special { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Special::Br => "br".fmt(f), + } + } + } + impl Display for OneArg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OneArg::Rrc => "rrc".fmt(f), + OneArg::Swpb => "swpb".fmt(f), + OneArg::Rra => "rra".fmt(f), + OneArg::Sxt => "sxt".fmt(f), + OneArg::Push => "push".fmt(f), + OneArg::Call => "call".fmt(f), + OneArg::Reti => "reti".fmt(f), + } + } + } + impl Display for TwoArg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TwoArg::Mov => "mov".fmt(f), + TwoArg::Add => "add".fmt(f), + TwoArg::Addc => "addc".fmt(f), + TwoArg::Subc => "subc".fmt(f), + TwoArg::Sub => "sub".fmt(f), + TwoArg::Cmp => "cmp".fmt(f), + TwoArg::Dadd => "dadd".fmt(f), + TwoArg::Bit => "bit".fmt(f), + TwoArg::Bic => "bic".fmt(f), + TwoArg::Bis => "bis".fmt(f), + TwoArg::Xor => "xor".fmt(f), + TwoArg::And => "and".fmt(f), + } + } + } + impl Display for Jump { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Jump::Jne => "jne".fmt(f), + Jump::Jnz => "jnz".fmt(f), + Jump::Jeq => "jeq".fmt(f), + Jump::Jz => "jz".fmt(f), + Jump::Jnc => "jnc".fmt(f), + Jump::Jlo => "jlo".fmt(f), + Jump::Jc => "jc".fmt(f), + Jump::Jhs => "jhs".fmt(f), + Jump::Jn => "jn".fmt(f), + Jump::Jge => "jge".fmt(f), + Jump::Jl => "jl".fmt(f), + Jump::Jmp => "jmp".fmt(f), } } - Ok(()) } } diff --git a/src/lexer/token_stream.rs b/src/lexer/token_stream.rs deleted file mode 100644 index 7fe2df0..0000000 --- a/src/lexer/token_stream.rs +++ /dev/null @@ -1,85 +0,0 @@ -// © 2023 John Breaux -//! 
A TokenStream is a specialized [Iterator] which produces [Tokens](Token) -use super::*; - -use super::ignore::Ignore; -use super::preprocessed::Preprocessed; - -/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token) -pub trait TokenStream<'text>: Iterator> + std::fmt::Debug { - /// Gets this stream's [Context] - fn context(&self) -> Context; - - /// Creates an iterator that skips [Type::Space] in the input - #[inline] - fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self> - where Self: Sized { - Ignore::new(variant, self) - } - - /// Creates a [TokenStream] that performs live substitution of the input - #[inline] - fn preprocessed(&'text mut self) -> Preprocessed<'text, Self> - where Self: Sized { - Preprocessed::new(self) - } - - /// Returns the next [Token] without advancing - fn peek(&mut self) -> Self::Item; - - /// Returns the next [Token] if it is of the expected [Type], without advancing - fn peek_expect(&mut self, expected: Type) -> Result; - - /// Consumes and returns a [Token] if it is the expected [Type] - /// - /// Otherwise, does not consume a [Token] - fn expect(&mut self, expected: Type) -> Result; - - /// Ignores a [Token] of the expected [Type], propegating errors. - #[inline] - fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) } - - /// Ignores a [Token] of the expected [Type], discarding errors. - #[inline] - fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } - - /// Runs a function on each - fn any_of(&mut self, f: fn(&mut Self, Type) -> Result, expected: T) -> Result - where T: AsRef<[Type]> { - for &expected in expected.as_ref() { - match f(self, expected).map_err(|e| e.bare()) { - Ok(t) => return Ok(t), - Err(LexError::UnexpectedToken { .. 
}) => continue, - Err(e) => return Err(e.context(self.context())), - } - } - Err(LexError::expected(expected, self.peek()).context(self.context())) - } - - /// Returns the next [Token] if it is of the expected [Types](Type), without advancing - #[inline] - fn peek_expect_any_of(&mut self, expected: T) -> Result - where T: AsRef<[Type]> { - self.any_of(Self::peek_expect, expected) - } - /// Consumes and returns a [Token] if it matches any of the expected [Types](Type) - /// - /// Otherwise, does not consume a [Token] - #[inline] - fn expect_any_of(&mut self, expected: T) -> Result - where T: AsRef<[Type]> { - self.any_of(Self::expect, expected) - } - /// Ignores a [Token] of any expected [Type], discarding errors. - #[inline] - fn allow_any_of(&mut self, expected: T) - where T: AsRef<[Type]> { - let _ = self.expect_any_of(expected); - } - /// Ignores a [Token] of any expected [Type], propegating errors. - #[inline] - fn require_any_of(&mut self, expected: T) -> Result<(), LexError> - where T: AsRef<[Type]> { - self.any_of(Self::require, expected) - } -} diff --git a/src/lib.rs b/src/lib.rs index e879dd1..63a4e5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,23 +54,55 @@ //! └─ EndOfFile //! ``` -pub mod preamble { - //! 
Common imports for msp430-asm - use super::*; - pub use assembler::Assembler; - pub use error::Error; - pub use lexer::{ - context::Context, - token::{Token, Type}, - token_stream::TokenStream, - Tokenizer, +pub mod util { + use std::{ + fmt::{Debug, Display}, + ops::{Index, Range}, }; - pub use parser::Parser; + /// A [Clone] + [Copy] + [!Iterator](Iterator) <\code> version of a [Range] + #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct Span { + pub start: Idx, + pub end: Idx, + } + impl From> for Range { + fn from(value: Span) -> Self { + value.start..value.end + } + } + impl From> for Span { + fn from(value: Range) -> Self { + Self { start: value.start, end: value.end } + } + } + impl Index> for [T] { + type Output = [T]; + fn index(&self, index: Span) -> &Self::Output { + self.index(Range::from(index)) + } + } + impl Index> for str { + type Output = str; + fn index(&self, index: Span) -> &Self::Output { + self.index(Range::from(index)) + } + } + impl Debug for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}..{:?}", self.start, self.end) + } + } + impl Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } + } } -use preamble::*; -pub mod error; +pub mod lexer; + +pub mod preprocessor; + +pub mod parser; pub mod assembler; -pub mod lexer; -pub mod parser; diff --git a/src/parser.rs b/src/parser.rs index 6fe8293..934fb24 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,81 +1,591 @@ -// © 2023 John Breaux -//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root) +// © 2023-2024 John Breaux +//! 
Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast) +pub mod ast; -use crate::{TokenStream, Type}; -use error::ParseError; -use preamble::*; -use std::{ - fmt::{Debug, Display}, - path::Path, +use self::error::{ + Error, + ErrorKind::{self, *}, + PResult, Parsing, }; +use crate::{ + lexer::{ + token::{Reg, Special, Token, TokenKind as Kind}, + Lexer, + }, + preprocessor::Preprocessor, + util::Span, +}; +use ast::*; -pub mod preamble { - //! All the different AST node types - use super::*; - // Traits - pub use parsable::Parsable; - // Nodes - pub use comment::Comment; - pub use directive::Directive; - pub use identifier::Identifier; - pub use instruction::{ - encoding::{ - encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand, - register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding, - }, - opcode::Opcode, +#[derive(Clone, Debug)] +pub struct Parser<'t> { + lexer: Preprocessor<'t>, + next: Option>, + loc: Span, +} + +impl<'t> Parser<'t> { + /// Creates a new [Parser] + pub fn new(text: &'t str) -> Self { + let lexer = Preprocessor::new(text); + Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } + } + /// Createes a new [Parser] from an existing [Lexer] + pub fn with_lexer(lexer: Lexer<'t>) -> Self { + let lexer = Preprocessor::with_lexer(lexer); + Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } + } + + pub fn parse>(&mut self) -> PResult { + Parsable::parse(self) + } + pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error { + Error { parsing, kind, loc: self.loc } + } + + /// Peek a token out of the lexer + pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> { + if self.next.is_none() { + self.next = self.lexer.scan(); + } + self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p)) + } + pub fn next(&mut self, p: Parsing) -> PResult> { + Ok(match self.take() { + 
Some(token) => token, + None => { + self.peek(p)?; + self.take().expect("should have been populated by peek") + } + }) + } + /// Consumes the next token + pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> { + match self.peek(p)?.kind { + kind if kind == expect => { + self.take(); + Ok(self) + } + kind => Err(self.error(Unexpected(kind), p)), + } + } + /// Consumes the next token without checking it + pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> { + self.next(p)?; + Ok(self) + } + /// Take the last peeked token + pub fn take(&mut self) -> Option> { + self.next.take() + } +} + +// Expressions +impl<'t> Parser<'t> { + /// Parses an expression + pub fn expr(&mut self) -> PResult> { + self.term() + } + /// Parses a term-expression (binary `*`mul, `/`div, `%`rem) + pub fn term(&mut self) -> PResult> { + let p = Parsing::Expr; + let a = self.factor()?; + let mut other = vec![]; + loop { + match self.peek(p)?.kind { + Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)), + Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)), + Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)), + _ if other.is_empty() => break Ok(a), + _ => break Ok(Expr::Binary(a.into(), other)), + } + } + } + /// Parses a factor expression (binary `+`add, `-`sub) + pub fn factor(&mut self) -> PResult> { + let p = Parsing::Expr; + let a = self.shift()?; + let mut other = vec![]; + loop { + match self.peek(p)?.kind { + Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)), + Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)), + _ if other.is_empty() => break Ok(a), + _ => break Ok(Expr::Binary(a.into(), other)), + } + } + } + /// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right) + pub fn shift(&mut self) -> PResult> { + let p = Parsing::Expr; + let a = self.bin()?; + let mut other = vec![]; + loop { + match self.peek(p)?.kind { + Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)), 
+ Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)), + _ if other.is_empty() => break Ok(a), + _ => break Ok(Expr::Binary(a.into(), other)), + } + } + } + pub fn bin(&mut self) -> PResult> { + let p = Parsing::Expr; + let a = self.unary()?; + let mut other = vec![]; + loop { + match self.peek(p)?.kind { + Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)), + Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)), + Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)), + _ if other.is_empty() => break Ok(a), + _ => break Ok(Expr::Binary(a.into(), other)), + } + } + } + /// Parses a unary expression (`!`invert, `-`negate) + pub fn unary(&mut self) -> PResult> { + let p = Parsing::Expr; + let mut ops = vec![]; + loop { + match self.peek(p)?.kind { + Kind::Star => ops.push(UnOp::Deref), + Kind::Minus => ops.push(UnOp::Neg), + Kind::Bang => ops.push(UnOp::Not), + _ if ops.is_empty() => break Ok(self.primary()?), + _ => break Ok(Expr::Unary(ops, self.primary()?.into())), + } + self.take(); + } + } + /// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier + pub fn primary(&mut self) -> PResult> { + let p = Parsing::Expr; + let Token { lexeme, kind, .. 
} = *self.peek(p)?; + Ok(match kind { + Kind::OpenParen => { + let out = Expr::Group(self.then(p)?.parse()?); + self.assert(Kind::CloseParen, p)?; + out + } + Kind::Number(n, _) => { + self.take(); + Expr::Number(n) + } + Kind::Identifier => { + self.take(); + Expr::Ident(lexeme) + } + Kind::Amp => self.then(p)?.addrof()?, + ty => Err(self.error(NonNumeric(ty), p))?, + }) + } + pub fn addrof(&mut self) -> PResult> { + let p = Parsing::Expr; + let token = self.peek(p)?; + let out = match token.kind { + Kind::Identifier => Expr::AddrOf(token.lexeme), + Kind::Number(n, _) => Expr::Number(n), + ty => Err(self.error(Unexpected(ty), p))?, + }; + self.take(); + Ok(out) + } +} + +pub trait Parsable<'t>: Sized { + fn parse(p: &mut Parser<'t>) -> PResult; +} + +impl<'t> Parsable<'t> for Statements<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let mut stmts = vec![]; + while p.peek(Parsing::File)?.kind != Kind::Eof { + stmts.push(p.parse()?) + } + Ok(Self { stmts }) + } +} + +impl<'t> Parsable<'t> for Statement<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let token = *p.peek(Parsing::Stmt)?; + Ok(match token.kind { + Kind::Comment => { + p.take(); + Statement::Comment(token.lexeme) + } + Kind::Directive => Statement::Directive(p.parse()?), + Kind::Identifier => Statement::Label(p.label()?), + _ => Statement::Insn(p.parse()?), + }) + } +} +impl<'t> Parsable<'t> for Directive<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Directive; + let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?; + let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) }; + p.take(); + Ok(match lexeme { + ".define" => Directive::Define(p.parse()?), + ".org" => Directive::Org(p.expr()?.into()), + ".word" => Directive::Word(p.parse()?), + ".words" => Directive::Words(p.parse()?), + ".string" => Directive::String(p.string()?), + _ => Err(p.error(Unexpected(Kind::Directive), parsing))?, + }) + } +} +impl<'t> Parsable<'t> for Vec> { + fn 
parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Directive; + let mut tokens = vec![]; + loop { + if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind { + break; + } + tokens.push(p.next(parsing)?) + } + p.take(); + Ok(tokens) + } +} +impl<'t> Parsable<'t> for Instruction<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let start = p.peek(Parsing::Instruction)?.pos.start; + Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } }) + } +} +impl<'t> Parsable<'t> for InstructionKind<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + use crate::lexer::token::OneArg; + // an instruction starts with an opcode + Ok(match p.peek(Parsing::Instruction)?.kind() { + Kind::NoEm(_) => Self::NoEm(p.parse()?), + Kind::OneEm(_) => Self::OneEm(p.parse()?), + Kind::Special(Special::Br) => Self::Br(p.parse()?), + Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?), + Kind::OneArg(_) => Self::OneArg(p.parse()?), + Kind::TwoArg(_) => Self::TwoArg(p.parse()?), + Kind::Jump(_) => Self::Jump(p.parse()?), + ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?, + }) + } +} +impl<'t> Parsable<'t> for NoEm { + fn parse(p: &mut Parser<'t>) -> PResult { + match p.next(Parsing::NoEm)?.kind { + Kind::NoEm(opcode) => Ok(Self { opcode }), + ty => Err(p.error(Unexpected(ty), Parsing::NoEm)), + } + } +} +impl<'t> Parsable<'t> for OneEm<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + Ok(Self { + opcode: match p.next(Parsing::OneEm)?.kind { + Kind::OneEm(opcode) => opcode, + ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?, + }, + width: p.parse()?, + dst: p.parse()?, + }) + } +} +impl<'t> Parsable<'t> for OneArg<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + Ok(Self { + opcode: match p.next(Parsing::OneArg)?.kind { + Kind::OneArg(opcode) => opcode, + ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?, + }, + width: p.parse()?, + src: p.parse()?, + }) + } +} +impl<'t> Parsable<'t> for TwoArg<'t> { + fn parse(p: &mut Parser<'t>) -> 
PResult { + let parsing = Parsing::TwoArg; + Ok(Self { + opcode: match p.next(parsing)?.kind { + Kind::TwoArg(opcode) => opcode, + ty => Err(p.error(Unexpected(ty), parsing))?, + }, + width: p.parse()?, + src: p.parse()?, + dst: p.assert(Kind::Comma, parsing)?.parse()?, + }) + } +} +impl<'t> Parsable<'t> for Jump<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Jump; + Ok(Self { + opcode: match p.next(parsing)?.kind { + Kind::Jump(opcode) => opcode, + ty => Err(p.error(Unexpected(ty), parsing))?, + }, + dst: p.parse()?, + }) + } +} +impl<'t> Parsable<'t> for Reti { + fn parse(p: &mut Parser<'t>) -> PResult { + use crate::lexer::token::OneArg; + p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?; + Ok(Reti) + } +} +impl<'t> Parsable<'t> for Br<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + p.assert(Kind::Special(Special::Br), Parsing::Br)?; + Ok(Self { src: p.parse()? }) + } +} + +impl<'t> Parsable<'t> for Src<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Src; + Ok(match p.peek(parsing)?.kind { + Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special + Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr + Kind::At => { + let reg = match p.then(parsing)?.next(parsing)?.kind { + Kind::Reg(r) => r, + ty => Err(p.error(Unexpected(ty), parsing))?, + }; + if let Kind::Plus = p.peek(parsing)?.kind { + p.take(); + Src::PostInc(reg) + } else { + Src::Indirect(reg) + } + } // @reg+, @reg + Kind::Reg(_) => Src::Direct(p.parse()?), + _ => { + let expr = p.parse()?; + match p.peek(parsing)?.kind { + Kind::OpenParen => Src::Indexed(expr, { + let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; + p.assert(Kind::CloseParen, parsing)?; + reg + }), + _ => Src::BareExpr(expr), + } + } + }) + } +} +impl<'t> Parsable<'t> for Dst<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Dst; + Ok(match p.peek(parsing)?.kind { + Kind::Hash => match 
p.then(parsing)?.next(parsing)?.kind { + Kind::Number(0, _) => Dst::Special(DstSpecial::Zero), + Kind::Number(1, _) => Dst::Special(DstSpecial::One), + Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?, + ty => Err(p.error(Unexpected(ty), parsing))?, + }, + Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?), + Kind::Reg(_) => Dst::Direct(p.parse()?), + _ => Dst::Indexed(p.expr()?.into(), { + let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; + p.assert(Kind::CloseParen, parsing)?; + reg + }), + }) + } +} +impl<'t> Parsable<'t> for JumpDst<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Jump; + let mut neg = false; + let out = loop { + let token = p.peek(parsing)?; + match token.kind { + Kind::Minus => { + neg = !neg; + } + Kind::Plus => {} + Kind::Identifier => break Self::Label(token.lexeme), + Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }), + ty => Err(p.error(Unexpected(ty), parsing))?, + } + p.take(); + }; + p.take(); + Ok(out) + } +} +impl<'t> Parsable<'t> for Width { + fn parse(p: &mut Parser<'t>) -> PResult { + let out = match p.peek(Parsing::Width)?.kind() { + Kind::Byte => Width::Byte, + Kind::Word => Width::Word, + _ => return Ok(Width::Word), + }; + p.take(); + Ok(out) + } +} +impl<'t> Parsable<'t> for Reg { + fn parse(p: &mut Parser<'t>) -> PResult { + let out = match p.peek(Parsing::Reg)?.kind { + Kind::Reg(r) => r, + ty => Err(p.error(Unexpected(ty), Parsing::Reg))?, + }; + p.take(); + Ok(out) + } +} +impl<'t> Parsable<'t> for Expr<'t> { + fn parse(p: &mut Parser<'t>) -> PResult { + p.expr() + } +} +impl<'t, T: Parsable<'t>> Parsable<'t> for Box { + fn parse(p: &mut Parser<'t>) -> PResult { + Ok(Box::new(p.parse()?)) + } +} +impl<'t, T: Parsable<'t>> Parsable<'t> for Vec { + fn parse(p: &mut Parser<'t>) -> PResult { + let parsing = Parsing::Vec; + p.assert(Kind::OpenBrace, parsing)?; + let mut out = vec![]; + while Kind::CloseBrace != p.peek(parsing)?.kind { + 
out.push(p.parse()?) + } + p.assert(Kind::CloseBrace, parsing)?; + Ok(out) + } +} +/// Context-sensitive parsing rules +impl<'t> Parser<'t> { + pub fn string(&mut self) -> PResult<&'t str> { + let token = *self.peek(Parsing::Directive)?; + match token.kind { + Kind::String => { + self.take(); + Ok(&token.lexeme[1..token.lexeme.len() - 1]) + } + ty => Err(self.error(Unexpected(ty), Parsing::Directive)), + } + } + pub fn label(&mut self) -> PResult<&'t str> { + let p = Parsing::Label; + let token = self.next(p)?; + assert_eq!(Kind::Identifier, token.kind); + self.assert(Kind::Colon, p)?; + Ok(token.lexeme) + } + pub fn reg(&mut self) -> PResult { + match self.peek(Parsing::Reg)?.kind { + Kind::Reg(r) => { + self.take(); + Ok(r) + } + ty => Err(self.error(Unexpected(ty), Parsing::Reg)), + } + } +} + +pub mod error { + use super::Kind; + use crate::util::Span; + use std::{fmt::Display, num::TryFromIntError}; + + pub type PResult = Result; + + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct Error { + pub parsing: Parsing, + pub kind: ErrorKind, + pub loc: Span, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub enum ErrorKind { + LexError, + /// Returned when [Parsing::ConstExpr] fails without consuming + NotExpr, + DivZero, + NonNumeric(Kind), + BadIntForDst(u16), + TryFromIntError(TryFromIntError), + Unexpected(Kind), + BufEmpty, + Todo, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum Parsing { + File, + Stmt, + + Label, + Directive, Instruction, - }; - pub use label::Label; - pub use line::Line; - pub use root::Root; - // Error - pub use error::ParseError; -} -pub mod parsable; + NoEm, + OneEm, + Reti, + Br, + OneArg, + TwoArg, + Jump, -pub mod comment; -pub mod directive; -pub mod error; -pub mod identifier; -pub mod instruction; -pub mod label; -pub mod line; -pub mod root; + Width, + Src, + Dst, + Reg, -pub struct Parser { - radix: u32, -} - -impl Parser { - pub fn parse_with<'t>(self, stream: &'t mut impl 
TokenStream<'t>) -> Result { - Root::parse(&self, &mut stream.ignore(Type::Space)) + Expr, + Vec, } - pub fn parse(self, input: &T) -> Result - where T: AsRef + ?Sized { - Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) + impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing) + } } - pub fn parse_file

(self, path: &P) -> Result - where P: AsRef + ?Sized { - self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into())) + impl Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ErrorKind::LexError => write!(f, "lexical error"), + ErrorKind::TryFromIntError(e) => write!(f, "{e}"), + ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"), + ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"), + ErrorKind::DivZero => write!(f, "Division by zero"), + ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"), + ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"), + ErrorKind::BufEmpty => write!(f, "Peek buffer empty"), + ErrorKind::Todo => write!(f, "Not yet implemented"), + } + } } - pub fn parse_one(self, input: &T) -> Result - where T: AsRef + ?Sized { - Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) - } - - /// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number] - /// conversion - pub fn radix(mut self, radix: u32) { self.radix = radix; } -} - -impl Default for Parser { - fn default() -> Self { Self { radix: 16 } } -} - -impl Debug for Parser { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive() + impl Display for Parsing { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Parsing::File => "a file".fmt(f), + Parsing::Stmt => "a line".fmt(f), + Parsing::Label => "a label".fmt(f), + Parsing::Directive => "a directive".fmt(f), + Parsing::Instruction => "an instruction".fmt(f), + Parsing::NoEm => "a no-operand emulated instruction".fmt(f), + Parsing::OneEm => "a one-operand emulated instruction".fmt(f), + Parsing::Reti => "a `reti` instruction".fmt(f), + Parsing::Br => "a `br` instruction".fmt(f), + Parsing::OneArg => "a 
one-operand instruction".fmt(f), + Parsing::TwoArg => "a two-operand instruction".fmt(f), + Parsing::Jump => "a jump instruction".fmt(f), + Parsing::Width => "an instruction width".fmt(f), + Parsing::Src => "a source".fmt(f), + Parsing::Dst => "a destination".fmt(f), + Parsing::Reg => "a register".fmt(f), + Parsing::Expr => "a constant expression".fmt(f), + Parsing::Vec => "a list".fmt(f), + } + } } + impl std::error::Error for Error {} } diff --git a/src/parser/ast.rs b/src/parser/ast.rs new file mode 100644 index 0000000..a798dd1 --- /dev/null +++ b/src/parser/ast.rs @@ -0,0 +1,679 @@ +// © 2023-2024 John Breaux +/// Represents MSP430 instructions, +use crate::{ + lexer::token::{self, Reg, Token}, + util::Span, +}; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Statements<'t> { + pub stmts: Vec>, +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Statement<'t> { + Label(&'t str), + Insn(Instruction<'t>), + Directive(Directive<'t>), + Comment(&'t str), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Directive<'t> { + /// TODO: Store define as a vec of tokens. 
This will require help from the + /// [preprocessor](crate::preprocessor) + Define(Vec>), + Org(Box>), + Word(Box>), + Words(Vec>), + String(&'t str), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Instruction<'t> { + pub span: Span, + pub kind: InstructionKind<'t>, +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum InstructionKind<'t> { + NoEm(NoEm), + OneEm(OneEm<'t>), + OneArg(OneArg<'t>), + TwoArg(TwoArg<'t>), + Jump(Jump<'t>), + Reti(Reti), + Br(Br<'t>), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct NoEm { + pub opcode: token::NoEm, +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct OneEm<'t> { + pub opcode: token::OneEm, + pub width: Width, + pub dst: Dst<'t>, +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct OneArg<'t> { + pub opcode: token::OneArg, + pub width: Width, + pub src: Src<'t>, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct TwoArg<'t> { + pub opcode: token::TwoArg, + pub width: Width, + pub src: Src<'t>, + pub dst: Dst<'t>, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Jump<'t> { + pub opcode: token::Jump, + pub dst: JumpDst<'t>, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Reti; +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Br<'t> { + pub src: Src<'t>, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] +pub enum Width { + #[default] + Word, + Byte, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Src<'t> { + Direct(Reg), + Indexed(Box>, Reg), + Indirect(Reg), + PostInc(Reg), + Absolute(Box>), + Immediate(Box>), + Special(SrcSpecial), + BareExpr(Box>), +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum SrcSpecial { + Zero, + One, + Four, + Two, + Eight, + NegOne, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Dst<'t> { + Direct(Reg), 
+ Indexed(Box>, Reg), + Absolute(Box>), + Special(DstSpecial), +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum DstSpecial { + Zero, + One, +} +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum JumpDst<'t> { + /// A relative offset, nominally an even number from -0x400..=0x3fe + Rel(i16), + Label(&'t str), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Expr<'t> { + Binary(Box>, Vec<(BinOp, Expr<'t>)>), + Unary(Vec, Box>), + Group(Box>), + Number(u16), + Ident(&'t str), + AddrOf(&'t str), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum BinOp { + Mul, + Div, + Rem, + Add, + Sub, + Lsh, + Rsh, + And, + Xor, + Or, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum UnOp { + Deref, + Not, + Neg, +} + +pub mod conv { + //! Conversions between [ast](super) types, via [From], or via `new` constructor + use super::{InstructionKind as Ik, *}; + + macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$( + impl<'t> From<$src> for $dst { + fn from(value: $src) -> Self { + $expr(value) + } + } + )*}} + // sure am glad macros aren't hygenic over lifetimes + impl_from! { Ik<'t> { + NoEm => Ik::NoEm, + OneEm<'t> => Ik::OneEm, + OneArg<'t> => Ik::OneArg, + TwoArg<'t> => Ik::TwoArg, + Jump<'t> => Ik::Jump, + Reti => Ik::Reti, + Br<'t> => Ik::Br, + }} + impl_from! 
{ Expr<'t> { + u16 => Expr::Number + }} + impl<'t> From> for Src<'t> { + fn from(value: Dst<'t>) -> Self { + match value { + Dst::Special(v) => Src::Special(v.into()), + Dst::Absolute(v) => Src::Absolute(v), + Dst::Indexed(i, r) => Src::Indexed(i, r), + Dst::Direct(r) => Src::Direct(r), + } + } + } + impl From for SrcSpecial { + fn from(value: DstSpecial) -> Self { + match value { + DstSpecial::Zero => SrcSpecial::Zero, + DstSpecial::One => SrcSpecial::One, + } + } + } + impl<'t> TwoArg<'t> { + pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self { + Self { opcode, width, src, dst } + } + } +} +pub mod display { + use super::*; + use std::fmt::Display; + + impl<'t> Display for Statements<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for stmt in &self.stmts { + writeln!(f, "{stmt}")?; + } + Ok(()) + } + } + impl<'t> Display for Statement<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Statement::Label(v) => write!(f, "{v}:"), + Statement::Insn(v) => write!(f, "{v}"), + Statement::Directive(v) => write!(f, "{v}"), + Statement::Comment(v) => write!(f, "{v}"), + } + } + } + impl<'t> Display for Directive<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Directive::Define(_) => write!(f, ".directive"), + Directive::Org(e) => write!(f, ".org {e}"), + Directive::Word(w) => write!(f, ".word {w}"), + Directive::Words(words) => { + write!(f, ".words [ ")?; + for word in words { + write!(f, "{word} ")?; + } + write!(f, "]") + } + Directive::String(s) => write!(f, ".string \"{s}\""), + } + } + } + impl<'t> Display for Instruction<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { span: _, kind } = self; + write!(f, "{kind}") + } + } + impl<'t> Display for InstructionKind<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + InstructionKind::NoEm(v) 
=> v.fmt(f), + InstructionKind::OneEm(v) => v.fmt(f), + InstructionKind::OneArg(v) => v.fmt(f), + InstructionKind::TwoArg(v) => v.fmt(f), + InstructionKind::Jump(v) => v.fmt(f), + InstructionKind::Reti(v) => v.fmt(f), + InstructionKind::Br(v) => v.fmt(f), + } + } + } + impl Display for NoEm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { opcode } = self; + write!(f, "{opcode}") + } + } + impl<'t> Display for OneEm<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { opcode, width, dst } = self; + write!(f, "{opcode}{width}\t{dst}") + } + } + impl<'t> Display for OneArg<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { opcode, width, src } = self; + write!(f, "{opcode}{width}\t{src}") + } + } + impl<'t> Display for TwoArg<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { opcode, width, src, dst } = self; + write!(f, "{opcode}{width}\t{src}, {dst}") + } + } + impl<'t> Display for Jump<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { opcode, dst } = self; + write!(f, "{opcode}\t{dst}") + } + } + impl Display for Reti { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "reti") + } + } + impl<'t> Display for Br<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { src } = self; + write!(f, "br\t{src}") + } + } + + impl<'t> Display for Src<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Src::Direct(r) => write!(f, "{r}"), + Src::Indexed(e, r) => write!(f, "{e}({r})"), + Src::Indirect(r) => write!(f, "@{r}"), + Src::PostInc(r) => write!(f, "@{r}+"), + Src::Absolute(e) => write!(f, "&{e}"), + Src::Immediate(e) => write!(f, "#{e}"), + Src::Special(i) => write!(f, "#{i}"), + Src::BareExpr(id) => write!(f, "{id}"), + } + } + } + impl Display for SrcSpecial { + fn 
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SrcSpecial::Zero => write!(f, "0"), + SrcSpecial::One => write!(f, "1"), + SrcSpecial::Four => write!(f, "4"), + SrcSpecial::Two => write!(f, "2"), + SrcSpecial::Eight => write!(f, "8"), + SrcSpecial::NegOne => write!(f, "-1"), + } + } + } + impl<'t> Display for Dst<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Dst::Direct(r) => write!(f, "{r}"), + Dst::Indexed(e, r) => write!(f, "{e}({r})"), + Dst::Absolute(e) => write!(f, "&{e}"), + Dst::Special(i) => write!(f, "#{i}"), + } + } + } + impl Display for DstSpecial { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DstSpecial::Zero => write!(f, "0"), + DstSpecial::One => write!(f, "1"), + } + } + } + impl<'t> Display for JumpDst<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + JumpDst::Rel(i) => write!(f, "{i}"), + JumpDst::Label(l) => write!(f, "{l}"), + } + } + } + impl<'t> Display for Expr<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Binary(head, tail) => { + write!(f, "{head}")?; + for (op, tail) in tail { + write!(f, "{op}{tail}")?; + } + Ok(()) + } + Expr::Unary(ops, tail) => { + for op in ops { + write!(f, "{op}")? 
+ } + write!(f, "{tail}") + } + Expr::Group(e) => write!(f, "({e})"), + Expr::Number(n) => write!(f, "{n:x}"), + Expr::Ident(n) => write!(f, "{n}"), + Expr::AddrOf(n) => write!(f, "&{n}"), + } + } + } + impl Display for BinOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BinOp::Mul => write!(f, "*"), + BinOp::Div => write!(f, "/"), + BinOp::Rem => write!(f, "%"), + BinOp::Add => write!(f, "+"), + BinOp::Sub => write!(f, "-"), + BinOp::Lsh => write!(f, "<<"), + BinOp::Rsh => write!(f, ">>"), + BinOp::And => write!(f, "&"), + BinOp::Xor => write!(f, "^"), + BinOp::Or => write!(f, "|"), + } + } + } + impl Display for UnOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UnOp::Deref => write!(f, "*"), + UnOp::Not => write!(f, "!"), + UnOp::Neg => write!(f, "-"), + } + } + } + impl Display for Width { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Width::Word => Ok(()), + Width::Byte => write!(f, ".b"), + } + } + } +} +pub mod canonical { + use std::iter; + + use super::*; + use token::TwoArg::*; + pub trait Canonicalize { + /// The output after canonicalization + type Output; + /// Transmutes Self into its "canonical" form. "Emulated" instructions are converted + /// into their respective non-emulated forms. 
+ fn to_canonical(self) -> Self::Output; + } + impl<'t> Canonicalize for Statements<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() } + } + } + impl<'t> Canonicalize for Statement<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + match self { + Statement::Insn(i) => Self::Insn(i.to_canonical()), + _ => self, + } + } + } + impl<'t> Canonicalize for Instruction<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + Self { kind: self.kind.to_canonical(), ..self } + } + } + impl<'t> Canonicalize for InstructionKind<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + match self { + Self::NoEm(v) => Self::TwoArg(v.to_canonical()), + Self::OneEm(v) => Self::TwoArg(v.to_canonical()), + Self::Reti(v) => Self::Reti(v.to_canonical()), + Self::Br(v) => Self::TwoArg(v.to_canonical()), + Self::OneArg(v) => Self::OneArg(v.to_canonical()), + Self::TwoArg(v) => Self::TwoArg(v.to_canonical()), + Self::Jump(v) => Self::Jump(v.to_canonical()), + } + } + } + impl Canonicalize for NoEm { + type Output = TwoArg<'static>; + fn to_canonical(self) -> Self::Output { + let Self { opcode } = self; + use SrcSpecial::*; + use Width::*; + match opcode { + token::NoEm::Nop => { + TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG)) + } + token::NoEm::Ret => { + TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC)) + } + token::NoEm::Clrc => { + TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR)) + } + token::NoEm::Clrz => { + TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR)) + } + token::NoEm::Clrn => { + TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR)) + } + token::NoEm::Setc => { + TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR)) + } + token::NoEm::Setz => { + TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR)) + } + token::NoEm::Setn => { + 
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR)) + } + token::NoEm::Dint => { + TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR)) + } + token::NoEm::Eint => { + TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR)) + } + } + } + } + impl<'t> Canonicalize for OneEm<'t> { + type Output = TwoArg<'t>; + fn to_canonical(self) -> Self::Output { + use SrcSpecial::*; + let Self { opcode, width, dst } = self; + match opcode { + token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst), + token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst), + token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst), + token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst), + token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst), + token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst), + token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst), + token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst), + token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst), + token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst), + token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst), + token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst), + token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst), + } + } + } + impl<'t> Canonicalize for OneArg<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + let Self { opcode, width, src } = self; + Self { + opcode, + width: match opcode { + token::OneArg::Call => Width::Word, + _ => width, + }, + src: src.to_canonical(), + } + } + } + impl<'t> Canonicalize for TwoArg<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + let Self { opcode, width, src, dst } = self; + Self { opcode, width, src: src.to_canonical(), dst: dst.to_canonical() } + } + } + impl<'t> Canonicalize for Jump<'t> { + 
type Output = Self; + fn to_canonical(self) -> Self::Output { + let Self { opcode, dst } = self; + Self { + opcode: match opcode { + token::Jump::Jnz => token::Jump::Jne, + token::Jump::Jz => token::Jump::Jeq, + token::Jump::Jnc => token::Jump::Jlo, + token::Jump::Jc => token::Jump::Jhs, + t => t, + }, + dst: dst.to_canonical(), + } + } + } + impl Canonicalize for Reti { + type Output = Self; + fn to_canonical(self) -> Self::Output { + self + } + } + impl<'t> Canonicalize for Br<'t> { + type Output = TwoArg<'t>; + fn to_canonical(self) -> Self::Output { + let Self { src } = self; + TwoArg::new(Mov, Width::Word, src, Dst::Direct(Reg::PC)) + } + } + + impl<'t> Canonicalize for Src<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + use SrcSpecial::*; + match self { + Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self, + Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r), + Src::Absolute(e) => Src::Absolute(e.to_canonical().into()), + Src::Immediate(e) => match e.to_canonical() { + Expr::Number(0) => Src::Special(Zero), + Expr::Number(1) => Src::Special(One), + Expr::Number(2) => Src::Special(Two), + Expr::Number(4) => Src::Special(Four), + Expr::Number(8) => Src::Special(Eight), + Expr::Number(0xffff) => Src::Special(NegOne), + expr => Src::Immediate(expr.into()), + }, + Src::BareExpr(_) => self, + } + } + } + impl<'t> Canonicalize for Dst<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + match self { + Dst::Direct(_) | Dst::Special(_) => self, + Dst::Indexed(e, r) => Dst::Indexed(e.to_canonical().into(), r), + Dst::Absolute(e) => Dst::Absolute(e.to_canonical().into()), + } + } + } + impl<'t> Canonicalize for JumpDst<'t> { + type Output = Self; + fn to_canonical(self) -> Self::Output { + self + } + } + impl<'t> Canonicalize for Expr<'t> { + type Output = Self; + /// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number], + /// this returns a single [Expr::Number]. 
If not, it evaluates until + /// it runs into an unevaluatable leaf. + fn to_canonical(self) -> Self::Output { + match self { + Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self, + Expr::Group(e) => e.to_canonical(), + Expr::Unary(ops, tail) => { + let mut tail = match tail.to_canonical() { + Expr::Number(n) => n, + other => return other, + }; + // If the tail is dereferenced, canonicalization must halt, + // since we have no knowledge of memory layout + let mut ops = ops.into_iter(); + for op in ops.by_ref() { + tail = match op { + UnOp::Deref => { + return Expr::Unary( + iter::once(op).chain(ops).collect(), + Box::new(tail.into()), + ) + } + UnOp::Not => !tail, + UnOp::Neg => 0u16.wrapping_sub(tail), + } + } + Expr::Number(tail) + } + Expr::Binary(head, tails) => { + let mut head = match head.to_canonical() { + Expr::Number(n) => n, + head => return Expr::Binary(head.into(), tails), + }; + let mut tails = tails.into_iter(); + for (op, tail) in &mut tails { + let tail = tail.to_canonical(); + // If the canonical tail isn't a number, rebuild and return + let Expr::Number(tail) = tail else { + return Expr::Binary( + Box::new(head.into()), + iter::once((op, tail)).chain(tails).collect(), + ); + }; + head = match op { + BinOp::Mul => head.wrapping_mul(tail), + BinOp::Div => head.wrapping_div(tail), + BinOp::Rem => head.wrapping_rem(tail), + BinOp::Add => head.wrapping_add(tail), + BinOp::Sub => head.wrapping_sub(tail), + BinOp::Lsh => head.wrapping_shl(tail as u32), + BinOp::Rsh => head.wrapping_shr(tail as u32), + BinOp::And => head & tail, + BinOp::Xor => head ^ tail, + BinOp::Or => head | tail, + }; + } + Expr::Number(head) + } + } + } + } +} diff --git a/src/parser/comment.rs b/src/parser/comment.rs deleted file mode 100644 index c3fc86c..0000000 --- a/src/parser/comment.rs +++ /dev/null @@ -1,15 +0,0 @@ -// © 2023 John Breaux -//! 
A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//` -use super::*; -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Comment(pub String); - -impl Parsable for Comment { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - Ok(Self(stream.expect(Type::Comment)?.lexeme().to_string())) - } -} -impl Display for Comment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) } -} diff --git a/src/parser/directive.rs b/src/parser/directive.rs deleted file mode 100644 index 81e2acf..0000000 --- a/src/parser/directive.rs +++ /dev/null @@ -1,90 +0,0 @@ -// © 2023 John Breaux -//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and -//! [Linker](crate::Linker) - -use std::path::PathBuf; - -use super::*; -use crate::lexer::token::OwnedToken; - -// TODO: Parse each kind of *postprocessor* directive into an AST node -// - .org 8000: Directive::Org { base: Number } -// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this -// in the AST? -// - .include "" Directive::Include { Root } ; should this include an entire AST in -// the AST? 
-// - .word 8000 Directive::Word(Number) -// - .words dead beef Directive::Words(Vec|Vec) -// - .byte ff Directive::Byte(Number) -// - .bytes de, ad, be, ef Directive::Bytes(Vec) -// - .string "string" Directive::String(String) -// - .ascii "string" Directive::Ascii(Vec) - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Directive { - Org(Number), - Define(Vec), - Include(Root), - Byte(Number), - Bytes(Vec), - Word(Number), - Words(Vec), - String(String), - Strings(Vec), -} - -impl Directive {} - -impl Parsable for Directive { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - let d = stream.expect(Type::Directive)?; - // match on the directive - Ok(match d.lexeme() { - ".org" => Self::Org(Number::parse(p, stream)?), - ".define" => { - let mut tokens = vec![]; - loop { - match stream.peek().variant() { - Type::Endl | Type::EndOfFile => break, - _ => tokens.push(stream.next().unwrap_or_default().into()), - } - } - Self::Define(tokens) - } - ".include" => { - // Try to get path - Self::Include(Parser::default().parse_file(&PathBuf::parse(p, stream)?)?) 
- } - ".byte" => Self::Byte(Number::parse(p, stream)?), - ".bytes" => Self::Bytes(Vec::::parse(p, stream)?), - ".word" => Self::Word(Number::parse(p, stream)?), - ".words" => Self::Words(Vec::::parse(p, stream)?), - ".string" => Self::String(String::parse(p, stream)?), - ".strings" => Self::Strings(Vec::::parse(p, stream)?), - e => Err(ParseError::UnrecognizedDirective(e.into()))?, - }) - } -} - -impl Display for Directive { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Directive::Org(num) => write!(f, ".org {num}"), - Directive::Define(rep) => { - write!(f, ".define")?; - for t in rep { - write!(f, " {t}")?; - } - Ok(()) - } - Directive::Include(r) => Display::fmt(r, f), - Directive::Byte(num) => write!(f, ".org {num}"), - Directive::Bytes(v) => write!(f, ".bytes {v:?}"), - Directive::Word(num) => write!(f, ".org {num}"), - Directive::Words(v) => write!(f, ".bytes {v:?}"), - Directive::String(s) => write!(f, ".string \"{s}\""), - Directive::Strings(s) => write!(f, ".string \"{s:?}\""), - } - } -} diff --git a/src/parser/error.rs b/src/parser/error.rs deleted file mode 100644 index 0b3bf88..0000000 --- a/src/parser/error.rs +++ /dev/null @@ -1,74 +0,0 @@ -// © 2023 John Breauxs -use super::*; -use crate::lexer::error::LexError; - -#[derive(Debug)] -pub enum ParseError { - /// Produced by [lexer](crate::lexer) - LexError(LexError), - /// Produced by [std::io] - IoError(std::io::Error), - /// Produced by [Number](Number)[::parse()](Parsable::parse()) - /// when the parsed number contains digits too high for the specified radix - UnexpectedDigits(String, u32), - /// Produced by [Opcode](Opcode)[::parse()](Parsable::parse()) - /// when the opcode passed lexing but did not match recognized opcodes. - /// - /// This is always a lexer bug. 
- UnrecognizedOpcode(String), - /// Produced by [Directive](Directive)[::parse()](Parsable::parse()) - /// when an unknown or unimplemented directive is used - UnrecognizedDirective(String), - /// Produced by [Register] when attempting to convert from a [str] - /// that isn't a register (pc, sp, sr, cg, or r{number}) - NotARegister(String), - /// Produced by [Register] when the r{number} is outside the range 0-15 - RegisterTooHigh(u16), - /// Produced by [SecondaryOperand] when the joke "secondary immediate" form - /// is out of range 0..=1 - FatSecondaryImmediate(isize), - /// Produced by a [Number] too wide to fit in 16 bits - /// (outside the range `(-2^15) .. (2^16-1)` ) - NumberTooWide(isize), - /// Produced by [JumpTarget](parser::preamble::JumpTarget) - /// when the jump offset is outside the range (-0x3ff..0x3fc) - JumpedTooFar(isize), - /// Produced by [JumpTarget](parser::preamble::JumpTarget) - JumpedOdd(isize), -} - -impl From for ParseError { - fn from(value: LexError) -> Self { Self::LexError(value) } -} -impl From for ParseError { - fn from(value: std::io::Error) -> Self { Self::IoError(value) } -} - -impl Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::LexError(error) => Display::fmt(error, f), - Self::IoError(error) => Display::fmt(error, f), - Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), - Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), - Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."), - Self::NotARegister(reg) => write!(f, "{reg} is not a register"), - Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"), - Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"), - Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"), - Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range 
(`-1022..=1024`.)"), - Self::JumpedOdd(num) => { - write!(f, "Jump targets only encode even numbers: {num} must not be odd.") - } - } - } -} -impl std::error::Error for ParseError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Self::LexError(e) => Some(e), - Self::IoError(e) => Some(e), - _ => None, - } - } -} diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs deleted file mode 100644 index 3caefbb..0000000 --- a/src/parser/identifier.rs +++ /dev/null @@ -1,26 +0,0 @@ -// © 2023 John Breaux -//! An [Identifier] stores the hash of an identifier -use super::*; -use std::rc::Rc; -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Identifier { - str: Rc, -} - -impl Identifier { - fn str>(s: T) -> Self { Self { str: s.as_ref().to_owned().into() } } -} - -impl Parsable for Identifier { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - let token = stream.expect(Type::Identifier)?; - match token.variant() { - Type::Identifier => Ok(Self::str(token.lexeme())), - _ => unreachable!("Expected identifier, got {token:?}"), - } - } -} -impl Display for Identifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.str, f) } -} diff --git a/src/parser/instruction.rs b/src/parser/instruction.rs deleted file mode 100644 index 38bfaa5..0000000 --- a/src/parser/instruction.rs +++ /dev/null @@ -1,52 +0,0 @@ -// © 2023 John Breaux -//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430 -//! instruction -//! -//! -//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent -//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an -//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on -//! my own. 
Really, there's about 9 valid classes of instruction, some of which are only used for -//! one or two of the MSP430's instructions. - -use super::*; - -pub mod encoding; -pub mod opcode; - -/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Instruction(Opcode, Encoding); - -impl Instruction { - pub fn opcode(&self) -> &Opcode { &self.0 } - pub fn encoding(&self) -> &Encoding { &self.1 } - /// Gets the Instruction as a [u16] - pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() } - /// Gets the [extension words] - pub fn ext_words(&self) -> [Option; 2] { self.1.extwords() } -} - -impl Parsable for Instruction { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where - Self: Sized, - T: crate::TokenStream<'text>, - { - // parse an opcode - let opcode: Opcode = Opcode::parse(p, stream)?; - // resolve the opcode to a final opcode and an encoding - let (opcode, encoding) = opcode.resolve(); - // parse the encoding - let encoding = encoding.parse(p, stream)?; - Ok(Self(opcode, encoding)) - } -} - -impl From for u16 { - fn from(value: Instruction) -> Self { value.word() } -} - -impl Display for Instruction { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) } -} diff --git a/src/parser/instruction/encoding.rs b/src/parser/instruction/encoding.rs deleted file mode 100644 index 1a44938..0000000 --- a/src/parser/instruction/encoding.rs +++ /dev/null @@ -1,81 +0,0 @@ -// © 2023 John Breaux -//! 
An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode) -use super::*; - -pub mod number; -pub mod register; -pub mod width; - -pub mod jump_target; -pub mod primary_operand; -pub mod secondary_operand; - -mod builder; -pub mod encoding_parser; - -use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder}; -use encoding_parser::EncodingParser; - -/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html) -/// -/// # Examples -/// ```rust -/// use msp430_asm::{preamble::*, parser::preamble::*}; -/// // Create a token sequence -/// let asm_file = r".b 8000(r15)"; -/// // Create a single-operand encoding parser -/// let single: EncodingParser = Encoding::single().end(); -/// // Parse an Encoding from it -/// let encoding: Encoding = single -/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces()) -/// .unwrap(); -/// // Print the Encoding -/// println!("{encoding}"); -/// ``` -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Encoding { - Single { width: Width, dst: PrimaryOperand }, - Jump { target: JumpTarget }, - Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand }, -} -impl Encoding { - /// Returns a builder for [Encoding::Single] - pub fn single() -> SingleBuilder { Default::default() } - /// Returns a builder for [Encoding::Jump] - pub fn jump() -> JumpBuilder { Default::default() } - /// Returns a builder for [Encoding::Double] - pub fn double() -> DoubleBuilder { Default::default() } - /// Returns a builder for [Encoding::Double] - /// - /// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and - /// dst are the same - pub fn reflexive() -> ReflexiveBuilder { Default::default() } - /// - pub fn word(&self) -> u16 { - match self { - Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16, - Encoding::Jump { target } => target.word().unwrap_or_default(), - 
Encoding::Double { width, src, dst } => { - u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8) - } - } - } - /// Returns extwords for instruction - pub fn extwords(&self) -> [Option; 2] { - match self { - Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()], - Encoding::Single { dst, .. } => [dst.ext_word(), None], - Encoding::Jump { .. } => [None, None], - } - } -} - -impl Display for Encoding { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Encoding::Single { width, dst } => write!(f, "{width} {dst}"), - Encoding::Jump { target } => write!(f, " {target}"), - Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"), - } - } -} diff --git a/src/parser/instruction/encoding/builder.rs b/src/parser/instruction/encoding/builder.rs deleted file mode 100644 index 63080d5..0000000 --- a/src/parser/instruction/encoding/builder.rs +++ /dev/null @@ -1,76 +0,0 @@ -// © 2023 John Breaux -//! 
Builder API for [`EncodingParser`] -use super::*; -#[derive(Debug, Default)] -pub struct SingleBuilder { - width: Option, - dst: Option, -} -impl SingleBuilder { - pub fn width(mut self, width: bool) -> Self { - self.width = Some(width.into()); - self - } - /// Sets the [PrimaryOperand] field - pub fn operand(mut self, dst: PrimaryOperand) -> Self { - self.dst = Some(dst); - self - } - /// Build - pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } } -} - -#[derive(Debug, Default)] -pub struct JumpBuilder { - target: Option, -} -impl JumpBuilder { - pub fn target(mut self, target: JumpTarget) -> Self { - self.target = Some(target); - self - } - pub fn end(self) -> EncodingParser { EncodingParser::Jump { target: self.target } } -} - -#[derive(Debug, Default)] -pub struct DoubleBuilder { - width: Option, - src: Option, - dst: Option, -} -impl DoubleBuilder { - /// Sets the [Width] field - pub fn width(mut self, width: bool) -> Self { - self.width = Some(width.into()); - self - } - /// Sets the [PrimaryOperand] field - pub fn src(mut self, src: PrimaryOperand) -> Self { - self.src = Some(src); - self - } - /// Sets the [PrimaryOperand] field - pub fn dst(mut self, dst: SecondaryOperand) -> Self { - self.dst = Some(dst); - self - } - pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } } -} - -#[derive(Debug, Default)] -pub struct ReflexiveBuilder { - width: Option, - reg: Option, -} -impl ReflexiveBuilder { - /// Sets the [Width] field - pub fn width(mut self, width: bool) -> Self { - self.width = Some(width.into()); - self - } - pub fn reg(mut self, reg: SecondaryOperand) -> Self { - self.reg = Some(reg); - self - } - pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } } -} diff --git a/src/parser/instruction/encoding/encoding_parser.rs b/src/parser/instruction/encoding/encoding_parser.rs deleted file mode 100644 
index c59515d..0000000 --- a/src/parser/instruction/encoding/encoding_parser.rs +++ /dev/null @@ -1,37 +0,0 @@ -// © 2023 John Breaux -//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`] -use super::*; - -#[derive(Clone, Debug)] -/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream] -pub enum EncodingParser { - Single { width: Option, dst: Option }, - Jump { target: Option }, - Double { width: Option, src: Option, dst: Option }, - Reflexive { width: Option, reg: Option }, -} - -impl EncodingParser { - /// Constructs an [Encoding] from this [EncodingParser], filling holes - /// with the tokenstream - pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result - where T: crate::TokenStream<'text> { - Ok(match self { - Self::Single { width, dst } => Encoding::Single { - width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), - dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? }, - }, - Self::Jump { target } => Encoding::Jump { target: target.unwrap_or(JumpTarget::parse(p, stream)?) }, - Self::Double { width, src, dst } => Encoding::Double { - width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), - src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? }, - dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? }, - }, - Self::Reflexive { width, reg } => { - let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default()); - let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? }; - Encoding::Double { width, src: reg.clone().into(), dst: reg } - } - }) - } -} diff --git a/src/parser/instruction/encoding/jump_target.rs b/src/parser/instruction/encoding/jump_target.rs deleted file mode 100644 index 2d9b731..0000000 --- a/src/parser/instruction/encoding/jump_target.rs +++ /dev/null @@ -1,58 +0,0 @@ -// © 2023 John Breaux -//! 
A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier) -//! for a [Jump](Encoding::Jump) [instruction] -use super::*; - -/// Contains the [pc-relative offset](Number) or [label](Identifier) -/// for a [Jump](Encoding::Jump) [Instruction] -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum JumpTarget { - Number(Number), - Identifier(Identifier), -} - -impl JumpTarget { - pub fn word(&self) -> Option { - match self { - JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff), - JumpTarget::Identifier(_) => None, - } - } - pub fn squish(value: isize) -> Result { - match value { - i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?, - i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff), - i => Err(ParseError::JumpedTooFar(i))?, - } - } - pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 } -} - -impl Parsable for JumpTarget { - // - Identifier - // - Number - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: crate::TokenStream<'text> { - // Try to parse a number - if let Some(num) = Number::try_parse(p, stream)? { - Self::try_from(num) - } else { - // if that fails, try to parse an identifier instead - Ok(Self::Identifier(Identifier::parse(p, stream)?)) - } - } -} - -impl TryFrom for JumpTarget { - type Error = ParseError; - fn try_from(value: Number) -> Result { Ok(Self::Number(Self::squish(value.into())?.into())) } -} - -impl Display for JumpTarget { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))), - Self::Identifier(id) => write!(f, "{id}"), - } - } -} diff --git a/src/parser/instruction/encoding/number.rs b/src/parser/instruction/encoding/number.rs deleted file mode 100644 index 849e0f9..0000000 --- a/src/parser/instruction/encoding/number.rs +++ /dev/null @@ -1,81 +0,0 @@ -// © 2023 John Breaux -//! 
A [`Number`] represents a 16-bit signed or unsigned word -use super::*; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Number(isize, u32); // (value, radix) - -impl Parsable for Number { - // A number is: - // [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - use Type as Ty; - // The number is negative when it begins with a Minus, but Plus is also acceptable. - let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus)); - let radix = match stream - .expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin]) - .ok() - .map(|t| t.variant()) - { - Some(Ty::RadixMarkerHex) => 16, - Some(Ty::RadixMarkerDec) => 10, - Some(Ty::RadixMarkerOct) => 8, - Some(Ty::RadixMarkerBin) => 2, - _ => p.radix, - }; - let number = stream.expect(Ty::Number)?; - // TODO: Reintroduce error context - let number = isize::from_str_radix(number.lexeme(), radix) - .map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))? - * if negative { -1 } else { 1 }; - // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit) - Ok(Self( - if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? 
}, - radix, - )) - } -} - -impl From for Number { - fn from(value: isize) -> Self { Self(value, 16) } -} - -impl From for isize { - fn from(value: Number) -> Self { value.0 as Self } -} - -impl From for Number { - fn from(value: u16) -> Self { Self(value as isize, 16) } -} - -impl From for u16 { - fn from(value: Number) -> Self { value.0 as Self } -} - -impl std::ops::Sub for Number { - type Output = Self; - fn sub(mut self, rhs: isize) -> Self::Output { - self.0 -= rhs; - self - } -} - -impl std::ops::Shr for Number { - type Output = Self; - fn shr(mut self, rhs: usize) -> Self::Output { - self.0 >>= rhs; - self - } -} - -impl std::fmt::Display for Number { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.1 { - 2 => std::fmt::Binary::fmt(&self.0, f), - 8 => std::fmt::Octal::fmt(&self.0, f), - 16 => std::fmt::LowerHex::fmt(&self.0, f), - _ => std::fmt::Display::fmt(&self.0, f), - } - } -} diff --git a/src/parser/instruction/encoding/primary_operand.rs b/src/parser/instruction/encoding/primary_operand.rs deleted file mode 100644 index 8742e92..0000000 --- a/src/parser/instruction/encoding/primary_operand.rs +++ /dev/null @@ -1,146 +0,0 @@ -// © 2023 John Breaux -//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension -//! 
Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`] -use super::*; - -/// Contains the first [Register], addressing mode, and Extension Word for a -/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction] -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum PrimaryOperand { - Direct(Register), - Indirect(Register), - PostInc(Register), - Indexed(Register, Number), - Relative(Identifier), - Absolute(Number), - Immediate(Number), - Four, - Eight, - Zero, - One, - Two, - MinusOne, -} - -impl PrimaryOperand { - /// Returns the mode bits - pub fn mode(&self) -> u16 { - use PrimaryOperand::*; - match self { - Direct(_) | Zero => 0, - Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4, - Indirect(_) | Two | Four => 2 << 4, - PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4, - } - } - /// Gets the register - pub fn register(&self) -> Register { - use PrimaryOperand::*; - match self { - Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r, - Immediate(_) | Relative(_) => Register::pc, - Absolute(_) | Four | Eight => Register::sr, - Zero | One | Two | MinusOne => Register::cg, - } - } - /// Gets the extension word, if present - pub fn ext_word(&self) -> Option { - use PrimaryOperand::*; - match self { - Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()), - _ => None, - } - } -} - -impl Parsable for PrimaryOperand { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: crate::TokenStream<'text> { - // Try parsing as Register (Direct) - if let Some(r) = Register::try_parse(p, stream)? { - return Ok(Self::Direct(r)); - } - // Try parsing as Number (Indexed) - if let Some(idx) = Number::try_parse(p, stream)? 
{ - stream.expect(Type::LParen)?; - let reg = Register::parse(p, stream)?; - stream.expect(Type::RParen)?; - return Ok(Self::Indexed(reg, idx)); - } - // Try parsing as Identifier (Relative, label mode) - if let Some(id) = Identifier::try_parse(p, stream)? { - return Ok(Self::Relative(id)); - } - // Or directly match any of the valid prefix markers - // Register, Number, and Identifier are included here to make error messages clearer. - // their inclusion will cause a negligible slowdown when the next token is not a prefix marker - // (a failure condition) - let token = stream.expect_any_of([ - Type::Indirect, - Type::Absolute, - Type::Immediate, - Type::Register, - Type::Number, - Type::Identifier, - ])?; - Ok(match token.variant() { - Type::Indirect => { - let reg = Register::parse(p, stream)?; - match stream.expect(Type::Plus) { - Ok(_) => Self::PostInc(reg), - Err(_) => Self::Indirect(reg), - } - } - Type::Absolute => Self::Absolute(Number::parse(p, stream)?), - Type::Immediate => { - let number = Number::parse(p, stream)?; - match number.into() { - // There are two representations for the all-ones constant, since Number preserves - // signedness. 
- -1_isize | 0xffff => Self::MinusOne, - 0 => Self::Zero, - 1 => Self::One, - 2 => Self::Two, - 4 => Self::Four, - 8 => Self::Eight, - _ => Self::Immediate(number), - } - } - _ => unreachable!("Token {token:?} passed expectation but failed match!"), - }) - } -} - -impl From for PrimaryOperand { - fn from(value: SecondaryOperand) -> Self { - match value { - SecondaryOperand::Direct(r) => Self::Direct(r), - SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n), - SecondaryOperand::Absolute(n) => Self::Absolute(n), - SecondaryOperand::Relative(id) => Self::Relative(id), - SecondaryOperand::Zero => Self::Zero, - SecondaryOperand::One => Self::One, - } - } -} - -impl Display for PrimaryOperand { - // Turn the operand back into a form which parses into the same type - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Direct(r) => Display::fmt(r, f), - Self::Indirect(r) => write!(f, "@{r}"), - Self::PostInc(r) => write!(f, "@{r}+"), - Self::Indexed(r, idx) => write!(f, "{idx}({r})"), - Self::Relative(id) => Display::fmt(id, f), - Self::Absolute(n) => write!(f, "&{n}"), - Self::Immediate(n) => write!(f, "#{n}"), - Self::Four => Display::fmt("#4", f), - Self::Eight => Display::fmt("#8", f), - Self::Zero => Display::fmt("#0", f), - Self::One => Display::fmt("#1", f), - Self::Two => Display::fmt("#2", f), - Self::MinusOne => Display::fmt("#-1", f), - } - } -} diff --git a/src/parser/instruction/encoding/register.rs b/src/parser/instruction/encoding/register.rs deleted file mode 100644 index 7c4c1aa..0000000 --- a/src/parser/instruction/encoding/register.rs +++ /dev/null @@ -1,112 +0,0 @@ -// © 2023 John Breaux -//! 
A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) -use super::*; -use std::str::FromStr; - -/// A [Register] epresents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) -#[allow(non_camel_case_types)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Register { - /// Program Counter - pc, - /// Stack Pointer - sp, - /// Status Register - sr, - /// Constant Generator - cg, - r4, - r5, - r6, - r7, - r8, - r9, - r10, - r11, - r12, - r13, - r14, - r15, -} - -impl Parsable for Register { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result - where T: crate::TokenStream<'text> { - stream.expect(Type::Register)?.lexeme().parse() - } -} - -impl From for u16 { - fn from(value: Register) -> Self { value as u16 } -} - -impl TryFrom for Register { - type Error = ParseError; - fn try_from(value: u16) -> Result { - use Register::*; - Ok(match value { - 0 => pc, - 1 => sp, - 2 => sr, - 3 => cg, - 4 => r4, - 5 => r5, - 6 => r6, - 7 => r7, - 8 => r8, - 9 => r9, - 10 => r10, - 11 => r11, - 12 => r12, - 13 => r13, - 14 => r14, - 15 => r15, - _ => return Err(ParseError::RegisterTooHigh(value)), - }) - } -} - -impl FromStr for Register { - type Err = ParseError; - - fn from_str(s: &str) -> Result { - use Register::*; - match s { - "pc" => Ok(pc), - "sp" => Ok(sp), - "sr" => Ok(sr), - "cg" => Ok(cg), - _ => { - str::parse::(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into() - } - } - } -} - -impl From for &str { - fn from(value: Register) -> Self { - use Register::*; - match value { - pc => "pc", - sp => "sp", - sr => "sr", - cg => "cg", - r4 => "r4", - r5 => "r5", - r6 => "r6", - r7 => "r7", - r8 => "r8", - r9 => "r9", - r10 => "r10", - r11 => "r11", - r12 => "r12", - r13 => "r13", - r14 => "r14", - r15 => "r15", - } - } -} - -impl std::fmt::Display for Register { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) 
-> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) } -} diff --git a/src/parser/instruction/encoding/secondary_operand.rs b/src/parser/instruction/encoding/secondary_operand.rs deleted file mode 100644 index 59ca3b1..0000000 --- a/src/parser/instruction/encoding/secondary_operand.rs +++ /dev/null @@ -1,105 +0,0 @@ -// © 2023 John Breaux -//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension -//! Word for a [two-operand](Encoding::Double) [instruction] -use super::*; - -/// The destination of a [Double](Encoding::Double) -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum SecondaryOperand { - Direct(Register), - Indexed(Register, Number), - Relative(Identifier), - Absolute(Number), - // Joke encodings? - Zero, - One, -} - -use SecondaryOperand as So; - -impl SecondaryOperand { - pub fn mode(&self) -> u16 { - match self { - So::Direct(_) | So::Zero => 0, - So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7, - } - } - pub fn register(&self) -> Register { - use SecondaryOperand::*; - match self { - Direct(r) | Indexed(r, _) => *r, - Relative(_) => Register::pc, - Absolute(_) => Register::sr, - Zero | One => Register::cg, - } - } - /// This is the only way to have an extension word - pub fn ext_word(&self) -> Option { - use SecondaryOperand::*; - match self { - Indexed(_, w) | Absolute(w) => Some((*w).into()), - _ => None, - } - } -} - -impl Parsable for SecondaryOperand { - // Separator - // - Register => Direct - // - Number => Indexed - // - OpenIdx - // - Register - // - CloseIdx - // - Absolute - // - Number - // - Immediate - // - Number == 0, 1 - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: crate::TokenStream<'text> { - use SecondaryOperand::*; - stream.allow(Type::Separator); - // Try parsing as Register (Direct) - if let Some(r) = Register::try_parse(p, stream)? 
{ - return Ok(Self::Direct(r)); - } - // Try parsing as Number (Indexed) - if let Some(idx) = Number::try_parse(p, stream)? { - stream.expect(Type::LParen)?; - let reg = Register::parse(p, stream)?; - stream.expect(Type::RParen)?; - return Ok(Self::Indexed(reg, idx)); - } - // Try parsing as Identifier (Relative, label mode) - if let Some(id) = Identifier::try_parse(p, stream)? { - return Ok(Self::Relative(id)); - } - // Register, Number, and Identifier are included here to make error messages clearer. - // their inclusion will cause a negligible slowdown when the next token is not a prefix marker - // (a failure condition) but should not match a token - let token = - stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?; - Ok(match token.variant() { - Type::Absolute => Absolute(Number::parse(p, stream)?), - // TODO: Reintroduce error context - Type::Immediate => match Number::parse(p, stream)?.into() { - 0 => Zero, - 1 => One, - n => Err(ParseError::FatSecondaryImmediate(n))?, - }, - _ => unreachable!("Token {token:?} passed expectation but failed match!"), - }) - } -} - -impl Display for SecondaryOperand { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Direct(r) => Display::fmt(r, f), - Self::Indexed(r, idx) => write!(f, "{idx}({r})"), - Self::Relative(id) => Display::fmt(id, f), - Self::Absolute(n) => write!(f, "&{n}"), - Self::Zero => Display::fmt("#0", f), - Self::One => Display::fmt("#1", f), - } - } -} diff --git a/src/parser/instruction/encoding/width.rs b/src/parser/instruction/encoding/width.rs deleted file mode 100644 index 3a4924e..0000000 --- a/src/parser/instruction/encoding/width.rs +++ /dev/null @@ -1,32 +0,0 @@ -// © 2023 John Breaux -//! A [`Width`] represents whether an instruction operates on whole words or bytes -use super::*; - -/// Represents an instruction's operand width. 
-/// -/// Evaluates to false when instruction takes word-sized operands, or true when -/// instruction takes byte-sized operands -#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Width(bool); - -impl Parsable for Width { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else { - return Ok(Self(false)); - }; - Ok(Self(token.is_variant(Type::ByteWidth))) - } -} -impl From for u16 { - fn from(value: Width) -> Self { (value.0 as Self) << 6 } -} -impl From for bool { - fn from(value: Width) -> Self { value.0 } -} -impl From for Width { - fn from(value: bool) -> Self { Width(value) } -} -impl std::fmt::Display for Width { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) } -} diff --git a/src/parser/instruction/opcode.rs b/src/parser/instruction/opcode.rs deleted file mode 100644 index f4d8488..0000000 --- a/src/parser/instruction/opcode.rs +++ /dev/null @@ -1,261 +0,0 @@ -// © 2023 John Breaux -//! An [`Opcode`] encodes an msp430 operation -use super::*; - -use std::str::FromStr; - -/// Opcode from the [MSPGCC Manual][1] -/// -/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will -/// extract from a [TokenStream] only the required arguments for that call. 
-/// -/// [1]: https://mspgcc.sourceforge.net/manual/x223.html -#[allow(clippy::identity_op)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Opcode { - // "Emulated" opcodes - Nop, - Pop, - Br, - Ret, - Clrc, - Setc, - Clrz, - Setz, - Clrn, - Setn, - Dint, - Eint, - Rla, - Rlc, - Inv, - Clr, - Tst, - Dec, - Decd, - Inc, - Incd, - Adc, - Dadc, - Sbc, - // Single - Rrc = 0x1000 | 0 << 7, - Swpb = 0x1000 | 1 << 7, - Rra = 0x1000 | 2 << 7, - Sxt = 0x1000 | 3 << 7, - Push = 0x1000 | 4 << 7, - Call = 0x1000 | 5 << 7, - Reti = 0x1000 | 6 << 7, - // Jump - Jnz = 0x2000 | 0 << 10, - Jz = 0x2000 | 1 << 10, - Jnc = 0x2000 | 2 << 10, - Jc = 0x2000 | 3 << 10, - Jn = 0x2000 | 4 << 10, - Jge = 0x2000 | 5 << 10, - Jl = 0x2000 | 6 << 10, - Jmp = 0x2000 | 7 << 10, - // Double - Mov = 0x4000, - Add = 0x5000, - Addc = 0x6000, - Subc = 0x7000, - Sub = 0x8000, - Cmp = 0x9000, - Dadd = 0xa000, - Bit = 0xb000, - Bic = 0xc000, - Bis = 0xd000, - Xor = 0xe000, - And = 0xf000, -} - -impl Opcode { - /// Resolve an Opcode into an [Opcode] and an [EncodingParser] - pub fn resolve(self) -> (Opcode, EncodingParser) { - use super::Encoding as Enc; - use Register as Reg; - use {PrimaryOperand as Src, SecondaryOperand as Dst}; - match self { - Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()), - // these instructions do not take a width specifier (though they may still behave properly) - Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()), - // `reti` does not take any operands. 
- Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()), - Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => { - (self, Enc::jump().end()) - } - Self::Mov - | Self::Add - | Self::Addc - | Self::Subc - | Self::Sub - | Self::Cmp - | Self::Dadd - | Self::Bit - | Self::Bic - | Self::Bis - | Self::Xor - | Self::And => (self, Enc::double().end()), - Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()), - Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()), - Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()), - Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()), - Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), - Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), - Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), - Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), - Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), - Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), - Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), - Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), - Self::Rla => (Self::Add, Enc::reflexive().end()), - Self::Rlc => (Self::Addc, Enc::reflexive().end()), - Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()), - Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()), - Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()), - Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()), - Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()), - Self::Inc => (Self::Add, Enc::double().src(Src::One).end()), - Self::Incd => (Self::Add, 
Enc::double().src(Src::Two).end()), - Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()), - Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()), - Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()), - } - } -} - -impl Parsable for Opcode { - fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - // TODO: Reintroduce error context - stream.expect(Type::Insn)?.parse() - } -} - -impl FromStr for Opcode { - type Err = ParseError; - fn from_str(s: &str) -> Result { - //TODO: Reduce allocations here? - let s = s.to_ascii_lowercase(); - Ok(match s.as_str() { - "rrc" => Self::Rrc, - "swpb" => Self::Swpb, - "rra" => Self::Rra, - "sxt" => Self::Sxt, - "push" => Self::Push, - "call" => Self::Call, - "reti" => Self::Reti, - - "jne" | "jnz" => Self::Jnz, - "jeq" | "jz" => Self::Jz, - "jnc" | "jlo" => Self::Jnc, - "jc" | "jhs" => Self::Jc, - "jn" => Self::Jn, - "jge" => Self::Jge, - "jl" => Self::Jl, - "jmp" => Self::Jmp, - - "mov" => Self::Mov, - "add" => Self::Add, - "addc" => Self::Addc, - "subc" => Self::Subc, - "sub" => Self::Sub, - "cmp" => Self::Cmp, - "dadd" => Self::Dadd, - "bit" => Self::Bit, - "bic" => Self::Bic, - "bis" => Self::Bis, - "xor" => Self::Xor, - "and" => Self::And, - - "nop" => Self::Nop, - "pop" => Self::Pop, - "br" => Self::Br, - "ret" => Self::Ret, - "clrc" => Self::Clrc, - "setc" => Self::Setc, - "clrz" => Self::Clrz, - "setz" => Self::Setz, - "clrn" => Self::Clrn, - "setn" => Self::Setn, - "dint" => Self::Dint, - "eint" => Self::Eint, - "rla" => Self::Rla, - "rlc" => Self::Rlc, - "inv" => Self::Inv, - "clr" => Self::Clr, - "tst" => Self::Tst, - "dec" => Self::Dec, - "decd" => Self::Decd, - "inc" => Self::Inc, - "incd" => Self::Incd, - "adc" => Self::Adc, - "dadc" => Self::Dadc, - "sbc" => Self::Sbc, - _ => Err(ParseError::UnrecognizedOpcode(s))?, - }) - } -} - -impl Display for Opcode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - 
match self { - Self::Nop => "nop", - Self::Pop => "pop", - Self::Br => "br", - Self::Ret => "ret", - Self::Clrc => "clrc", - Self::Setc => "setc", - Self::Clrz => "clrz", - Self::Setz => "setz", - Self::Clrn => "clrn", - Self::Setn => "setn", - Self::Dint => "dint", - Self::Eint => "eint", - Self::Rla => "rla", - Self::Rlc => "rlc", - Self::Inv => "inv", - Self::Clr => "clr", - Self::Tst => "tst", - Self::Dec => "dec", - Self::Decd => "decd", - Self::Inc => "inc", - Self::Incd => "incd", - Self::Adc => "adc", - Self::Dadc => "dadc", - Self::Sbc => "sbc", - Self::Rrc => "rrc", - Self::Swpb => "swpb", - Self::Rra => "rra", - Self::Sxt => "sxt", - Self::Push => "push", - Self::Call => "call", - Self::Reti => "reti", - Self::Jnz => "jnz", - Self::Jz => "jz", - Self::Jnc => "jnc", - Self::Jc => "jc", - Self::Jn => "jn", - Self::Jge => "jge", - Self::Jl => "jl", - Self::Jmp => "jmp", - Self::Mov => "mov", - Self::Add => "add", - Self::Addc => "addc", - Self::Subc => "subc", - Self::Sub => "sub", - Self::Cmp => "cmp", - Self::Dadd => "dadd", - Self::Bit => "bit", - Self::Bic => "bic", - Self::Bis => "bis", - Self::Xor => "xor", - Self::And => "and", - } - ) - } -} diff --git a/src/parser/label.rs b/src/parser/label.rs deleted file mode 100644 index 0b4c484..0000000 --- a/src/parser/label.rs +++ /dev/null @@ -1,21 +0,0 @@ -// © 2023 John Breaux -//! 
The definition of a label -use super::*; - -/// The definition of a label -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Label(pub Identifier); - -impl Parsable for Label { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - Ok(Self(Identifier::parse(p, stream).and_then(|t| { - stream.require(Type::Label)?; - Ok(t) - })?)) - } -} - -impl Display for Label { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) } -} diff --git a/src/parser/line.rs b/src/parser/line.rs deleted file mode 100644 index 3e1f7e2..0000000 --- a/src/parser/line.rs +++ /dev/null @@ -1,72 +0,0 @@ -// © 2023 John Breaux -//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same -//! document line will be treated as if they took up multiple [`Line`s](Line). -//! -//! A line contains one of: -//! - [`Label`] -//! - [`Instruction`] -//! - [`Directive`] -//! - [`Comment`] -//! - [Nothing](Line::Empty) -use super::*; - -/// A line contains any one of: -/// - [`Label`] (definition) -/// - [`Instruction`] -/// - [`Directive`] -/// - [`Comment`] -/// - Nothing at all -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Line { - Empty, - Insn(Instruction), - Comment(Comment), - Directive(Directive), - Label(Label), - EndOfFile, // Expected end of file -} - -impl Parsable for Line { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - Ok( - match stream - .peek_expect_any_of([ - Type::Endl, - Type::Insn, - Type::Comment, - Type::Directive, - Type::Identifier, - Type::EndOfFile, - ])? 
- .variant() - { - Type::Endl => { - stream.next(); - Self::Empty - } - Type::Insn => Self::Insn(Instruction::parse(p, stream)?), - Type::Comment => Self::Comment(Comment::parse(p, stream)?), - Type::Directive => Self::Directive(Directive::parse(p, stream)?), - Type::Identifier => Self::Label(Label::parse(p, stream)?), - Type::EndOfFile => { - stream.next(); - Self::EndOfFile - } - _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"), - }, - ) - } -} -impl Display for Line { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Empty => writeln!(f, "\n"), - Self::Label(arg0) => Display::fmt(arg0, f), - Self::Insn(arg0) => Display::fmt(arg0, f), - Self::Directive(arg0) => Display::fmt(arg0, f), - Self::Comment(arg0) => Display::fmt(arg0, f), - Self::EndOfFile => write!(f, "; End of file."), - } - } -} diff --git a/src/parser/parsable.rs b/src/parser/parsable.rs deleted file mode 100644 index 3216ad2..0000000 --- a/src/parser/parsable.rs +++ /dev/null @@ -1,85 +0,0 @@ -// © 2023 John Breaux -//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html) -use super::*; -/// Parses tokens from [stream](TokenStream) into Self node -pub trait Parsable { - /// Parses tokens from [TokenStream](TokenStream) into Self nodes - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where - Self: Sized, - T: TokenStream<'text>; - - /// Attempts to parse tokens from [stream](TokenStream) into Self nodes. - /// - /// Masks failed expectations. 
- fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result, ParseError> - where - Self: Sized, - T: TokenStream<'text>, - { - match Self::parse(p, stream) { - Ok(some) => Ok(Some(some)), - Err(ParseError::LexError(_)) => Ok(None), - Err(e) => Err(e), - } - } - - fn parse_and<'text, T, R>( - p: &Parser, - stream: &mut T, - f: fn(p: &Parser, &mut T) -> R, - ) -> Result<(Self, R), ParseError> - where - Self: Sized, - T: TokenStream<'text>, - { - Ok((Self::parse(p, stream)?, f(p, stream))) - } - - /// Attempts to parse tokens from [stream](TokenStream) into Self nodes. - /// - /// Returns [`Self::default()`](Default::default()) on error - fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self - where - Self: Sized + Default, - T: TokenStream<'text>, - { - Self::parse(p, stream).unwrap_or_default() - } -} - -macro_rules! parsable_str_types { - ($($t:ty),*$(,)?) => {$( - impl Parsable for $t { - fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into()) - } - } - )*}; -} -use std::{path::PathBuf, rc::Rc}; -parsable_str_types![String, Rc, Box, PathBuf]; - -/// Vectors of arbitrary parsables are cool -impl Parsable for Vec

{ - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - // [dead beef] - // [A, B,] - // [c d e f] - // [ something - // else ] - - stream.require(Type::LBracket)?; - stream.allow(Type::Endl); - let mut out = vec![]; - while let Some(t) = P::try_parse(p, stream)? { - out.push(t); - stream.allow(Type::Separator); - stream.allow(Type::Endl); - } - stream.require(Type::RBracket)?; - Ok(out) - } -} diff --git a/src/parser/root.rs b/src/parser/root.rs deleted file mode 100644 index 62341f3..0000000 --- a/src/parser/root.rs +++ /dev/null @@ -1,51 +0,0 @@ -use std::path::{Path, PathBuf}; - -// © 2023 John Breaux -use super::*; - -/// Contains the entire AST -#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Root(Option, Vec<(usize, Line)>); -// pub struct Root { pub path: PathBuf, pub lines: Vec } - -impl Root { - pub fn file(&self) -> Option<&Path> { self.0.as_deref() } - pub(crate) fn set_file(mut self, path: PathBuf) -> Self { - self.0 = Some(path); - self - } - pub fn lines(&self) -> &[(usize, Line)] { &self.1 } -} - -impl Parsable for Root { - fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result - where T: TokenStream<'text> { - let mut lines = vec![]; - loop { - let number = stream.context().line(); - match Line::parse(p, stream)? 
{ - Line::EndOfFile => break, - line => lines.push((number, line)), - } - } - Ok(Root(None, lines)) - } -} - -impl Display for Root { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for (num, line) in &self.1 { - f.pad(&format!("{num:3}: {line} "))?; - } - Ok(()) - } -} - -impl Debug for Root { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for line in self.0.iter() { - Debug::fmt(line, f)?; - } - Ok(()) - } -} diff --git a/src/preprocessor.rs b/src/preprocessor.rs new file mode 100644 index 0000000..fd624b9 --- /dev/null +++ b/src/preprocessor.rs @@ -0,0 +1,87 @@ +// © 2023-2024 John Breaux + +use crate::{ + lexer::{ + token::{Token, TokenKind as Kind}, + Lexer, + }, + util::Span, +}; +use std::collections::{HashMap, VecDeque}; + +#[derive(Clone, Debug)] +pub struct Preprocessor<'t> { + lexer: Lexer<'t>, + buf: VecDeque>, + defn: HashMap<&'t str, Vec>>, + /// Location for injected tokens + pos: Span, +} + +impl<'t> Preprocessor<'t> { + pub fn new(text: &'t str) -> Self { + Self { + lexer: Lexer::new(text), + buf: Default::default(), + defn: Default::default(), + pos: Default::default(), + } + } + pub fn with_lexer(lexer: Lexer<'t>) -> Self { + Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() } + } + pub fn scan(&mut self) -> Option> { + self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos) + } + pub fn start(&self) -> usize { + self.lexer.location() + } + /// Grabs a token from the lexer, and attempts to match its lexeme + fn next(&mut self) -> Option> { + let token = self.lexer.scan()?; + if let Some(tokens) = self.defn.get(token.lexeme) { + self.buf.extend(tokens.iter().copied().map(|mut t| { + t.pos = self.pos; + t + })); + return self.scan(); + } else { + match token.kind { + Kind::Directive => self.directive(token), + Kind::Newline => return self.scan(), + _ => {} + } + Some(token) + } + } + /// Passes a token through while parsing a directive + fn 
tee(&mut self) -> Option> { + let token = self.lexer.scan()?; + self.buf.push_back(token); + // self.buf.push_back(token); + Some(token) + } + /// Parses and executes a directive + pub fn directive(&mut self, token: Token<'t>) { + if ".define" == token.lexeme { + self.define() + } + } + pub fn define(&mut self) { + let Some(key) = self.tee() else { + return; + }; + let mut value = vec![]; + while let Some(token) = self.tee() { + match token.kind { + Kind::Comment => { + self.buf.push_back(token); + break; + } + Kind::Newline => break, + _ => value.push(token), + } + } + self.defn.insert(key.lexeme, value); + } +} From 22ade3750e242898684f7197406692cc047ad605 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Tue, 30 Jan 2024 05:29:25 -0600 Subject: [PATCH 02/12] grammar.ebnf: Commit incomplete grammar description --- grammar.ebnf | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 grammar.ebnf diff --git a/grammar.ebnf b/grammar.ebnf new file mode 100644 index 0000000..06704d9 --- /dev/null +++ b/grammar.ebnf @@ -0,0 +1,47 @@ +(* Partial grammar for msp430-asm *) + +Line = Label | Directive | Insn ; + +Insn = NoEm | OneEm | Special | OneArg | TwoArg | Jump ; + +(* Instruction formats *) +NoEm = OpNoEm ; +OneEm = OpOneEm Dst ; +Special = "reti" | "br" Src ; +OneArg = OpOneArg Src ; +TwoArg = OpTwoArg Src ','? Dst ; +Jump = OpJump Expr ; + + +(* Addressing modes *) +Src = '#' (SrcSpecial | '-'? Expr) + | Absolute + | PostInc + | Indexed + | Direct ; + +Dst = '#' (SrcSpecial | Expr) + | Absolute + | Indirect + | Indexed + | Direct ; + +Direct = Reg ; +Indirect = '@' Reg ; +PostInc = Indirect '+'? ; +Indexed = Number '(' Reg ')' ; +Immediate = '#' Expr ; +Absolute = '&' Expr ; +SrcSpecial = 0 | 1 | '-' 1 | 0xffff | 2 | 4 | 8 ; +DstSpecial = 0 | 1 ; + +Expr = '-'? 
Number ; + +(* Pseudo-terminals *) +Reg = "pc" | "sp" | "sr" | "cg" + | "r0" | "r1" | "r2" | "r3" + | "r4" | "r5" | "r6" | "r7" + | "r8" | "r9" | "r10" | "r11" + | "r12" | "r13" | "r14" | "r15" ; +Identifier = ID_START ID_CONTINUE* ; +Number = '-'? DIGIT ; From b31295ad213f7cf6ce9261b1695c919c5c2aa8ee Mon Sep 17 00:00:00 2001 From: John Breaux Date: Tue, 30 Jan 2024 05:40:49 -0600 Subject: [PATCH 03/12] Add one of my old Microcorruption solutions as an example - TODO: allow embedding unicode characters as numerics in expressions --- sample-asm/shellcode.asm | 99 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 sample-asm/shellcode.asm diff --git a/sample-asm/shellcode.asm b/sample-asm/shellcode.asm new file mode 100644 index 0000000..0eac4f5 --- /dev/null +++ b/sample-asm/shellcode.asm @@ -0,0 +1,99 @@ +; © 2023-2024 John Breaux +; Contains spoilers for Microcorruption Halifax! Be warned! +; just hash the first 0x140 B and stick them in memory + + +const: +.define msize 0x1 ; length of each hash in bytes +.define hsize 0x3 ; bytes kept per hash (only needs to be 3 to determine 1 byte of sram) +.define sr_len 0x140 ; number of bytes in sram to dump +.define ha_len 0x3c0 ; number of bytes in hash array (hsize * sr_len) +.define haddr 0x7000 ; address of the big hash array +.define iaddr 0x8000 ; address of the sram input buffer +.define kaddr 0x9000 ; address of the key buffer +external_data: +.define HEX_LUT 0x4710; "0123456789ABCDEF" +external_func: +; INT(int interrupt, ...) 
+.define INT #0x4550 +; getsn(void *dest, size_t len) +.define getsn #0x4568 +; putchar(char character) +.define putchar #0x4578 +; puts(char *str) +.define puts #0x4586 +; memcpy(void *dest, void *src, size_t len) +.define memcpy #0x45a4 +; sha256_internal(void * sram_addr, size_t sr_len, void * sha_buf) +.define sha256_internal #0x45b6 +; memset(void* buf, char value, size_t length) +.define memset #0x45c8 + +get_sram_hashes: + clr r11 ; loop variable in r11 + mov #msize, r14 ; r14 = 1 + mov #haddr, r13 ; set destination to haddr (0x7000) + sr_loop: + mov r11, r15 ; mov addr r15 + call sha256_internal ; + add #hsize, r13 ; keep 3 bytes of the output + inc r11 ; inc r11 + cmp #sr_len, r11 ; do that sr_len (0x140) times + jnc sr_loop + +print_hex: + clr r11; + ph_loop: + mov.b haddr(r11), r14 + mov.b r14, r15 + rra r15 ; using rra here instead of rra.b means the value won't roll into the highest bit + rra r15 ; which negates the need to and 0xf, r15 + rra r15 + rra r15 + clrc + and #0xf, r14 + mov.b HEX_LUT(r15), r15 + call putchar ; + mov.b HEX_LUT(r14), r15 + call putchar ; + inc r11 ; inc r11 + cmp #ha_len, r11 ; do that sram_length*3 times + jnc ph_loop + + mov.b #0xa, r15 ; '\n' + call #0x4578 ; putchar ('\n') + + +take_input: + mov #sr_len, r14 + mov #iaddr, r15 + call getsn ; + +check_all_passwords: + ;for i in 0..sr_len: + clr r9 + pw_loop: + ; memcpy(kaddr, iaddr + i, len) + mov #0x10, r13 + mov #iaddr, r14 + add r9, r14 + mov #kaddr, r15 + call memcpy + ; INT (0x42, key) + push #kaddr + push #0x42 + call INT + add #4, sp + ; INT(7f) + unlock7f: + push #0 + push #0 + push #0x7f + call INT + add #6, sp + inc r9 + cmp #sr_len, r9 + jl pw_loop + +end: + ret From 5a77985b39074f9f0ebff1a87d5a25d82cb9adae Mon Sep 17 00:00:00 2001 From: John Breaux Date: Wed, 31 Jan 2024 13:47:18 -0600 Subject: [PATCH 04/12] lexer: Fix bug that did not check the second character of identifiers. This also fixes one-character identifiers not being properly detected. 
--- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index 992d785..447f28a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -124,7 +124,7 @@ impl<'t> Lexer<'t> { '|' => self.then().emit(TokenKind::Bar), '}' => self.then().emit(TokenKind::CloseCurly), c if c.is_numeric() => self.number::(), - &c if is_xid_start(c) => self.then().identifier(), + &c if is_xid_start(c) => self.identifier(), c => todo!("Unrecognized character: {c}"), } } From 6b5663ae4e69a79173453d8f094770c22f1fc63b Mon Sep 17 00:00:00 2001 From: John Breaux Date: Wed, 31 Jan 2024 13:48:16 -0600 Subject: [PATCH 05/12] msp430-asm: Remove ANES as a dependency - ANSI escape codes are stupid simple, and really don't warrant an external dependency --- msp430-asm/Cargo.toml | 1 - msp430-asm/src/lib.rs | 23 +++++- msp430-asm/src/main.rs | 174 +++++++++++++++++++---------------------- 3 files changed, 103 insertions(+), 95 deletions(-) diff --git a/msp430-asm/Cargo.toml b/msp430-asm/Cargo.toml index 6192e9d..d7a82c5 100644 --- a/msp430-asm/Cargo.toml +++ b/msp430-asm/Cargo.toml @@ -10,5 +10,4 @@ publish.workspace = true [dependencies] libmsp430 = { path = ".." 
} -anes = { version = "0.2.0" } argp = { version = "0.3.0" } diff --git a/msp430-asm/src/lib.rs b/msp430-asm/src/lib.rs index 715377c..33b77d4 100644 --- a/msp430-asm/src/lib.rs +++ b/msp430-asm/src/lib.rs @@ -62,12 +62,31 @@ pub mod split_twice { pub mod cursor { use std::fmt::{Arguments, Display}; - pub macro csi($($t:tt)*) {format_args!("\x1b[{}", format_args!($($t)*))} + /// Moves to the {line}th previous line + pub macro previous($line:literal) { + csi!("{}F", $line) + } - pub macro color($fg:expr, $($t:tt)*) { + /// Injects a Control Sequence Introducer + pub macro csi($($t:tt)*) { + format_args!("\x1b[{}", format_args!($($t)*)) + } + + /// Formats the args with a foreground [Color] + pub macro fg($fg:expr, $($t:tt)*) { Colorized::new(Some($fg), None, format_args!($($t)*)) } + /// Formats the args with a background [Color] + pub macro bg($bg:expr, $($t:tt)*) { + Colorized::new(None, Some($bg), format_args!($($t)*)) + } + + /// Formats the args with both a foreground and background [Color] + pub macro color($fg:expr, $bg:expr, $($t:tt)*) { + Colorized::new(Some($fg), Some($bg), format_args!($($t)*)) + } + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Color { #[default] diff --git a/msp430-asm/src/main.rs b/msp430-asm/src/main.rs index a40c4d0..910892e 100644 --- a/msp430-asm/src/main.rs +++ b/msp430-asm/src/main.rs @@ -7,7 +7,7 @@ use libmsp430::{ parser::{error::Error as PError, Parser}, }; use msp430_asm::{ - cursor::{color, Color::*}, + cursor::{fg, Color::*}, split_twice::SplitTwice, }; use std::{ @@ -19,103 +19,16 @@ fn main() -> Result<(), Box> { let mut buf = String::new(); if let Some(file) = parse_args_or_exit::(argp::DEFAULT).file { buf = std::fs::read_to_string(file)?; - } else if !stdin().is_terminal() { - // if stdin is not a terminal, don't parsecheck each line. - stdin().lock().read_to_string(&mut buf)?; - } else { + } else if stdin().is_terminal() { // if stdin is a terminal, enter parse-checked REPL mode. 
repl::repl(&mut buf)?; + } else { + // if stdin is not a terminal, don't parsecheck each line. + stdin().lock().read_to_string(&mut buf)?; } asm(&buf) } -mod args { - use argp::FromArgs; - use std::path::PathBuf; - - /// Assembles MSP430 assembly into 16-bit little-endian machine code. \ - /// If used interactively, syntax is checked on a per-line basis. - #[derive(Debug, FromArgs)] - pub struct Args { - /// File to load. If not provided, takes input from stdin. - #[argp(option, short = 'f')] - pub file: Option, - } -} -mod repl { - use super::*; - use anes::MoveCursorToPreviousLine; - use std::io::{stderr, Write}; - - // macro color ($color: expr, $fmt: literal, $($str: expr),*) { - // format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* - // ::anes::ResetAttributes) } - - macro linenr($n: expr) { - format_args!("{:4}: ", $n) - } - - macro printfl ($($x: expr),+) { - {print!($($x),+); let _ = ::std::io::stdout().flush();} - } - - macro move_cursor($x:expr, $y:expr) { - format_args!("{}{}", ::anes::MoveCursorToPreviousLine($x), "") - } - - pub fn repl(buf: &mut String) -> Result<(), Box> { - let mut line = String::new(); - let mut linenr = 1; - println!( - "{}", - color!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION")) - ); - printfl!("{}", linenr!(linenr)); - while let Ok(len) = stdin().read_line(&mut line) { - match len { - 0 => break, // No newline (reached EOF) - 1 => continue, // Line is empty - _ => (), - } - // Try to parse this line in isolation (this restricts preprocessing) - match Parser::new(&line).parse::() { - Err(error) => errpp(&line, linenr, &error), - Ok(_) => { - okpp(&line, linenr); - *buf += &line; - linenr += 1; - } - } - line.clear(); - printfl!("{}", linenr!(linenr)); - } - println!("{}", color!(Gray, "[EOF]")); - Ok(()) - } - - fn okpp(line: &str, linenr: i32) { - println!( - "{}{}{}", - move_cursor!(1, 5), - color!(Green, "{:4}", linenr!(linenr)), - line.trim_end(), - ); - } - - /// 
Pretty-prints a line error - fn errpp(line: &str, linenr: i32, err: &PError) { - let loc = err.loc; - if stderr().is_terminal() { - let line = line.trim_end(); - eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Red, "{}", linenr!(linenr))); - let (start, mid, end) = line.split_twice(loc.start, loc.end); - eprintln!("{start}{}{end} {}", color!(Red, "{}", mid), color!(DarkGray, "; {}", err)); - } else { - eprintln!("{} ({err})", line.trim()) - } - } -} - // Parses and assembles a buffer, then prints it in hex to stdout fn asm(buf: &str) -> Result<(), Box> { match Parser::new(buf).parse::()?.to_canonical().assemble() { @@ -129,3 +42,80 @@ fn asm(buf: &str) -> Result<(), Box> { } Ok(()) } + +mod args { + use argp::FromArgs; + use std::path::PathBuf; + + /// Assembles MSP430 assembly into 16-bit little-endian machine code. \ + /// If used interactively, syntax is checked on a per-line basis. + #[derive(Debug, FromArgs)] + pub struct Args { + /// File to load. If not provided, takes input from stdin. + #[argp(option, short = 'f')] + pub file: Option, + } +} + +mod repl { + //! 
The REPL reads a line, parses it, evaluates the line, and prints, in a loop + use super::*; + use msp430_asm::cursor::*; + use std::io::{stderr, Write}; + + /// Prints the line number + macro linenr($n: expr) { + format_args!("{:4}: ", $n) + } + + /// [println], but without the newline + macro printfl ($($x: expr),+) { + {print!($($x),+); let _ = ::std::io::stdout().flush();} + } + + /// Runs the read-evaluate-print loop + pub fn repl(buf: &mut String) -> Result<(), Box> { + let mut line = String::new(); + let mut linenr = 1; + println!("{}", fg!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION"))); + printfl!("{}", linenr!(linenr)); + while let Ok(len) = stdin().read_line(&mut line) { + match len { + 0 => break, // No newline (reached EOF) + 1 => continue, // Line is empty + _ => (), + } + // Try to parse this line in isolation (this restricts preprocessing) + match Parser::new(&line).parse::() { + Err(error) => format_err(&line, linenr, &error), + Ok(_) => { + format_ok(&line, linenr); + *buf += &line; + linenr += 1; + } + } + line.clear(); + printfl!("{}", linenr!(linenr)); + } + println!("{}", fg!(Gray, "[EOF]")); + Ok(()) + } + + /// Rewrites the line in OK format, with a green linenr + fn format_ok(line: &str, linenr: i32) { + println!("{}{}{}", previous!(1), fg!(Green, "{:4}", linenr!(linenr)), line.trim_end(),); + } + + /// Pretty-prints a line error + fn format_err(line: &str, linenr: i32, err: &PError) { + let loc = err.loc; + if stderr().is_terminal() { + let line = line.trim_end(); + eprint!("{}{}", previous!(1), fg!(Red, "{}", linenr!(linenr))); + let (start, mid, end) = line.split_twice(loc.start, loc.end); + eprintln!("{start}{}{end} {}", fg!(Red, "{}", mid), fg!(DarkGray, "; {}", err)); + } else { + eprintln!("{} ({err})", line.trim()) + } + } +} From af89541af1411ed12093391465b4dff5e9f60732 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Wed, 31 Jan 2024 17:58:26 -0600 Subject: [PATCH 06/12] lexer: Fix copy+paste error in 
`greater`. Now emits `Rsh` tokens~! --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index 447f28a..c2a8edc 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -169,7 +169,7 @@ impl<'t> Lexer<'t> { } fn greater(&mut self) -> Option> { match self.peek() { - Some('>') => self.then().emit(TokenKind::Lsh), + Some('>') => self.then().emit(TokenKind::Rsh), _ => todo!("greater"), } } From 860c9d4a97b8dfdff7c8132890f3f4d71df9599f Mon Sep 17 00:00:00 2001 From: John Breaux Date: Wed, 31 Jan 2024 17:59:31 -0600 Subject: [PATCH 07/12] parser: Refactor `Parsable` to align with `assembler::Assemble` --- src/parser.rs | 52 ++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 934fb24..e9c5643 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -37,7 +37,7 @@ impl<'t> Parser<'t> { } pub fn parse>(&mut self) -> PResult { - Parsable::parse(self) + Parsable::parse_with(self) } pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error { Error { parsing, kind, loc: self.loc } @@ -194,11 +194,14 @@ impl<'t> Parser<'t> { } pub trait Parsable<'t>: Sized { - fn parse(p: &mut Parser<'t>) -> PResult; + fn parse(text: &'t str) -> PResult { + Self::parse_with(&mut Parser::new(text)) + } + fn parse_with(p: &mut Parser<'t>) -> PResult; } impl<'t> Parsable<'t> for Statements<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let mut stmts = vec![]; while p.peek(Parsing::File)?.kind != Kind::Eof { stmts.push(p.parse()?) 
@@ -208,7 +211,7 @@ impl<'t> Parsable<'t> for Statements<'t> { } impl<'t> Parsable<'t> for Statement<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let token = *p.peek(Parsing::Stmt)?; Ok(match token.kind { Kind::Comment => { @@ -222,7 +225,7 @@ impl<'t> Parsable<'t> for Statement<'t> { } } impl<'t> Parsable<'t> for Directive<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Directive; let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?; let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) }; @@ -238,7 +241,7 @@ impl<'t> Parsable<'t> for Directive<'t> { } } impl<'t> Parsable<'t> for Vec> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Directive; let mut tokens = vec![]; loop { @@ -252,13 +255,13 @@ impl<'t> Parsable<'t> for Vec> { } } impl<'t> Parsable<'t> for Instruction<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let start = p.peek(Parsing::Instruction)?.pos.start; Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } }) } } impl<'t> Parsable<'t> for InstructionKind<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { use crate::lexer::token::OneArg; // an instruction starts with an opcode Ok(match p.peek(Parsing::Instruction)?.kind() { @@ -274,7 +277,7 @@ impl<'t> Parsable<'t> for InstructionKind<'t> { } } impl<'t> Parsable<'t> for NoEm { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { match p.next(Parsing::NoEm)?.kind { Kind::NoEm(opcode) => Ok(Self { opcode }), ty => Err(p.error(Unexpected(ty), Parsing::NoEm)), @@ -282,7 +285,7 @@ impl<'t> Parsable<'t> for NoEm { } } impl<'t> Parsable<'t> for OneEm<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> 
PResult { Ok(Self { opcode: match p.next(Parsing::OneEm)?.kind { Kind::OneEm(opcode) => opcode, @@ -294,7 +297,7 @@ impl<'t> Parsable<'t> for OneEm<'t> { } } impl<'t> Parsable<'t> for OneArg<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { Ok(Self { opcode: match p.next(Parsing::OneArg)?.kind { Kind::OneArg(opcode) => opcode, @@ -306,7 +309,7 @@ impl<'t> Parsable<'t> for OneArg<'t> { } } impl<'t> Parsable<'t> for TwoArg<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::TwoArg; Ok(Self { opcode: match p.next(parsing)?.kind { @@ -320,7 +323,7 @@ impl<'t> Parsable<'t> for TwoArg<'t> { } } impl<'t> Parsable<'t> for Jump<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Jump; Ok(Self { opcode: match p.next(parsing)?.kind { @@ -332,21 +335,21 @@ impl<'t> Parsable<'t> for Jump<'t> { } } impl<'t> Parsable<'t> for Reti { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { use crate::lexer::token::OneArg; p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?; Ok(Reti) } } impl<'t> Parsable<'t> for Br<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { p.assert(Kind::Special(Special::Br), Parsing::Br)?; Ok(Self { src: p.parse()? 
}) } } impl<'t> Parsable<'t> for Src<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Src; Ok(match p.peek(parsing)?.kind { Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special @@ -379,7 +382,7 @@ impl<'t> Parsable<'t> for Src<'t> { } } impl<'t> Parsable<'t> for Dst<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Dst; Ok(match p.peek(parsing)?.kind { Kind::Hash => match p.then(parsing)?.next(parsing)?.kind { @@ -399,7 +402,7 @@ impl<'t> Parsable<'t> for Dst<'t> { } } impl<'t> Parsable<'t> for JumpDst<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Jump; let mut neg = false; let out = loop { @@ -420,7 +423,7 @@ impl<'t> Parsable<'t> for JumpDst<'t> { } } impl<'t> Parsable<'t> for Width { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let out = match p.peek(Parsing::Width)?.kind() { Kind::Byte => Width::Byte, Kind::Word => Width::Word, @@ -431,7 +434,7 @@ impl<'t> Parsable<'t> for Width { } } impl<'t> Parsable<'t> for Reg { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let out = match p.peek(Parsing::Reg)?.kind { Kind::Reg(r) => r, ty => Err(p.error(Unexpected(ty), Parsing::Reg))?, @@ -441,17 +444,17 @@ impl<'t> Parsable<'t> for Reg { } } impl<'t> Parsable<'t> for Expr<'t> { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { p.expr() } } impl<'t, T: Parsable<'t>> Parsable<'t> for Box { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { Ok(Box::new(p.parse()?)) } } impl<'t, T: Parsable<'t>> Parsable<'t> for Vec { - fn parse(p: &mut Parser<'t>) -> PResult { + fn parse_with(p: &mut Parser<'t>) -> PResult { let parsing = Parsing::Vec; 
p.assert(Kind::OpenBrace, parsing)?; let mut out = vec![]; @@ -589,3 +592,6 @@ pub mod error { } impl std::error::Error for Error {} } + +#[cfg(test)] +mod tests; From 11bae9b348d18e1b59cfd89de482c08d00db1513 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Wed, 31 Jan 2024 17:59:55 -0600 Subject: [PATCH 08/12] Unit tests: Add more lexer tests, add parser tests --- src/lexer/tests.rs | 155 +++++++++++++++++++++++---- src/parser/tests.rs | 256 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+), 23 deletions(-) create mode 100644 src/parser/tests.rs diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs index df3c0dc..296e40c 100644 --- a/src/lexer/tests.rs +++ b/src/lexer/tests.rs @@ -1,7 +1,11 @@ use super::*; macro_rules! lex { - (type ($t:tt), $expected:expr) => { - let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token)); + (type ($($t:tt)*), $expected:expr) => { + let token = Lexer::new(stringify!($($t)*)).scan().expect(stringify!($($t:tt)* should yield a valid token)); + assert_eq!(token.kind, $expected); + }; + (str $t:literal, $expected:expr) => { + let token = Lexer::new($t).scan().expect(stringify!($t:tt should yield a valid token)); assert_eq!(token.kind, $expected); }; ({ $($t:tt)* }) => { @@ -40,27 +44,132 @@ fn no_operand_emulated() { lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm } #[test] +fn br() { + lex!(type (br), TokenKind::Special(Special::Br)); +} +#[test] +fn one_operand_emulated() { + lex!(type (pop), TokenKind::OneEm(OneEm::Pop)); + lex!(type (rla), TokenKind::OneEm(OneEm::Rla)); + lex!(type (rlc), TokenKind::OneEm(OneEm::Rlc)); + lex!(type (inv), TokenKind::OneEm(OneEm::Inv)); + lex!(type (clr), TokenKind::OneEm(OneEm::Clr)); + lex!(type (tst), TokenKind::OneEm(OneEm::Tst)); + lex!(type (dec), TokenKind::OneEm(OneEm::Dec)); + lex!(type (decd), TokenKind::OneEm(OneEm::Decd)); + lex!(type (inc), TokenKind::OneEm(OneEm::Inc)); + lex!(type 
(incd), TokenKind::OneEm(OneEm::Incd)); + lex!(type (adc), TokenKind::OneEm(OneEm::Adc)); + lex!(type (dadc), TokenKind::OneEm(OneEm::Dadc)); + lex!(type (sbc), TokenKind::OneEm(OneEm::Sbc)); +} +#[test] +fn one_operand() { + lex!(type (rrc), TokenKind::OneArg(OneArg::Rrc)); + lex!(type (swpb), TokenKind::OneArg(OneArg::Swpb)); + lex!(type (rra), TokenKind::OneArg(OneArg::Rra)); + lex!(type (sxt), TokenKind::OneArg(OneArg::Sxt)); + lex!(type (push), TokenKind::OneArg(OneArg::Push)); + lex!(type (call), TokenKind::OneArg(OneArg::Call)); + lex!(type (reti), TokenKind::OneArg(OneArg::Reti)); +} +#[test] +fn two_operand() { + lex!(type (mov), TokenKind::TwoArg(TwoArg::Mov)); + lex!(type (add), TokenKind::TwoArg(TwoArg::Add)); + lex!(type (addc), TokenKind::TwoArg(TwoArg::Addc)); + lex!(type (subc), TokenKind::TwoArg(TwoArg::Subc)); + lex!(type (sub), TokenKind::TwoArg(TwoArg::Sub)); + lex!(type (cmp), TokenKind::TwoArg(TwoArg::Cmp)); + lex!(type (dadd), TokenKind::TwoArg(TwoArg::Dadd)); + lex!(type (bit), TokenKind::TwoArg(TwoArg::Bit)); + lex!(type (bic), TokenKind::TwoArg(TwoArg::Bic)); + lex!(type (bis), TokenKind::TwoArg(TwoArg::Bis)); + lex!(type (xor), TokenKind::TwoArg(TwoArg::Xor)); + lex!(type (and), TokenKind::TwoArg(TwoArg::And)); +} +#[test] +fn jump() { + lex!(type (jne), TokenKind::Jump(Jump::Jne)); + lex!(type (jnz), TokenKind::Jump(Jump::Jnz)); + lex!(type (jeq), TokenKind::Jump(Jump::Jeq)); + lex!(type (jz), TokenKind::Jump(Jump::Jz)); + lex!(type (jnc), TokenKind::Jump(Jump::Jnc)); + lex!(type (jlo), TokenKind::Jump(Jump::Jlo)); + lex!(type (jc), TokenKind::Jump(Jump::Jc)); + lex!(type (jhs), TokenKind::Jump(Jump::Jhs)); + lex!(type (jn), TokenKind::Jump(Jump::Jn)); + lex!(type (jge), TokenKind::Jump(Jump::Jge)); + lex!(type (jl), TokenKind::Jump(Jump::Jl)); + lex!(type (jmp), TokenKind::Jump(Jump::Jmp)); +} +#[test] fn registers() { - lex!(type(pc), TokenKind::Reg(Reg::PC)); - lex!(type(sp), TokenKind::Reg(Reg::SP)); - lex!(type(sr), 
TokenKind::Reg(Reg::SR)); - lex!(type(cg), TokenKind::Reg(Reg::CG)); - lex!(type(r0), TokenKind::Reg(Reg::PC)); - lex!(type(r1), TokenKind::Reg(Reg::SP)); - lex!(type(r2), TokenKind::Reg(Reg::SR)); - lex!(type(r3), TokenKind::Reg(Reg::CG)); - lex!(type(r4), TokenKind::Reg(Reg::R4)); - lex!(type(r5), TokenKind::Reg(Reg::R5)); - lex!(type(r6), TokenKind::Reg(Reg::R6)); - lex!(type(r7), TokenKind::Reg(Reg::R7)); - lex!(type(r8), TokenKind::Reg(Reg::R8)); - lex!(type(r9), TokenKind::Reg(Reg::R9)); - lex!(type(r10), TokenKind::Reg(Reg::R10)); - lex!(type(r11), TokenKind::Reg(Reg::R11)); - lex!(type(r12), TokenKind::Reg(Reg::R12)); - lex!(type(r13), TokenKind::Reg(Reg::R13)); - lex!(type(r14), TokenKind::Reg(Reg::R14)); - lex!(type(r15), TokenKind::Reg(Reg::R15)); + lex!(type (pc), TokenKind::Reg(Reg::PC)); + lex!(type (sp), TokenKind::Reg(Reg::SP)); + lex!(type (sr), TokenKind::Reg(Reg::SR)); + lex!(type (cg), TokenKind::Reg(Reg::CG)); + lex!(type (r0), TokenKind::Reg(Reg::PC)); + lex!(type (r1), TokenKind::Reg(Reg::SP)); + lex!(type (r2), TokenKind::Reg(Reg::SR)); + lex!(type (r3), TokenKind::Reg(Reg::CG)); + lex!(type (r4), TokenKind::Reg(Reg::R4)); + lex!(type (r5), TokenKind::Reg(Reg::R5)); + lex!(type (r6), TokenKind::Reg(Reg::R6)); + lex!(type (r7), TokenKind::Reg(Reg::R7)); + lex!(type (r8), TokenKind::Reg(Reg::R8)); + lex!(type (r9), TokenKind::Reg(Reg::R9)); + lex!(type (r10), TokenKind::Reg(Reg::R10)); + lex!(type (r11), TokenKind::Reg(Reg::R11)); + lex!(type (r12), TokenKind::Reg(Reg::R12)); + lex!(type (r13), TokenKind::Reg(Reg::R13)); + lex!(type (r14), TokenKind::Reg(Reg::R14)); + lex!(type (r15), TokenKind::Reg(Reg::R15)); } -// TODO: opcode tests, misc. special character tests, etc. 
+#[test] +fn delimiters() { + lex!(str "", TokenKind::Eof); + lex!(str "\n", TokenKind::Newline); + lex!(str "(", TokenKind::OpenParen); + lex!(str ")", TokenKind::CloseParen); + lex!(str "{", TokenKind::OpenCurly); + lex!(str "}", TokenKind::CloseCurly); + lex!(str "[", TokenKind::OpenBrace); + lex!(str "]", TokenKind::CloseBrace); +} + +#[test] +fn comment() { + lex!(str "; this is a comment!\n\n", TokenKind::Comment); +} + +#[test] +fn other() { + // lex!(type (), TokenKind::) + lex!(type (,), TokenKind::Comma); + lex!(type (:), TokenKind::Colon); + lex!(type (!), TokenKind::Bang); + lex!(type (@), TokenKind::At); + lex!(type (&), TokenKind::Amp); + lex!(type (|), TokenKind::Bar); + lex!(type (^), TokenKind::Caret); + lex!(type (*), TokenKind::Star); + lex!(type (#), TokenKind::Hash); + lex!(type ($), TokenKind::Dollar); + lex!(type (%), TokenKind::Percent); + lex!(type (+), TokenKind::Plus); + lex!(type (-), TokenKind::Minus); + lex!(type (/), TokenKind::Slash); + lex!(type (<<), TokenKind::Lsh); + lex!(type (>>), TokenKind::Rsh); + lex!(type (.directive), TokenKind::Directive); + lex!(type (identifier), TokenKind::Identifier); + lex!(type (.b), TokenKind::Byte); + lex!(type (.w), TokenKind::Word); +} + +#[test] +fn ignores_leading_whitespace() { + lex!(str " \u{a0}\t\t\t\t\t\t\t-", TokenKind::Minus); +} diff --git a/src/parser/tests.rs b/src/parser/tests.rs new file mode 100644 index 0000000..12f77f3 --- /dev/null +++ b/src/parser/tests.rs @@ -0,0 +1,256 @@ +#![allow(non_upper_case_globals)] +use super::*; +use crate::lexer::token; + +/// Because [assert_matches](core::assert_matches::assert_matches) is unstable +macro_rules! assert_matches { + ($e: expr, $($p: pat $(if $condition:expr)?)* ) => { + match $e { + $($p $(if $condition)? => (),)* + _ => panic!("{}", stringify!($e did not match $($p),*)), + } + }; +} + +/// Simplified grammar for constructing an expression +macro_rules! 
expr { + ($ident:ident) => { + Expr::Ident(stringify!($ident)).into() + }; + ($lit:literal) => { + Expr::Number($lit).into() + }; + (& $lit:literal) => { + Expr::AddrOf($lit).into() + }; + (($($t:tt)*)) => { + Expr::Group(expr!($($t)*)) + }; + ([$($op:tt)*] $($t:tt)*) => { + Expr::Unary(vec![$(UnOp::$op),*], expr!($($t)*)) + }; + (($($a:tt)*) $($op:tt ($($b:tt)*))+) => { + Expr::Binary(expr!($($a)*), vec![$((BinOp::$op, expr!($($b)*))),+]) + } +} + +macro_rules! passert { + ($expected:expr, $text:literal) => { + assert_eq!($expected, Parsable::parse($text).unwrap()) + }; +} + +// #[test] +// fn statements() { +// passert!(, ""); +// } +// #[test] +// fn statement() { +// passert!(, ""); +// } +#[test] +fn directive() { + passert!(Directive::Org(expr!(0x8000)), ".org 0x8000"); + passert!(Directive::String("Hello, world!"), ".string \"Hello, world!\""); + assert_eq!(Directive::parse(".word 0x40").unwrap(), Directive::Word(expr!(0x40))); + passert!( + Directive::Words(vec![expr!(0x40), expr!(0x41), expr!(0x42), expr!(0x43)]), + ".words [ 0x40 0x41 0x42 0x43 ]" + ); +} +// #[test] +// fn instruction() { +// passert!(, ""); +// } +#[test] +fn instruction_kind() { + assert_matches!(Parsable::parse("nop").unwrap(), InstructionKind::NoEm(NoEm { .. })); + + assert_matches!(Parsable::parse("pop sp").unwrap(), InstructionKind::OneEm(OneEm { .. 
})); +} +#[test] +fn no_em() { + passert!(NoEm { opcode: token::NoEm::Nop }, "nop"); + passert!(NoEm { opcode: token::NoEm::Ret }, "ret"); + passert!(NoEm { opcode: token::NoEm::Clrc }, "clrc"); + passert!(NoEm { opcode: token::NoEm::Clrz }, "clrz"); + passert!(NoEm { opcode: token::NoEm::Clrn }, "clrn"); + passert!(NoEm { opcode: token::NoEm::Setc }, "setc"); + passert!(NoEm { opcode: token::NoEm::Setz }, "setz"); + passert!(NoEm { opcode: token::NoEm::Setn }, "setn"); + passert!(NoEm { opcode: token::NoEm::Dint }, "dint"); + passert!(NoEm { opcode: token::NoEm::Eint }, "eint"); +} +#[test] +fn one_em() { + const dst: Dst = Dst::Direct(Reg::R15); + let width = Width::Word; + passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop r15"); + passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla r15"); + passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc r15"); + passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv r15"); + passert!(OneEm { opcode: token::OneEm::Clr, width, dst }, "clr r15"); + passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst r15"); + passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec r15"); + passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd r15"); + passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc r15"); + passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd r15"); + passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc r15"); + passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc r15"); + passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc r15"); + + let width = Width::Byte; + passert!(OneEm { opcode: token::OneEm::Pop, width, dst }, "pop.b r15"); + passert!(OneEm { opcode: token::OneEm::Rla, width, dst }, "rla.b r15"); + passert!(OneEm { opcode: token::OneEm::Rlc, width, dst }, "rlc.b r15"); + passert!(OneEm { opcode: token::OneEm::Inv, width, dst }, "inv.b r15"); + passert!(OneEm { opcode: token::OneEm::Clr, 
width, dst }, "clr.b r15"); + passert!(OneEm { opcode: token::OneEm::Tst, width, dst }, "tst.b r15"); + passert!(OneEm { opcode: token::OneEm::Dec, width, dst }, "dec.b r15"); + passert!(OneEm { opcode: token::OneEm::Decd, width, dst }, "decd.b r15"); + passert!(OneEm { opcode: token::OneEm::Inc, width, dst }, "inc.b r15"); + passert!(OneEm { opcode: token::OneEm::Incd, width, dst }, "incd.b r15"); + passert!(OneEm { opcode: token::OneEm::Adc, width, dst }, "adc.b r15"); + passert!(OneEm { opcode: token::OneEm::Dadc, width, dst }, "dadc.b r15"); + passert!(OneEm { opcode: token::OneEm::Sbc, width, dst }, "sbc.b r15"); +} +#[test] +fn one_arg() { + const src: Src = Src::Direct(Reg::PC); + let width = Width::Word; + passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc pc"); + passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb pc"); + passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra pc"); + passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt pc"); + passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push pc"); + passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call pc"); + + let width = Width::Byte; + passert!(OneArg { opcode: token::OneArg::Rrc, width, src }, "rrc.b pc"); + passert!(OneArg { opcode: token::OneArg::Swpb, width, src }, "swpb.b pc"); + passert!(OneArg { opcode: token::OneArg::Rra, width, src }, "rra.b pc"); + passert!(OneArg { opcode: token::OneArg::Sxt, width, src }, "sxt.b pc"); + passert!(OneArg { opcode: token::OneArg::Push, width, src }, "push.b pc"); + passert!(OneArg { opcode: token::OneArg::Call, width, src }, "call.b pc"); +} +#[test] +fn two_arg() { + const src: Src = Src::Direct(Reg::R14); + const dst: Dst = Dst::Direct(Reg::R15); + let width = Width::Word; + passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add r14, r15"); + passert!(TwoArg { opcode: 
token::TwoArg::Addc, width, src, dst }, "addc r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and r14, r15"); + + let width = Width::Byte; + passert!(TwoArg { opcode: token::TwoArg::Mov, width, src, dst }, "mov.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Add, width, src, dst }, "add.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Addc, width, src, dst }, "addc.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Subc, width, src, dst }, "subc.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Sub, width, src, dst }, "sub.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Cmp, width, src, dst }, "cmp.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Dadd, width, src, dst }, "dadd.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bit, width, src, dst }, "bit.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bic, width, src, dst }, "bic.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Bis, width, src, dst }, "bis.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::Xor, width, src, dst }, "xor.b r14, r15"); + passert!(TwoArg { opcode: token::TwoArg::And, width, src, dst }, "and.b r14, r15"); +} +#[test] +fn jump() { + const dst100: JumpDst = JumpDst::Rel(100); + passert!(Jump { opcode: token::Jump::Jne, 
dst: dst100 }, "jne 100"); + passert!(Jump { opcode: token::Jump::Jnz, dst: dst100 }, "jnz 100"); + passert!(Jump { opcode: token::Jump::Jeq, dst: dst100 }, "jeq 100"); + passert!(Jump { opcode: token::Jump::Jz, dst: dst100 }, "jz 100"); + passert!(Jump { opcode: token::Jump::Jnc, dst: dst100 }, "jnc 100"); + passert!(Jump { opcode: token::Jump::Jlo, dst: dst100 }, "jlo 100"); + passert!(Jump { opcode: token::Jump::Jc, dst: dst100 }, "jc 100"); + passert!(Jump { opcode: token::Jump::Jhs, dst: dst100 }, "jhs 100"); + passert!(Jump { opcode: token::Jump::Jn, dst: dst100 }, "jn 100"); + passert!(Jump { opcode: token::Jump::Jge, dst: dst100 }, "jge 100"); + passert!(Jump { opcode: token::Jump::Jl, dst: dst100 }, "jl 100"); + passert!(Jump { opcode: token::Jump::Jmp, dst: dst100 }, "jmp 100"); +} +#[test] +fn reti() { + passert!(Reti, "reti"); +} +#[test] +fn br() { + passert!(Br { src: Src::Direct(Reg::R15) }, "br r15"); +} +#[test] +fn width() { + passert!(Width::Byte, ".b"); + passert!(Width::Word, ".w"); + passert!(Width::Word, ""); +} +#[test] +fn src() { + passert!(Src::Direct(Reg::R15), "r15"); + passert!(Src::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)"); + passert!(Src::Indirect(Reg::R15), "@r15"); + passert!(Src::PostInc(Reg::R15), "@r15+"); + passert!(Src::Absolute(expr!(0x1000)), "&0x1000"); + passert!(Src::Immediate(expr!(0x1000)), "#0x1000"); + passert!(Src::BareExpr(expr!(foo)), "foo"); +} +#[test] +fn dst() { + passert!(Dst::Direct(Reg::R15), "r15"); + passert!(Dst::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)"); + passert!(Dst::Absolute(expr!(0x1000)), "&0x1000"); + passert!(Dst::Special(DstSpecial::Zero), "#0"); + passert!(Dst::Special(DstSpecial::One), "#1"); +} +#[test] +fn jump_dst() { + passert!(JumpDst::Rel(100), "100"); + passert!(JumpDst::Rel(-100), "-100"); + passert!(JumpDst::Label("foo"), "foo"); +} + +#[test] +fn expr() { + // Terms= + passert!(expr!((1) Mul(2) Rem(3) Div(4)), "1 * 2 % 3 / 4"); + // Factors + passert!(expr!((1) Add(2) 
Sub(3)), "1 + 2 - 3"); + // Shift + passert!(expr!((1) Lsh(2) Rsh(3)), "1 << 2 >> 3"); + // Bitwise logic + passert!(expr!((1) And(2) Or(3) Xor(4)), "1 & 2 | 3 ^ 4"); + // Unary + passert!(expr!([Deref Neg Not] 1), "*-!1"); + // Number + passert!(Expr::Number(42), "42"); + // Identifier + passert!(Expr::Ident("foo"), "foo"); + // Addrof + passert!(Expr::AddrOf("bar"), "&bar"); + // Group + passert!(expr!((42)), "(42)"); + // All of the above + passert!( + expr!( + (4) Mul( + (3) Add( + (2) Lsh( + (1) And([Neg] 1) + ) Rsh([Deref] 2) + ) Add([Not] 3) + ) Mul(4) + ), + "4 * 3 + 2 << 1 & -1 >> *2 + !3 * 4" + ); +} From a63a4b7ececd588d541fe1dcb27255dad06f12a9 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Thu, 1 Feb 2024 11:59:21 -0600 Subject: [PATCH 09/12] util: Only contains `Span`, so rename module to `span.rs` --- src/assembler.rs | 4 ++-- src/lexer.rs | 2 +- src/lexer/token.rs | 2 +- src/lib.rs | 45 +-------------------------------------------- src/parser.rs | 4 ++-- src/parser/ast.rs | 2 +- src/preprocessor.rs | 2 +- src/span.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 52 insertions(+), 52 deletions(-) create mode 100644 src/span.rs diff --git a/src/assembler.rs b/src/assembler.rs index 22aef2f..28e3170 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -3,7 +3,7 @@ use error::{AResult, ErrorKind::*}; use std::collections::HashMap; -use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span}; +use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, span::Span}; use self::error::{Error, ErrorKind}; @@ -366,7 +366,7 @@ impl<'t> Assembler<'t> { pub mod error { use std::fmt::Display; - use crate::util::Span; + use crate::span::Span; pub type AResult = Result; diff --git a/src/lexer.rs b/src/lexer.rs index c2a8edc..a4c1290 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -5,7 +5,7 @@ pub mod token; use self::token::{Special, TokenKind, *}; -use crate::util::Span; +use 
crate::span::Span; use std::{ iter::Peekable, str::{CharIndices, FromStr}, diff --git a/src/lexer/token.rs b/src/lexer/token.rs index a467f68..119afb6 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -2,7 +2,7 @@ //! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span] //! //! [Tokens](Token) are a borrowed, and cannot outlive their source slice (lifetime `'t`) -use crate::util::Span; +use crate::span::Span; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token<'t> { pub lexeme: &'t str, diff --git a/src/lib.rs b/src/lib.rs index 63a4e5a..615d18f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,50 +54,7 @@ //! └─ EndOfFile //! ``` -pub mod util { - use std::{ - fmt::{Debug, Display}, - ops::{Index, Range}, - }; - /// A [Clone] + [Copy] + [!Iterator](Iterator) <\code> version of a [Range] - #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct Span { - pub start: Idx, - pub end: Idx, - } - impl From> for Range { - fn from(value: Span) -> Self { - value.start..value.end - } - } - impl From> for Span { - fn from(value: Range) -> Self { - Self { start: value.start, end: value.end } - } - } - impl Index> for [T] { - type Output = [T]; - fn index(&self, index: Span) -> &Self::Output { - self.index(Range::from(index)) - } - } - impl Index> for str { - type Output = str; - fn index(&self, index: Span) -> &Self::Output { - self.index(Range::from(index)) - } - } - impl Debug for Span { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}..{:?}", self.start, self.end) - } - } - impl Display for Span { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}..{}", self.start, self.end) - } - } -} +pub mod span; pub mod lexer; diff --git a/src/parser.rs b/src/parser.rs index e9c5643..451487e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -13,7 +13,7 @@ use crate::{ Lexer, }, preprocessor::Preprocessor, - 
util::Span, + span::Span, }; use ast::*; @@ -497,7 +497,7 @@ impl<'t> Parser<'t> { pub mod error { use super::Kind; - use crate::util::Span; + use crate::span::Span; use std::{fmt::Display, num::TryFromIntError}; pub type PResult = Result; diff --git a/src/parser/ast.rs b/src/parser/ast.rs index a798dd1..8337bb5 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -2,7 +2,7 @@ /// Represents MSP430 instructions, use crate::{ lexer::token::{self, Reg, Token}, - util::Span, + span::Span, }; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/preprocessor.rs b/src/preprocessor.rs index fd624b9..9df3bfe 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -5,7 +5,7 @@ use crate::{ token::{Token, TokenKind as Kind}, Lexer, }, - util::Span, + span::Span, }; use std::collections::{HashMap, VecDeque}; diff --git a/src/span.rs b/src/span.rs new file mode 100644 index 0000000..4ca993b --- /dev/null +++ b/src/span.rs @@ -0,0 +1,43 @@ +//! A [Span] is a [Range] that does not implement [Iterator]. It is a [Copy] type. 
+use std::{ + fmt::{Debug, Display}, + ops::{Index, Range}, +}; +/// A [Clone] + [Copy] + [!Iterator](Iterator) version of a [Range] +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Span { + pub start: Idx, + pub end: Idx, +} +impl From> for Range { + fn from(value: Span) -> Self { + value.start..value.end + } +} +impl From> for Span { + fn from(value: Range) -> Self { + Self { start: value.start, end: value.end } + } +} +impl Index> for [T] { + type Output = [T]; + fn index(&self, index: Span) -> &Self::Output { + self.index(Range::from(index)) + } +} +impl Index> for str { + type Output = str; + fn index(&self, index: Span) -> &Self::Output { + self.index(Range::from(index)) + } +} +impl Debug for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}..{:?}", self.start, self.end) + } +} +impl Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} From f6c191472060a9192f042070b43a94317e264284 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Thu, 1 Feb 2024 12:00:01 -0600 Subject: [PATCH 10/12] cargo doc: Fix linking errors in doc comments --- src/assembler.rs | 12 ++--- src/lib.rs | 115 +++++++++++++++++++++++++++++------------------ src/parser.rs | 2 +- 3 files changed, 79 insertions(+), 50 deletions(-) diff --git a/src/assembler.rs b/src/assembler.rs index 28e3170..a6e342d 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -184,8 +184,8 @@ impl<'t> Assemble<'t> for OneEm<'t> { } } impl<'t> Assemble<'t> for OneArg<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, width, src } = self; let (src_reg, src_mode, src_ext) = source(src); @@ 
-199,8 +199,8 @@ impl<'t> Assemble<'t> for OneArg<'t> { } } impl<'t> Assemble<'t> for TwoArg<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, width, src, dst } = self; let (src_reg, src_mode, src_ext) = source(src); @@ -224,8 +224,8 @@ impl<'t> Assemble<'t> for TwoArg<'t> { } } impl<'t> Assemble<'t> for Jump<'t> { - /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - /// [ 0 0 1 [cond:3] +- [word_offset:10 ] ] + /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]` + /// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]` fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { let Self { opcode, dst } = self; let word = 1 << 13 diff --git a/src/lib.rs b/src/lib.rs index 615d18f..88338f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,58 +1,87 @@ // © 2023 John Breaux //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption //! -//! This project aims to assemble any valid msp430 instructions, while being lenient about the -//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it -//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a -//! useful message to the author.) +//! This project aims to assemble any valid msp430 instructions, while including important quality +//! of life features such as constant expression evaluation. //! -//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, -//! unless syntactically relevant. It will also discard comma-separators between operands of a -//! two-operand instruction. +//! ## Tokenization +//! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer, +//! 
and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind). //! -//! It returns an AST structured as follows +//! ## Preprocessing +//! The [`Preprocessor`](preprocessor::Preprocessor) will filter +//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive. +//! +//! ## Parsing +//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer) +//! and returns an [AST](parser::ast) structured roughly as follows: //! ```text -//! Root -//! ├─ Line -//! │ └─ Empty -//! ├─ Line +//! Statements +//! ├─ Stmt //! │ └─ Comment -//! ├─ Line +//! ├─ Stmt //! │ └─ Directive // Pre- or Post-processor directive -//! ├─ Linel +//! ├─ Stmt //! │ └─ Label // Label definition -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Single +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ NoEm // A zero-operand "emulated" instruction +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ OneEm // A one-operand "emulated" instruction +//! │ ├─ Opcode //! │ ├─ Width -//! │ └─ PrimaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. -//! │ └─ Number // Index, absolute address or immediate value. -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Double +//! │ └─ Dst // A destination register has several addressing modes: +//! │ └─ Direct // - The contents of a register +//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index +//! │ ╶─ Absolute // - An immediate absolute address +//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings. +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ OneArg // A one-operand instruction +//! │ ├─ Opcode //! │ ├─ Width -//! │ ├─ PrimaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. -//! 
│ │ └─ Number // Index, absolute address or immediate value. -//! │ └─ SecondaryOperand -//! │ ├─ Identifier // Label, for relative-addressed data/code -//! │ ├─ Register // Direct or indexed register -//! │ └─ Number // Index or absolute address -//! ├─ Line -//! │ └─ Instruction -//! │ ├─ Opcode -//! │ └─ Encoding::Jump -//! │ └─ JumpTarget -//! │ ├─ Identifier // Label -//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) -//! └─ Line -//! └─ EndOfFile +//! │ └─ Src // A source register has even more addressing modes: +//! │ └─ Direct // - The contents of a register +//! │ ╶─ Indexed // - The register, as a pointer, plus a byte index +//! │ ╶─ Indirect // - The word at the address stored in the register +//! │ // (like Indexed, but without an extension word.) +//! │ ╶─ PostIncrement // - Indirect, but the register is post-incremented by 1 +//! │ // (or, if it's the PC or SP, by 2) +//! │ ╶─ Absolute // - An immediate absolute address +//! │ ╶─ Immediate // - An immediate 16-bit number +//! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings. +//! ├─ Stmt +//! │ └─ Insn +//! │ └─ TwoArg // A two-operand instruction +//! │ ├─ Opcode +//! │ ├─ Width +//! │ ├─ Src +//! │ └─ Dst +//! └─ Stmt +//! └─ Insn +//! └─ Jump // A relative jump instruction +//! ├─ Opcode // The jump condition +//! └─ JumpDst // A jump instruction's destination can be either: +//! └─ Rel // - An even, signed 11-bit offset +//! ╶─ Label // - A label to jump to //! ``` +//! +//! ## Canonicalization +//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize): +//! - Expressions which act exclusively on numbers are eagerly evaluated +//! - Expressions which begin with a numeric part are repacked for late evaluation +//! - "Emulated" instructions are desugared into their canonical counterparts +//! +//! ## Assembly +//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and +//! 1. 
Encodes all [Instructions](parser::ast::Instruction) into 16-bit words +//! 2. Records all jump labels, for backpatching +//! 3. Records all expressions, for late evaluation +//! 4. Performs late evaluation and backpatching +//! +//! If a non-canonical instruction is found, the assembler will print a warning, +//! and canonicalize it. pub mod span; diff --git a/src/parser.rs b/src/parser.rs index 451487e..32a0744 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -511,7 +511,7 @@ pub mod error { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ErrorKind { LexError, - /// Returned when [Parsing::ConstExpr] fails without consuming + /// Returned when [Parsing::Expr] fails without consuming NotExpr, DivZero, NonNumeric(Kind), From 618200dc4288f6ff4ee7919084034673d96b490e Mon Sep 17 00:00:00 2001 From: John Breaux Date: Thu, 1 Feb 2024 12:00:55 -0600 Subject: [PATCH 11/12] msp430-asm: Make UI a little bit prettier --- msp430-asm/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/msp430-asm/src/main.rs b/msp430-asm/src/main.rs index 910892e..2a6fefa 100644 --- a/msp430-asm/src/main.rs +++ b/msp430-asm/src/main.rs @@ -58,12 +58,12 @@ mod args { } mod repl { - //! The REPL reads a line, parses it, evaluates the line, and prints, in a loop + //!
The REPL reads, evaluates, and prints in a loop use super::*; use msp430_asm::cursor::*; use std::io::{stderr, Write}; - /// Prints the line number + /// Formats the line number macro linenr($n: expr) { format_args!("{:4}: ", $n) } @@ -97,13 +97,13 @@ mod repl { line.clear(); printfl!("{}", linenr!(linenr)); } - println!("{}", fg!(Gray, "[EOF]")); + println!("{}", fg!(DarkGray, "[EOF]")); Ok(()) } /// Rewrites the line in OK format, with a green linenr fn format_ok(line: &str, linenr: i32) { - println!("{}{}{}", previous!(1), fg!(Green, "{:4}", linenr!(linenr)), line.trim_end(),); + println!("{}{}{}", previous!(1), fg!(Lime, "{:4}", linenr!(linenr)), line.trim_end(),); } /// Pretty-prints a line error From e8fbae9837e463d2b4e694df7a7daa45cbf1c5e0 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Thu, 1 Feb 2024 12:37:59 -0600 Subject: [PATCH 12/12] Update copyright header --- msp430-asm/src/main.rs | 2 ++ src/assembler.rs | 2 ++ src/lexer.rs | 1 + src/lexer/token.rs | 1 + src/lib.rs | 3 ++- src/parser.rs | 1 + src/parser/ast.rs | 1 + src/preprocessor.rs | 2 ++ src/span.rs | 2 ++ 9 files changed, 14 insertions(+), 1 deletion(-) diff --git a/msp430-asm/src/main.rs b/msp430-asm/src/main.rs index 2a6fefa..1f638c3 100644 --- a/msp430-asm/src/main.rs +++ b/msp430-asm/src/main.rs @@ -1,3 +1,5 @@ +// © 2023-2024 John Breaux +//See LICENSE.md for license //! Simple frontend for the assembler #![feature(decl_macro)] use argp::parse_args_or_exit; diff --git a/src/assembler.rs b/src/assembler.rs index a6e342d..191ccf0 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -1,3 +1,5 @@ +// © 2023-2024 John Breaux +//See LICENSE.md for license //! Assembles a binary using the given [AST](crate::parser::ast) use error::{AResult, ErrorKind::*}; diff --git a/src/lexer.rs b/src/lexer.rs index a4c1290..5013e33 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,4 +1,5 @@ // © 2023-2024 John Breaux +//See LICENSE.md for license //! 
The [Lexer] turns a [sequence of characters](str) into a stream of //! [lexically-tagged tokens](token) diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 119afb6..33a3ca0 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,4 +1,5 @@ // © 2023-2024 John Breaux +//See LICENSE.md for license //! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span] //! //! [Tokens](Token) are a borrowed, and cannot outlive their source slice (lifetime `'t`) diff --git a/src/lib.rs b/src/lib.rs index 88338f5..a4afd9a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ -// © 2023 John Breaux +// © 2023-2024 John Breaux +//See LICENSE.md for license //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption //! //! This project aims to assemble any valid msp430 instructions, while including important quality diff --git a/src/parser.rs b/src/parser.rs index 32a0744..9f49ccf 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,4 +1,5 @@ // © 2023-2024 John Breaux +//See LICENSE.md for license //! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast) pub mod ast; diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 8337bb5..afd1fce 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -1,4 +1,5 @@ // © 2023-2024 John Breaux +//See LICENSE.md for license /// Represents MSP430 instructions, use crate::{ lexer::token::{self, Reg, Token}, diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 9df3bfe..a340154 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -1,4 +1,6 @@ // © 2023-2024 John Breaux +//See LICENSE.md for license +//! Preprocesses a token stream by removing and replacing tokens according to `.define` directives use crate::{ lexer::{ diff --git a/src/span.rs b/src/span.rs index 4ca993b..379f691 100644 --- a/src/span.rs +++ b/src/span.rs @@ -1,3 +1,5 @@ +// © 2023-2024 John Breaux +//See LICENSE.md for license //! 
A [Span] is a [Range] that does not implement [Iterator]. It is a [Copy] type. use std::{ fmt::{Debug, Display},