v0.3.0 #1
| @@ -1,12 +1,12 @@ | |||||||
| unstable_features = true | unstable_features = true | ||||||
| max_width = 120 | max_width = 100 | ||||||
| wrap_comments = true | wrap_comments = true | ||||||
| comment_width = 100 | comment_width = 100 | ||||||
|  |  | ||||||
| # Allow structs to fill an entire line | # Allow structs to fill an entire line | ||||||
| use_small_heuristics = "Max" | use_small_heuristics = "Max" | ||||||
| # Allow small functions on single line | # Allow small functions on single line | ||||||
| fn_single_line = true | # fn_single_line = true | ||||||
|  |  | ||||||
| # Alignment | # Alignment | ||||||
| enum_discrim_align_threshold = 12 | enum_discrim_align_threshold = 12 | ||||||
|   | |||||||
							
								
								
									
										33
									
								
								Cargo.toml
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								Cargo.toml
									
									
									
									
									
								
							| @@ -1,23 +1,24 @@ | |||||||
| [package] | [workspace] | ||||||
| name = "msp430-asm" | members = ["msp430-asm"] | ||||||
| version = "0.2.0" | # default-members = ["msp430-asm"] | ||||||
| edition = "2021" |  | ||||||
| rust-version = "1.70" | [workspace.package] | ||||||
| authors = ["John Breaux <j@soft.fish>"] | authors = ["John Breaux <j@soft.fish>"] | ||||||
|  | version = "0.3.0" | ||||||
|  | license = "MIT" | ||||||
|  | edition = "2021" | ||||||
| publish = false | publish = false | ||||||
|  |  | ||||||
| [features] | [package] | ||||||
| default = [] | name = "libmsp430" | ||||||
|  | authors.workspace = true | ||||||
|  | version.workspace = true | ||||||
|  | license.workspace = true | ||||||
|  | edition.workspace = true | ||||||
|  | publish.workspace = true | ||||||
|  |  | ||||||
| [[example]] |  | ||||||
| name = "msp430-asm" |  | ||||||
| path = "examples/msp430-asm/main.rs" |  | ||||||
| # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||||||
|  |  | ||||||
| [dependencies] | [dependencies] | ||||||
| regex = "1.9.3" | # Provides very quick boolean tests for XID_START and XID_CONTINUE | ||||||
| # TODO: Remove dependency on regex | unicode-ident = "1.0.12" | ||||||
|  |  | ||||||
| [dev-dependencies] |  | ||||||
| anes = { version = "0.1.6" } |  | ||||||
| argp = { version = "0.3.0" } |  | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								LICENSE.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								LICENSE.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  |  The MIT License (MIT) | ||||||
|  |  | ||||||
|  | Copyright © 2023-2024 John Breaux | ||||||
|  |  | ||||||
|  | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | ||||||
|  |  | ||||||
|  | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | ||||||
|  |  | ||||||
|  | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||||||
| @@ -1,222 +1,224 @@ | |||||||
|  | //! TODO: rewrite for v0.3.0 | ||||||
| use super::*; | use super::*; | ||||||
|  |  | ||||||
| /// Creates a [Parsable] implementation for an enum whose variants |  | ||||||
| /// are named after other [Parsable] items |  | ||||||
| macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) { |  | ||||||
|     $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* } |  | ||||||
|     impl ::msp430_asm::parser::parsable::Parsable for $id { |  | ||||||
|         fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|         where T: TokenStream<'text> { |  | ||||||
|             $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )* |  | ||||||
|             { Err(ParseError::UnrecognizedDirective("".into())) } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     impl TryFrom<&str> for $id { |  | ||||||
|         type Error = ParseError; |  | ||||||
|         fn try_from(value: &str) -> Result<Self, Self::Error> { |  | ||||||
|             Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed()) |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| make_parsable! { | // /// Creates a [Parsable] implementation for an enum whose variants | ||||||
|     #[derive(Debug)] | // /// are named after other [Parsable] items | ||||||
|     pub enum SyntaxFragment { | // macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) { | ||||||
|         Opcode, | //     $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* } | ||||||
|         PrimaryOperand, | //     impl ::msp430_asm::parser::parsable::Parsable for $id { | ||||||
|         Number, | //         fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> | ||||||
|     } | //         where T: TokenStream<'text> { | ||||||
| } | //             $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )* | ||||||
|  | //             { Err(ParseError::UnrecognizedDirective("".into())) } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  | //     impl TryFrom<&str> for $id { | ||||||
|  | //         type Error = ParseError; | ||||||
|  | //         fn try_from(value: &str) -> Result<Self, Self::Error> { | ||||||
|  | //             Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed()) | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
| impl SyntaxFragment { | // make_parsable! { | ||||||
|     pub fn info(&self) { | //     #[derive(Debug)] | ||||||
|         match self { | //     pub enum SyntaxFragment { | ||||||
|             SyntaxFragment::Opcode(o) => Self::opcode_info(o), | //         Opcode, | ||||||
|             SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o), | //         PrimaryOperand, | ||||||
|             SyntaxFragment::Number(n) => println!("The number {n}"), | //         Number, | ||||||
|         } | //     } | ||||||
|     } | // } | ||||||
|     fn opcode_info(o: &Opcode) { |  | ||||||
|         let (desc, as_rust) = usage(o); |  | ||||||
|         println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o)); |  | ||||||
|         footer!("https://mspgcc.sourceforge.net/manual/x223.html"); |  | ||||||
|     } |  | ||||||
|     // TODO: re-enable full instruction decoding |  | ||||||
|     // fn encoding_info(e: &Encoding) { |  | ||||||
|     //     match e { |  | ||||||
|     //         Encoding::Single { dst, .. } => Self::operand_info(dst), |  | ||||||
|     //         Encoding::Jump { target } => println!("Jumps to (pc + {target})"), |  | ||||||
|     //         Encoding::Double { src, dst, .. } => { |  | ||||||
|     //             Self::operand_info(src); |  | ||||||
|     //             Self::operand_info(&dst.clone().into()) |  | ||||||
|     //         } |  | ||||||
|     //     } |  | ||||||
|     // } |  | ||||||
|     fn operand_info(o: &PrimaryOperand) { |  | ||||||
|         match o { |  | ||||||
|             PrimaryOperand::Direct(r) => Self::register_info(r), |  | ||||||
|             PrimaryOperand::Indirect(r) => { |  | ||||||
|                 Self::register_info(r); |  | ||||||
|                 println!("Indirect addressing mode: use data pointed to by {r}"); |  | ||||||
|             } |  | ||||||
|             PrimaryOperand::PostInc(r) => { |  | ||||||
|                 Self::register_info(r); |  | ||||||
|                 println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}"); |  | ||||||
|             } |  | ||||||
|             PrimaryOperand::Indexed(r, n) => { |  | ||||||
|                 Self::register_info(r); |  | ||||||
|                 println!("Indexed mode: use the data at {r}[{n}]"); |  | ||||||
|             } |  | ||||||
|             PrimaryOperand::Relative(_) => return, |  | ||||||
|             PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"), |  | ||||||
|             PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"), |  | ||||||
|             PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"), |  | ||||||
|             PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"), |  | ||||||
|             PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"), |  | ||||||
|             PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"), |  | ||||||
|             PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"), |  | ||||||
|             PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"), |  | ||||||
|         } |  | ||||||
|         footer!("https://mspgcc.sourceforge.net/manual/x82.html"); |  | ||||||
|     } |  | ||||||
|     fn register_info(r: &Register) { |  | ||||||
|         use Register as Re; |  | ||||||
|         match r { |  | ||||||
|             Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."), |  | ||||||
|             Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."), |  | ||||||
|             Re::sr => println!( |  | ||||||
|                 "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC", |  | ||||||
|             ), |  | ||||||
|             Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."), |  | ||||||
|             Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => { |  | ||||||
|                 println!("{r} is a callee-saved general purpose register.") |  | ||||||
|             } |  | ||||||
|             Re::r12 | Re::r13 | Re::r14 | Re::r15 => { |  | ||||||
|                 println!("{r} is a caller-saved general purpose register, allowed for return values.") |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Gets parameter usage information from the opcode's EncodingParser | // impl SyntaxFragment { | ||||||
| pub fn params(opcode: &Opcode) -> &'static str { | //     pub fn info(&self) { | ||||||
|     match opcode.resolve().1 { | //         match self { | ||||||
|         EncodingParser::Jump { target: None } => " target (relative address or label)", | //             SyntaxFragment::Opcode(o) => Self::opcode_info(o), | ||||||
|         EncodingParser::Single { width: None, dst: None } => "[.b] dst", | //             SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o), | ||||||
|         EncodingParser::Single { dst: None, .. } => " dst", | //             SyntaxFragment::Number(n) => println!("The number {n}"), | ||||||
|         EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst", | //         } | ||||||
|         EncodingParser::Double { src: None, .. } => "[.b] src", | //     } | ||||||
|         EncodingParser::Double { dst: None, .. } => "[.b] dst", | //     fn opcode_info(o: &Opcode) { | ||||||
|         EncodingParser::Double { .. } => "[.b]", | //         let (desc, as_rust) = usage(o); | ||||||
|         EncodingParser::Reflexive { reg: None, .. } => "[.b] dst", | //         println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o)); | ||||||
|         _ => "", | //         footer!("https://mspgcc.sourceforge.net/manual/x223.html"); | ||||||
|     } | //     } | ||||||
| } | //     // TODO: re-enable full instruction decoding | ||||||
|  | //     // fn encoding_info(e: &Encoding) { | ||||||
|  | //     //     match e { | ||||||
|  | //     //         Encoding::Single { dst, .. } => Self::operand_info(dst), | ||||||
|  | //     //         Encoding::Jump { target } => println!("Jumps to (pc + {target})"), | ||||||
|  | //     //         Encoding::Double { src, dst, .. } => { | ||||||
|  | //     //             Self::operand_info(src); | ||||||
|  | //     //             Self::operand_info(&dst.clone().into()) | ||||||
|  | //     //         } | ||||||
|  | //     //     } | ||||||
|  | //     // } | ||||||
|  | //     fn operand_info(o: &PrimaryOperand) { | ||||||
|  | //         match o { | ||||||
|  | //             PrimaryOperand::Direct(r) => Self::register_info(r), | ||||||
|  | //             PrimaryOperand::Indirect(r) => { | ||||||
|  | //                 Self::register_info(r); | ||||||
|  | //                 println!("Indirect addressing mode: use data pointed to by {r}"); | ||||||
|  | //             } | ||||||
|  | //             PrimaryOperand::PostInc(r) => { | ||||||
|  | //                 Self::register_info(r); | ||||||
|  | //                 println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}"); | ||||||
|  | //             } | ||||||
|  | //             PrimaryOperand::Indexed(r, n) => { | ||||||
|  | //                 Self::register_info(r); | ||||||
|  | //                 println!("Indexed mode: use the data at {r}[{n}]"); | ||||||
|  | //             } | ||||||
|  | //             PrimaryOperand::Relative(_) => return, | ||||||
|  | //             PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"), | ||||||
|  | //             PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"), | ||||||
|  | //             PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"), | ||||||
|  | //             PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"), | ||||||
|  | //             PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"), | ||||||
|  | //             PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"), | ||||||
|  | //             PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"), | ||||||
|  | //             PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"), | ||||||
|  | //         } | ||||||
|  | //         footer!("https://mspgcc.sourceforge.net/manual/x82.html"); | ||||||
|  | //     } | ||||||
|  | //     fn register_info(r: &Register) { | ||||||
|  | //         use Register as Re; | ||||||
|  | //         match r { | ||||||
|  | //             Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."), | ||||||
|  | //             Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."), | ||||||
|  | //             Re::sr => println!( | ||||||
|  | //                 "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC", | ||||||
|  | //             ), | ||||||
|  | //             Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."), | ||||||
|  | //             Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => { | ||||||
|  | //                 println!("{r} is a callee-saved general purpose register.") | ||||||
|  | //             } | ||||||
|  | //             Re::r12 | Re::r13 | Re::r14 | Re::r15 => { | ||||||
|  | //                 println!("{r} is a caller-saved general purpose register, allowed for return values.") | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
| pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) { | // // Gets parameter usage information from the opcode's EncodingParser | ||||||
|     match opcode { | // pub fn params(opcode: &Opcode) -> &'static str { | ||||||
|         // Single | //     match opcode.resolve().1 { | ||||||
|         Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"), | //         EncodingParser::Jump { target: None } => " target (relative address or label)", | ||||||
|         Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"), | //         EncodingParser::Single { width: None, dst: None } => "[.b] dst", | ||||||
|         Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"), | //         EncodingParser::Single { dst: None, .. } => " dst", | ||||||
|         Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"), | //         EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst", | ||||||
|         Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"), | //         EncodingParser::Double { src: None, .. } => "[.b] src", | ||||||
|         Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"), | //         EncodingParser::Double { dst: None, .. } => "[.b] dst", | ||||||
|         Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"), | //         EncodingParser::Double { .. } => "[.b]", | ||||||
|         // Jump | //         EncodingParser::Reflexive { reg: None, .. } => "[.b] dst", | ||||||
|         Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"), | //         _ => "", | ||||||
|         Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"), | //     } | ||||||
|         Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"), | // } | ||||||
|         Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"), |  | ||||||
|         Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"), |  | ||||||
|         Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"), |  | ||||||
|         Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"), |  | ||||||
|         Opcode::Jmp => ("Jump unconditionally", "pc += target"), |  | ||||||
|         // Double |  | ||||||
|         Opcode::Mov => ("Copy src into dst", "dst = src"), |  | ||||||
|         Opcode::Add => ("Add src to dst", "dst += src"), |  | ||||||
|         Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"), |  | ||||||
|         Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"), |  | ||||||
|         Opcode::Sub => ("Subtract src from dst", "dst -= src"), |  | ||||||
|         Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"), |  | ||||||
|         Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"), |  | ||||||
|         Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"), |  | ||||||
|         Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"), |  | ||||||
|         Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"), |  | ||||||
|         Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"), |  | ||||||
|         Opcode::And => ("Bitwise And src into dst", "dst &= src"), |  | ||||||
|         // Emulated |  | ||||||
|         Opcode::Nop => ("Does nothing", "{}"), |  | ||||||
|         Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"), |  | ||||||
|         Opcode::Br => ("Branches to the absolute address in src", "pc = src"), |  | ||||||
|         Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"), |  | ||||||
|         Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"), |  | ||||||
|         Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"), |  | ||||||
|         Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"), |  | ||||||
|         Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"), |  | ||||||
|         Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"), |  | ||||||
|         Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"), |  | ||||||
|         Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"), |  | ||||||
|         Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"), |  | ||||||
|         Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"), |  | ||||||
|         Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"), |  | ||||||
|         Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"), |  | ||||||
|         Opcode::Clr => ("Sets dst to 0", "dst = 0"), |  | ||||||
|         Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""), |  | ||||||
|         Opcode::Dec => ("Decrements dst", "dst -= 1"), |  | ||||||
|         Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"), |  | ||||||
|         Opcode::Inc => ("Increments dst", "dst += 1"), |  | ||||||
|         Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"), |  | ||||||
|         Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"), |  | ||||||
|         Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"), |  | ||||||
|         Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"), |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| const SINGLE: [Opcode; 7] = | // pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) { | ||||||
|     [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti]; | //     match opcode { | ||||||
|  | //         // Single | ||||||
|  | //         Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"), | ||||||
|  | //         Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"), | ||||||
|  | //         Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"), | ||||||
|  | //         Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"), | ||||||
|  | //         Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"), | ||||||
|  | //         Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"), | ||||||
|  | //         Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"), | ||||||
|  | //         // Jump | ||||||
|  | //         Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"), | ||||||
|  | //         Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"), | ||||||
|  | //         Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"), | ||||||
|  | //         Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"), | ||||||
|  | //         Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"), | ||||||
|  | //         Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"), | ||||||
|  | //         Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"), | ||||||
|  | //         Opcode::Jmp => ("Jump unconditionally", "pc += target"), | ||||||
|  | //         // Double | ||||||
|  | //         Opcode::Mov => ("Copy src into dst", "dst = src"), | ||||||
|  | //         Opcode::Add => ("Add src to dst", "dst += src"), | ||||||
|  | //         Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"), | ||||||
|  | //         Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"), | ||||||
|  | //         Opcode::Sub => ("Subtract src from dst", "dst -= src"), | ||||||
|  | //         Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"), | ||||||
|  | //         Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"), | ||||||
|  | //         Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"), | ||||||
|  | //         Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"), | ||||||
|  | //         Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"), | ||||||
|  | //         Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"), | ||||||
|  | //         Opcode::And => ("Bitwise And src into dst", "dst &= src"), | ||||||
|  | //         // Emulated | ||||||
|  | //         Opcode::Nop => ("Does nothing", "{}"), | ||||||
|  | //         Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"), | ||||||
|  | //         Opcode::Br => ("Branches to the absolute address in src", "pc = src"), | ||||||
|  | //         Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"), | ||||||
|  | //         Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"), | ||||||
|  | //         Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"), | ||||||
|  | //         Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"), | ||||||
|  | //         Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"), | ||||||
|  | //         Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"), | ||||||
|  | //         Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"), | ||||||
|  | //         Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"), | ||||||
|  | //         Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"), | ||||||
|  | //         Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"), | ||||||
|  | //         Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"), | ||||||
|  | //         Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"), | ||||||
|  | //         Opcode::Clr => ("Sets dst to 0", "dst = 0"), | ||||||
|  | //         Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""), | ||||||
|  | //         Opcode::Dec => ("Decrements dst", "dst -= 1"), | ||||||
|  | //         Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"), | ||||||
|  | //         Opcode::Inc => ("Increments dst", "dst += 1"), | ||||||
|  | //         Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"), | ||||||
|  | //         Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"), | ||||||
|  | //         Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"), | ||||||
|  | //         Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"), | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
| const JUMP: [Opcode; 8] = | // const SINGLE: [Opcode; 7] = | ||||||
|     [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp]; | //     [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti]; | ||||||
|  |  | ||||||
| #[rustfmt::skip] | // const JUMP: [Opcode; 8] = | ||||||
| const DOUBLE: [Opcode; 12] = [ | //     [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp]; | ||||||
|     Opcode::Mov,  Opcode::Add,  Opcode::Addc, Opcode::Subc, Opcode::Sub,  Opcode::Cmp, |  | ||||||
|     Opcode::Dadd, Opcode::Bit,  Opcode::Bic,  Opcode::Bis,  Opcode::Xor,  Opcode::And, |  | ||||||
| ]; |  | ||||||
| #[rustfmt::skip] |  | ||||||
| const SIMULATED: [Opcode; 24] = [ |  | ||||||
|     Opcode::Nop,  Opcode::Pop,  Opcode::Br,   Opcode::Ret,  Opcode::Clrc, Opcode::Setc, |  | ||||||
|     Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint, |  | ||||||
|     Opcode::Rla,  Opcode::Rlc,  Opcode::Inv,  Opcode::Clr,  Opcode::Tst,  Opcode::Dec, |  | ||||||
|     Opcode::Decd, Opcode::Inc,  Opcode::Incd, Opcode::Adc,  Opcode::Dadc, Opcode::Sbc, |  | ||||||
| ]; |  | ||||||
|  |  | ||||||
| pub fn list_opcodes() { | // #[rustfmt::skip] | ||||||
|     let mut stdout = std::io::stdout().lock(); | // const DOUBLE: [Opcode; 12] = [ | ||||||
|     header!(stdout, "Single-operand instructions:"); | //     Opcode::Mov,  Opcode::Add,  Opcode::Addc, Opcode::Subc, Opcode::Sub,  Opcode::Cmp, | ||||||
|     let _ = write_opcode_list(&mut stdout, &SINGLE); | //     Opcode::Dadd, Opcode::Bit,  Opcode::Bic,  Opcode::Bis,  Opcode::Xor,  Opcode::And, | ||||||
|     header!(stdout, "Relative Jump instructions:"); | // ]; | ||||||
|     let _ = write_opcode_list(&mut stdout, &JUMP); | // #[rustfmt::skip] | ||||||
|     header!(stdout, "Double-operand instructions:"); | // const SIMULATED: [Opcode; 24] = [ | ||||||
|     let _ = write_opcode_list(&mut stdout, &DOUBLE); | //     Opcode::Nop,  Opcode::Pop,  Opcode::Br,   Opcode::Ret,  Opcode::Clrc, Opcode::Setc, | ||||||
|     header!(stdout, "Simulated instructions:"); | //     Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint, | ||||||
|     let _ = write_opcode_list(&mut stdout, &SIMULATED); | //     Opcode::Rla,  Opcode::Rlc,  Opcode::Inv,  Opcode::Clr,  Opcode::Tst,  Opcode::Dec, | ||||||
| } | //     Opcode::Decd, Opcode::Inc,  Opcode::Incd, Opcode::Adc,  Opcode::Dadc, Opcode::Sbc, | ||||||
|  | // ]; | ||||||
|  |  | ||||||
| fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> { | // pub fn list_opcodes() { | ||||||
|     for (idx, opcode) in list.iter().enumerate() { | //     let mut stdout = std::io::stdout().lock(); | ||||||
|         write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?; | //     header!(stdout, "Single-operand instructions:"); | ||||||
|     } | //     let _ = write_opcode_list(&mut stdout, &SINGLE); | ||||||
|     if list.len() % 6 != 0 { | //     header!(stdout, "Relative Jump instructions:"); | ||||||
|         writeln!(f)?; | //     let _ = write_opcode_list(&mut stdout, &JUMP); | ||||||
|     } | //     header!(stdout, "Double-operand instructions:"); | ||||||
|     Ok(()) | //     let _ = write_opcode_list(&mut stdout, &DOUBLE); | ||||||
| } | //     header!(stdout, "Simulated instructions:"); | ||||||
|  | //     let _ = write_opcode_list(&mut stdout, &SIMULATED); | ||||||
|  | // } | ||||||
|  |  | ||||||
| macro header ($f:ident, $($x: expr),+) { | // fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> { | ||||||
|     {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();} | //     for (idx, opcode) in list.iter().enumerate() { | ||||||
| } | //         write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?; | ||||||
| macro footer ($($x: expr),+) { | //     } | ||||||
|     {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);} | //     if list.len() % 6 != 0 { | ||||||
| } | //         writeln!(f)?; | ||||||
|  | //     } | ||||||
|  | //     Ok(()) | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // macro header ($f:ident, $($x: expr),+) { | ||||||
|  | //     {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();} | ||||||
|  | // } | ||||||
|  | // macro footer ($($x: expr),+) { | ||||||
|  | //     {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);} | ||||||
|  | // } | ||||||
|   | |||||||
| @@ -3,59 +3,63 @@ | |||||||
| // https://mspgcc.sourceforge.net/manual/ln16.html | // https://mspgcc.sourceforge.net/manual/ln16.html | ||||||
| #![feature(decl_macro)] | #![feature(decl_macro)] | ||||||
|  |  | ||||||
| use anes::{Color, ResetAttributes, SetForegroundColor}; | fn main() { | ||||||
| use msp430_asm::parser::preamble::*; |     println!("Hello, world!") | ||||||
| use msp430_asm::preamble::*; |  | ||||||
| use std::{ |  | ||||||
|     error::Error, |  | ||||||
|     io::{stdin, IsTerminal, Write}, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| type AsmResult<T> = Result<T, Box<dyn Error>>; |  | ||||||
|  |  | ||||||
| mod data; |  | ||||||
|  |  | ||||||
| fn main() -> AsmResult<()> { |  | ||||||
|     if stdin().is_terminal() { |  | ||||||
|         hello(); |  | ||||||
|     } |  | ||||||
|     repl() |  | ||||||
| } | } | ||||||
|  |  | ||||||
| fn hello() { | // use anes::{Color, ResetAttributes, SetForegroundColor}; | ||||||
|     println!( | // use msp430_asm::parser::preamble::*; | ||||||
|         "{}{} v{} | // use msp430_asm::preamble::*; | ||||||
| This software contains instruction and register descriptions adapted from | // use std::{ | ||||||
| the mspgcc project's fantastic documentation, which is licensed under the GPL. | //     error::Error, | ||||||
| https://mspgcc.sourceforge.net/manual/book1.html{}\n", | //     io::{stdin, IsTerminal, Write}, | ||||||
|         SetForegroundColor(Color::DarkGray), | // }; | ||||||
|         env!("CARGO_BIN_NAME"), |  | ||||||
|         env!("CARGO_PKG_VERSION"), |  | ||||||
|         ResetAttributes |  | ||||||
|     ); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| fn repl() -> AsmResult<()> { | // type AsmResult<T> = Result<T, Box<dyn Error>>; | ||||||
|     printflush!("> "); |  | ||||||
|     let mut line = String::new(); |  | ||||||
|     while let Ok(len) = stdin().read_line(&mut line) { |  | ||||||
|         match len { |  | ||||||
|             0 => break, // No newline (reached EOF) |  | ||||||
|             1 => (),    // Line is empty |  | ||||||
|             _ => { |  | ||||||
|                 if line.starts_with('?') || line.starts_with("help") { |  | ||||||
|                     data::list_opcodes() |  | ||||||
|                 } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) { |  | ||||||
|                     sf.info(); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         printflush!("> "); |  | ||||||
|         line.clear(); |  | ||||||
|     } |  | ||||||
|     Ok(()) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| macro printflush ($($x: expr),+) { | // mod data; | ||||||
|     {print!($($x),+); let _ = ::std::io::stdout().flush();} |  | ||||||
| } | // fn main() -> AsmResult<()> { | ||||||
|  | //     if stdin().is_terminal() { | ||||||
|  | //         hello(); | ||||||
|  | //     } | ||||||
|  | //     repl() | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // fn hello() { | ||||||
|  | //     println!( | ||||||
|  | //         "{}{} v{} | ||||||
|  | // This software contains instruction and register descriptions adapted from | ||||||
|  | // the mspgcc project's fantastic documentation, which is licensed under the GPL. | ||||||
|  | // https://mspgcc.sourceforge.net/manual/book1.html{}\n", | ||||||
|  | //         SetForegroundColor(Color::DarkGray), | ||||||
|  | //         env!("CARGO_BIN_NAME"), | ||||||
|  | //         env!("CARGO_PKG_VERSION"), | ||||||
|  | //         ResetAttributes | ||||||
|  | //     ); | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // fn repl() -> AsmResult<()> { | ||||||
|  | //     printflush!("> "); | ||||||
|  | //     let mut line = String::new(); | ||||||
|  | //     while let Ok(len) = stdin().read_line(&mut line) { | ||||||
|  | //         match len { | ||||||
|  | //             0 => break, // No newline (reached EOF) | ||||||
|  | //             1 => (),    // Line is empty | ||||||
|  | //             _ => { | ||||||
|  | //                 if line.starts_with('?') || line.starts_with("help") { | ||||||
|  | //                     data::list_opcodes() | ||||||
|  | //                 } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) { | ||||||
|  | //                     sf.info(); | ||||||
|  | //                 } | ||||||
|  | //             } | ||||||
|  | //         } | ||||||
|  | //         printflush!("> "); | ||||||
|  | //         line.clear(); | ||||||
|  | //     } | ||||||
|  | //     Ok(()) | ||||||
|  | // } | ||||||
|  |  | ||||||
|  | // macro printflush ($($x: expr),+) { | ||||||
|  | //     {print!($($x),+); let _ = ::std::io::stdout().flush();} | ||||||
|  | // } | ||||||
|   | |||||||
							
								
								
									
										47
									
								
								grammar.ebnf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								grammar.ebnf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | |||||||
|  | (* Partial grammar for msp430-asm *) | ||||||
|  |  | ||||||
|  | Line = Label | Directive | Insn ; | ||||||
|  |  | ||||||
|  | Insn = NoEm | OneEm | Special | OneArg | TwoArg | Jump ; | ||||||
|  |  | ||||||
|  | (* Instruction formats *) | ||||||
|  | NoEm    = OpNoEm ; | ||||||
|  | OneEm   = OpOneEm Dst ; | ||||||
|  | Special = "reti" | "br" Src ; | ||||||
|  | OneArg  = OpOneArg Src ; | ||||||
|  | TwoArg  = OpTwoArg Src ','? Dst ; | ||||||
|  | Jump    = OpJump Expr ; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* Addressing modes *) | ||||||
|  | Src = '#' (SrcSpecial | '-'? Expr) | ||||||
|  |     | Absolute | ||||||
|  |     | PostInc | ||||||
|  |     | Indexed | ||||||
|  |     | Direct ; | ||||||
|  |  | ||||||
|  | Dst = '#' (SrcSpecial | Expr) | ||||||
|  |     | Absolute | ||||||
|  |     | Indirect | ||||||
|  |     | Indexed | ||||||
|  |     | Direct ; | ||||||
|  |  | ||||||
|  | Direct     = Reg ; | ||||||
|  | Indirect   = '@' Reg ; | ||||||
|  | PostInc    = Indirect '+'? ; | ||||||
|  | Indexed    = Number '(' Reg ')' ; | ||||||
|  | Immediate  = '#' Expr ; | ||||||
|  | Absolute   = '&' Expr ; | ||||||
|  | SrcSpecial = 0 | 1 | '-' 1 | 0xffff | 2 | 4 | 8 ; | ||||||
|  | DstSpecial = 0 | 1 ; | ||||||
|  |  | ||||||
|  | Expr       = '-'? Number ; | ||||||
|  |  | ||||||
|  | (* Pseudo-terminals *) | ||||||
|  | Reg = "pc"  | "sp"  | "sr"  | "cg"  | ||||||
|  |     | "r0"  | "r1"  | "r2"  | "r3"  | ||||||
|  |     | "r4"  | "r5"  | "r6"  | "r7"  | ||||||
|  |     | "r8"  | "r9"  | "r10" | "r11" | ||||||
|  |     | "r12" | "r13" | "r14" | "r15" ; | ||||||
|  | Identifier = ID_START ID_CONTINUE* ; | ||||||
|  | Number = '-'? DIGIT ; | ||||||
							
								
								
									
										13
									
								
								msp430-asm/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								msp430-asm/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | [package] | ||||||
|  | name = "msp430-asm" | ||||||
|  | authors.workspace = true | ||||||
|  | version.workspace = true | ||||||
|  | license.workspace = true | ||||||
|  | edition.workspace = true | ||||||
|  | publish.workspace = true | ||||||
|  |  | ||||||
|  | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||||||
|  |  | ||||||
|  | [dependencies] | ||||||
|  | libmsp430 = { path = ".." } | ||||||
|  | argp = { version = "0.3.0" } | ||||||
							
								
								
									
										135
									
								
								msp430-asm/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								msp430-asm/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | |||||||
|  | //! Helper library for msp430-asm | ||||||
|  | #![feature(decl_macro)] | ||||||
|  | pub mod split_twice { | ||||||
|  |     /// Slices a collection into a beginning, middle, and end, based on two unordered indices | ||||||
|  |     pub trait SplitTwice<'t> { | ||||||
|  |         type Slice; | ||||||
|  |         type Idx; | ||||||
|  |         /// Splits a collection into a beginning, middle, and end slice, | ||||||
|  |         /// based on two unordered indices | ||||||
|  |         /// | ||||||
|  |         /// # Examples | ||||||
|  |         /// ```rust | ||||||
|  |         /// # use msp430_asm::split_twice::SplitTwice; | ||||||
|  |         /// let string = "foo,bar,baz"; | ||||||
|  |         /// let (foo, bar, baz) = string.split_twice(4, 8); | ||||||
|  |         /// assert_eq!(foo, "foo,"); | ||||||
|  |         /// assert_eq!(bar, "bar,"); | ||||||
|  |         /// assert_eq!(baz, "baz"); | ||||||
|  |         /// ``` | ||||||
|  |         fn split_twice( | ||||||
|  |             &'t self, | ||||||
|  |             a: Self::Idx, | ||||||
|  |             b: Self::Idx, | ||||||
|  |         ) -> (Self::Slice, Self::Slice, Self::Slice); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t, T: 't> SplitTwice<'t> for [T] { | ||||||
|  |         type Slice = &'t [T]; | ||||||
|  |         type Idx = usize; | ||||||
|  |         fn split_twice( | ||||||
|  |             &'t self, | ||||||
|  |             a: Self::Idx, | ||||||
|  |             b: Self::Idx, | ||||||
|  |         ) -> (Self::Slice, Self::Slice, Self::Slice) { | ||||||
|  |             let (a, b) = if a < b { (a, b) } else { (b, a) }; | ||||||
|  |             let (mid, end) = | ||||||
|  |                 if b < self.len() { self.split_at(b) } else { (self, Default::default()) }; | ||||||
|  |             let (start, mid) = | ||||||
|  |                 if a < mid.len() { mid.split_at(a) } else { (self, Default::default()) }; | ||||||
|  |             (start, mid, end) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t> SplitTwice<'t> for str { | ||||||
|  |         type Slice = &'t str; | ||||||
|  |         type Idx = usize; | ||||||
|  |         fn split_twice( | ||||||
|  |             &'t self, | ||||||
|  |             a: Self::Idx, | ||||||
|  |             b: Self::Idx, | ||||||
|  |         ) -> (Self::Slice, Self::Slice, Self::Slice) { | ||||||
|  |             let (a, b) = if a < b { (a, b) } else { (b, a) }; | ||||||
|  |             let (mid, end) = | ||||||
|  |                 if b < self.len() { self.split_at(b) } else { (self, Default::default()) }; | ||||||
|  |             let (start, mid) = | ||||||
|  |                 if a < mid.len() { mid.split_at(a) } else { (self, Default::default()) }; | ||||||
|  |             (start, mid, end) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub mod cursor { | ||||||
|  |     use std::fmt::{Arguments, Display}; | ||||||
|  |  | ||||||
|  |     /// Moves to the {line}th previous line | ||||||
|  |     pub macro previous($line:literal) { | ||||||
|  |         csi!("{}F", $line) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Injects a Control Sequence Introducer (CSI, `ESC [`) | ||||||
|  |     pub macro csi($($t:tt)*) { | ||||||
|  |         format_args!("\x1b[{}", format_args!($($t)*)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Formats the args with a foreground [Color] | ||||||
|  |     pub macro fg($fg:expr, $($t:tt)*) { | ||||||
|  |         Colorized::new(Some($fg), None, format_args!($($t)*)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Formats the args with a background [Color] | ||||||
|  |     pub macro bg($bg:expr, $(t:tt)*) { | ||||||
|  |         Colorized::new(None, Some($bg), format_args!($($t)*)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Formats the args with both a foreground and background [Color] | ||||||
|  |     pub macro color($fg:expr, $bg:expr, $($t:tt)*) { | ||||||
|  |         Colorized::new(Some($fg), Some($bg), format_args!($($t)*)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  |     pub enum Color { | ||||||
|  |         #[default] | ||||||
|  |         Black    = 30, | ||||||
|  |         Red, | ||||||
|  |         Green, | ||||||
|  |         Yellow, | ||||||
|  |         Blue, | ||||||
|  |         Magenta, | ||||||
|  |         Cyan, | ||||||
|  |         Gray, | ||||||
|  |         DarkGray = 90, | ||||||
|  |         Pink, | ||||||
|  |         Lime, | ||||||
|  |         Sunflower, | ||||||
|  |         SkyBlue, | ||||||
|  |         HotPink, | ||||||
|  |         Turquoise, | ||||||
|  |         White, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[derive(Clone, Copy, Debug)] | ||||||
|  |     pub struct Colorized<'args> { | ||||||
|  |         fg: Option<Color>, | ||||||
|  |         bg: Option<Color>, | ||||||
|  |         args: Arguments<'args>, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t> Colorized<'t> { | ||||||
|  |         pub fn new(fg: Option<Color>, bg: Option<Color>, args: Arguments<'t>) -> Self { | ||||||
|  |             Self { fg, bg, args } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Colorized<'t> { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             let &Self { fg, bg, args } = self; | ||||||
|  |             if let Some(fg) = fg { | ||||||
|  |                 write!(f, "{}", csi!("{}m", fg as u8))?; | ||||||
|  |             } | ||||||
|  |             if let Some(bg) = bg { | ||||||
|  |                 write!(f, "{}", csi!("{}m", bg as u8 + 10))?; | ||||||
|  |             } | ||||||
|  |             write!(f, "{args}{}", csi!("0m")) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,7 +1,17 @@ | |||||||
|  | // © 2023-2024 John Breaux
 | ||||||
|  | // See LICENSE.md for license
 | ||||||
| //! Simple frontend for the assembler
 | //! Simple frontend for the assembler
 | ||||||
| #![feature(decl_macro)] | #![feature(decl_macro)] | ||||||
| use argp::parse_args_or_exit; | use argp::parse_args_or_exit; | ||||||
| use msp430_asm::preamble::*; | use libmsp430::{ | ||||||
|  |     assembler::Assemble, | ||||||
|  |     parser::ast::{canonical::Canonicalize, *}, | ||||||
|  |     parser::{error::Error as PError, Parser}, | ||||||
|  | }; | ||||||
|  | use msp430_asm::{ | ||||||
|  |     cursor::{fg, Color::*}, | ||||||
|  |     split_twice::SplitTwice, | ||||||
|  | }; | ||||||
| use std::{ | use std::{ | ||||||
|     error::Error, |     error::Error, | ||||||
|     io::{stdin, IsTerminal, Read}, |     io::{stdin, IsTerminal, Read}, | ||||||
| @@ -11,16 +21,30 @@ fn main() -> Result<(), Box<dyn Error>> { | |||||||
|     let mut buf = String::new(); |     let mut buf = String::new(); | ||||||
|     if let Some(file) = parse_args_or_exit::<args::Args>(argp::DEFAULT).file { |     if let Some(file) = parse_args_or_exit::<args::Args>(argp::DEFAULT).file { | ||||||
|         buf = std::fs::read_to_string(file)?; |         buf = std::fs::read_to_string(file)?; | ||||||
|     } else if !stdin().is_terminal() { |     } else if stdin().is_terminal() { | ||||||
|         // if stdin is not a terminal, don't parsecheck each line.
 |  | ||||||
|         stdin().lock().read_to_string(&mut buf)?; |  | ||||||
|     } else { |  | ||||||
|         // if stdin is a terminal, enter parse-checked REPL mode.
 |         // if stdin is a terminal, enter parse-checked REPL mode.
 | ||||||
|         repl::repl(&mut buf)?; |         repl::repl(&mut buf)?; | ||||||
|  |     } else { | ||||||
|  |         // if stdin is not a terminal, don't parsecheck each line.
 | ||||||
|  |         stdin().lock().read_to_string(&mut buf)?; | ||||||
|     } |     } | ||||||
|     asm(&buf) |     asm(&buf) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // Parses and assembles a buffer, then prints it in hex to stdout
 | ||||||
|  | fn asm(buf: &str) -> Result<(), Box<dyn Error>> { | ||||||
|  |     match Parser::new(buf).parse::<Statements>()?.to_canonical().assemble() { | ||||||
|  |         Err(error) => println!("{error}"), | ||||||
|  |         Ok(out) => { | ||||||
|  |             for word in out { | ||||||
|  |                 print!("{:04x} ", word.swap_bytes()) | ||||||
|  |             } | ||||||
|  |             println!(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| mod args { | mod args { | ||||||
|     use argp::FromArgs; |     use argp::FromArgs; | ||||||
|     use std::path::PathBuf; |     use std::path::PathBuf; | ||||||
| @@ -34,41 +58,29 @@ mod args { | |||||||
|         pub file: Option<PathBuf>, |         pub file: Option<PathBuf>, | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | 
 | ||||||
| mod repl { | mod repl { | ||||||
|  |     //! The REPL reads, evaluates, and prints in a loop
 | ||||||
|     use super::*; |     use super::*; | ||||||
|     use anes::{Color, MoveCursorToPreviousLine, ResetAttributes, SetForegroundColor}; |     use msp430_asm::cursor::*; | ||||||
|     use msp430_asm::{ |  | ||||||
|         assembler::error::AssemblyError, error::Error as MspError, lexer::error::LexError, parser::error::ParseError, |  | ||||||
|     }; |  | ||||||
|     use std::io::{stderr, Write}; |     use std::io::{stderr, Write}; | ||||||
| 
 | 
 | ||||||
|     macro color ($color: expr, $fmt: literal, $($str: expr),*) { |     /// Formats the line number
 | ||||||
|         format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* ::anes::ResetAttributes) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     macro linenr($n: expr) { |     macro linenr($n: expr) { | ||||||
|         format_args!("{:4}: ", $n) |         format_args!("{:4}: ", $n) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     macro printflush ($($x: expr),+) { |     /// [print], but flushes stdout immediately afterwards
 | ||||||
|  |     macro printfl ($($x: expr),+) { | ||||||
|         {print!($($x),+); let _ = ::std::io::stdout().flush();} |         {print!($($x),+); let _ = ::std::io::stdout().flush();} | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     macro move_cursor($x:expr, $y:expr) { |     /// Runs the read-evaluate-print loop
 | ||||||
|         format_args!("{}{}", ::anes::MoveCursorToPreviousLine($x), "") |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     pub fn repl(buf: &mut String) -> Result<(), Box<dyn Error>> { |     pub fn repl(buf: &mut String) -> Result<(), Box<dyn Error>> { | ||||||
|         let mut line = String::new(); |         let mut line = String::new(); | ||||||
|         let mut linenr = 1; |         let mut linenr = 1; | ||||||
|         println!( |         println!("{}", fg!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION"))); | ||||||
|             "{}{} v{}{}", |         printfl!("{}", linenr!(linenr)); | ||||||
|             SetForegroundColor(Color::DarkGray), |  | ||||||
|             env!("CARGO_BIN_NAME"), |  | ||||||
|             env!("CARGO_PKG_VERSION"), |  | ||||||
|             ResetAttributes |  | ||||||
|         ); |  | ||||||
|         printflush!("{}", linenr!(linenr)); |  | ||||||
|         while let Ok(len) = stdin().read_line(&mut line) { |         while let Ok(len) = stdin().read_line(&mut line) { | ||||||
|             match len { |             match len { | ||||||
|                 0 => break,    // No newline (reached EOF)
 |                 0 => break,    // No newline (reached EOF)
 | ||||||
| @@ -76,61 +88,36 @@ mod repl { | |||||||
|                 _ => (), |                 _ => (), | ||||||
|             } |             } | ||||||
|             // Try to parse this line in isolation (this restricts preprocessing)
 |             // Try to parse this line in isolation (this restricts preprocessing)
 | ||||||
|             match Parser::default().parse(&line) { |             match Parser::new(&line).parse::<Statements>() { | ||||||
|                 Err(error) => errpp(&line, linenr, &error.into()), |                 Err(error) => format_err(&line, linenr, &error), | ||||||
|                 Ok(_) => { |                 Ok(_) => { | ||||||
|                     okpp(&line, linenr); |                     format_ok(&line, linenr); | ||||||
|                     *buf += &line; |                     *buf += &line; | ||||||
|                     linenr += 1; |                     linenr += 1; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             line.clear(); |             line.clear(); | ||||||
|             printflush!("{}", linenr!(linenr)); |             printfl!("{}", linenr!(linenr)); | ||||||
|         } |         } | ||||||
|         println!(); |         println!("{}", fg!(DarkGray, "[EOF]")); | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fn okpp(line: &str, linenr: i32) { |     /// Rewrites the line in OK format, with a green linenr
 | ||||||
|         println!("{}{}{}", move_cursor!(1, 5), color!(Color::Green, "{:4}", linenr!(linenr)), line.trim_end(),); |     fn format_ok(line: &str, linenr: i32) { | ||||||
|  |         println!("{}{}{}", previous!(1), fg!(Lime, "{:4}", linenr!(linenr)), line.trim_end(),); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Pretty-prints a line error
 |     /// Pretty-prints a line error
 | ||||||
|     fn errpp(line: &str, linenr: i32, err: &msp430_asm::error::Error) { |     fn format_err(line: &str, linenr: i32, err: &PError) { | ||||||
|  |         let loc = err.loc; | ||||||
|         if stderr().is_terminal() { |         if stderr().is_terminal() { | ||||||
|             let line = line.trim_end(); |             let line = line.trim_end(); | ||||||
|             eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Color::Red, "{}", linenr!(linenr))); |             eprint!("{}{}", previous!(1), fg!(Red, "{}", linenr!(linenr))); | ||||||
|             match err { |             let (start, mid, end) = line.split_twice(loc.start, loc.end); | ||||||
|                 // TODO: use a recursive enum to store all valid states
 |             eprintln!("{start}{}{end} {}", fg!(Red, "{}", mid), fg!(DarkGray, "; {}", err)); | ||||||
|                 MspError::LexError(LexError::Contextual(c, e)) |  | ||||||
|                 | MspError::ParseError(ParseError::LexError(LexError::Contextual(c, e))) |  | ||||||
|                 | MspError::AssemblyError(AssemblyError::ParseError(ParseError::LexError(LexError::Contextual( |  | ||||||
|                     c, |  | ||||||
|                     e, |  | ||||||
|                 )))) => { |  | ||||||
|                     let (start, end) = line.split_at(c.position() - 1); |  | ||||||
|                     eprintln!("{start}{} ({e})", color!(Color::Red, "{}", end)); |  | ||||||
|                 } |  | ||||||
|                 _ => { |  | ||||||
|                     eprintln!("{} ({err})", color!(Color::Red, "{}", line)); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } else { |         } else { | ||||||
|             eprintln!("{} ({err})", line.trim()) |             eprintln!("{} ({err})", line.trim()) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 |  | ||||||
| // Parses and assembles a buffer, then prints it in hex to stdout
 |  | ||||||
| fn asm(buf: &str) -> Result<(), Box<dyn Error>> { |  | ||||||
|     match Assembler::assemble(&Parser::default().parse(&buf)?) { |  | ||||||
|         Err(error) => println!("{error}"), |  | ||||||
|         Ok(out) => { |  | ||||||
|             for word in out { |  | ||||||
|                 print!("{:04x} ", word.swap_bytes()) |  | ||||||
|             } |  | ||||||
|             println!(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     Ok(()) |  | ||||||
| } |  | ||||||
							
								
								
									
										99
									
								
								sample-asm/shellcode.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								sample-asm/shellcode.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | |||||||
|  | ; © 2023-2024 John Breaux | ||||||
|  | ; Contains spoilers for Microcorruption Halifax! Be warned! | ||||||
|  | ; just hash the first 0x140 B and stick them in memory | ||||||
|  |  | ||||||
|  |  | ||||||
|  | const: | ||||||
|  | .define msize  0x1    ; number of input bytes fed to each hash | ||||||
|  | .define hsize  0x3    ; bytes kept per hash (only needs to be 3 to determine 1 byte of sram) | ||||||
|  | .define sr_len 0x140  ; number of bytes in sram to dump | ||||||
|  | .define ha_len 0x3c0  ; number of bytes in hash array (hsize * sr_len) | ||||||
|  | .define haddr  0x7000 ; address of the big hash array | ||||||
|  | .define iaddr  0x8000 ; address of the sram input buffer | ||||||
|  | .define kaddr  0x9000 ; address of the key buffer | ||||||
|  | external_data: | ||||||
|  | .define HEX_LUT 0x4710; "0123456789ABCDEF" | ||||||
|  | external_func: | ||||||
|  | ; INT(int interrupt, ...) | ||||||
|  | .define INT             #0x4550 | ||||||
|  | ; getsn(void *dest, size_t len) | ||||||
|  | .define getsn           #0x4568 | ||||||
|  | ; putchar(char character) | ||||||
|  | .define putchar         #0x4578 | ||||||
|  | ; puts(char *str) | ||||||
|  | .define puts            #0x4586 | ||||||
|  | ; memcpy(void *dest, void *src, size_t len) | ||||||
|  | .define memcpy          #0x45a4 | ||||||
|  | ; sha256_internal(void * sram_addr, size_t sr_len, void * sha_buf) | ||||||
|  | .define sha256_internal #0x45b6 | ||||||
|  | ; memset(void* buf, char value, size_t length) | ||||||
|  | .define memset          #0x45c8 | ||||||
|  |  | ||||||
|  | get_sram_hashes: | ||||||
|  |     clr   r11                   ; loop variable in r11 | ||||||
|  |     mov   #msize, r14           ; r14 = 1 | ||||||
|  |     mov   #haddr, r13           ; set destination to haddr (0x7000) | ||||||
|  |     sr_loop: | ||||||
|  |         mov   r11, r15          ; mov addr r15 | ||||||
|  |         call  sha256_internal   ; <sha256_internal> | ||||||
|  |         add   #hsize, r13       ; keep 3 bytes of the output | ||||||
|  |         inc   r11               ; inc r11 | ||||||
|  |         cmp   #sr_len, r11      ; do that sr_len (0x140) times | ||||||
|  |         jnc   sr_loop | ||||||
|  |  | ||||||
|  | print_hex: | ||||||
|  |     clr   r11; | ||||||
|  |     ph_loop: | ||||||
|  |         mov.b haddr(r11), r14 | ||||||
|  |         mov.b r14, r15 | ||||||
|  |         rra   r15               ; using rra here instead of rra.b means the value won't roll into the highest bit | ||||||
|  |         rra   r15               ; which negates the need to and 0xf, r15 | ||||||
|  |         rra   r15 | ||||||
|  |         rra   r15 | ||||||
|  |         clrc | ||||||
|  |         and   #0xf, r14 | ||||||
|  |         mov.b HEX_LUT(r15), r15 | ||||||
|  |         call  putchar           ; <putchar> | ||||||
|  |         mov.b HEX_LUT(r14), r15 | ||||||
|  |         call  putchar           ; <putchar> | ||||||
|  |         inc   r11               ; inc r11 | ||||||
|  |         cmp   #ha_len, r11      ; do that sram_length*3 times | ||||||
|  |         jnc   ph_loop | ||||||
|  |  | ||||||
|  |     mov.b #0xa, r15             ; '\n' | ||||||
|  |     call  #0x4578               ; putchar ('\n') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | take_input: | ||||||
|  |     mov   #sr_len, r14 | ||||||
|  |     mov   #iaddr, r15 | ||||||
|  |     call  getsn                 ; <getsn> | ||||||
|  |  | ||||||
|  | check_all_passwords: | ||||||
|  |     ;for i in 0..sr_len: | ||||||
|  |     clr   r9 | ||||||
|  |     pw_loop: | ||||||
|  |         ; 	memcpy(kaddr, iaddr + i, len) | ||||||
|  |         mov   #0x10, r13 | ||||||
|  |         mov   #iaddr, r14 | ||||||
|  |         add   r9, r14 | ||||||
|  |         mov   #kaddr, r15 | ||||||
|  |         call  memcpy | ||||||
|  |         ; INT (0x42, key) | ||||||
|  |         push  #kaddr | ||||||
|  |         push  #0x42 | ||||||
|  |         call  INT | ||||||
|  |         add   #4, sp | ||||||
|  |         ; INT(7f) | ||||||
|  |         unlock7f: | ||||||
|  |         push  #0 | ||||||
|  |         push  #0 | ||||||
|  |         push  #0x7f | ||||||
|  |         call  INT | ||||||
|  |         add   #6, sp | ||||||
|  |         inc   r9 | ||||||
|  |         cmp   #sr_len, r9 | ||||||
|  |         jl    pw_loop | ||||||
|  |  | ||||||
|  | end: | ||||||
|  |     ret | ||||||
| @@ -9,11 +9,11 @@ jmp main | |||||||
| .string "ABA" | .string "ABA" | ||||||
| .string "ABAB" | .string "ABAB" | ||||||
| .word 0b0101101001011010 | .word 0b0101101001011010 | ||||||
| .words [dead beef] | .words [0xdead 0xbeef 0x0000] | ||||||
| 
 | 
 | ||||||
| main: | main: | ||||||
| ; testing defines | ; testing defines | ||||||
| .define asdfgh #1000 | .define asdfgh #0x1000 | ||||||
| .define qwerty @sp+ | .define qwerty @sp+ | ||||||
| br asdfgh | br asdfgh | ||||||
| mov qwerty, r15 | mov qwerty, r15 | ||||||
| @@ -88,73 +88,74 @@ mov @r13+, r14 | |||||||
| mov @r14+, r15 | mov @r14+, r15 | ||||||
| 
 | 
 | ||||||
| .define special r6 | .define special r6 | ||||||
| ;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid | ; mov , r14 | ||||||
| ;mov @sp+, r15 ; pop r15 | ; mov @pc+, r15 ; This is a mov-immediate, and may corrupt your output | ||||||
| ;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] | mov @sp+, r15 ; pop r15 | ||||||
| ;mov @cg+, r15 | mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] | ||||||
|  | mov @cg+, r15 | ||||||
| 
 | 
 | ||||||
| indexed_mode: | indexed_mode: | ||||||
| .define numbered r7 | .define numbered r7 | ||||||
| mov.b 10(r0), r1 | mov.b 0x10(r0), r1 | ||||||
| mov 10(r1), r2 | mov 0x10(r1), r2 | ||||||
| ;mov 10(r2), r3 ; Invalid: cannot index relative to sr | ;mov 10(r2), r3 ; Invalid: cannot index relative to sr | ||||||
| ;mov 10(r3), r4 ; Invalid: cannot index relative to cg | ;mov 10(r3), r4 ; Invalid: cannot index relative to cg | ||||||
| mov 10(r4), r5 | mov 0x10(r4), r5 | ||||||
| mov 10(r5), r6 | mov 0x10(r5), r6 | ||||||
| mov 10(r6), r7 | mov 0x10(r6), r7 | ||||||
| mov 10(r7), r8 | mov 0x10(r7), r8 | ||||||
| mov 10(r8), r9 | mov 0x10(r8), r9 | ||||||
| mov 10(r9), r10 | mov 0x10(r9), r10 | ||||||
| mov 10(r10), r11 | mov 0x10(r10), r11 | ||||||
| mov 10(r11), r12 | mov 0x10(r11), r12 | ||||||
| mov 10(r12), r13 | mov 0x10(r12), r13 | ||||||
| mov 10(r13), r14 | mov 0x10(r13), r14 | ||||||
| mov 10(r14), r15 | mov 0x10(r14), r15 | ||||||
| 
 | 
 | ||||||
| .define special r8 | .define special r8 | ||||||
| mov 10(pc), r15 | mov 0x10(pc), r15 | ||||||
| mov 10(sp), r15 | mov 0x10(sp), r15 | ||||||
| ;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] | ;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] | ||||||
| ;mov 10(cg), r15 | ;mov 10(cg), r15 | ||||||
| 
 | 
 | ||||||
| _immediate_mode: | _immediate_mode: | ||||||
| .define numbered r9 | .define numbered r9 | ||||||
| mov #beef, r0 | mov #0xbeef, r0 | ||||||
| mov #beef, r1 | mov #0xbeef, r1 | ||||||
| mov #beef, r2 | mov #0xbeef, r2 | ||||||
| mov #beef, r3 | mov #0xbeef, r3 | ||||||
| mov #beef, r4 | mov #0xbeef, r4 | ||||||
| mov #beef, r5 | mov #0xbeef, r5 | ||||||
| mov #beef, r6 | mov #0xbeef, r6 | ||||||
| mov #beef, r7 | mov #0xbeef, r7 | ||||||
| mov #beef, r8 | mov #0xbeef, r8 | ||||||
| mov #beef, r9 | mov #0xbeef, r9 | ||||||
| mov #beef, r10 | mov #0xbeef, r10 | ||||||
| mov #beef, r11 | mov #0xbeef, r11 | ||||||
| mov #beef, r12 | mov #0xbeef, r12 | ||||||
| mov #beef, r13 | mov #0xbeef, r13 | ||||||
| mov #beef, r14 | mov #0xbeef, r14 | ||||||
| mov #beef, r15 | mov #0xbeef, r15 | ||||||
| 
 | 
 | ||||||
| .define special r10 | .define special r10 | ||||||
| mov #beef, pc | mov #0xbeef, pc | ||||||
| mov #beef, sp | mov #0xbeef, sp | ||||||
| mov #beef, sr | mov #0xbeef, sr | ||||||
| mov #beef, cg | mov #0xbeef, cg | ||||||
| 
 | 
 | ||||||
| jmp _register_mode | jmp _register_mode | ||||||
| jmp 3fe | jmp 0x3fe | ||||||
| jmp -3fc | jmp -0x3fc | ||||||
| ret | ret | ||||||
| 
 | 
 | ||||||
| ; Funky encodings | ; Funky encodings | ||||||
| mov r6,         r4 | mov r6,         r4 | ||||||
| mov @r6,        r4 | mov @r6,        r4 | ||||||
| mov @r6+,       r4 | mov @r6+,       r4 | ||||||
| mov 0(r6),      r4 | mov 0x0(r6),    r4 | ||||||
| mov 4141(r6),   r4 | mov 0x4141(r6), r4 | ||||||
| mov #-1,        r4 | mov #-1,        r4 | ||||||
| mov #ffff,      r4 | mov #0xffff,    r4 | ||||||
| mov #0,         r4 | mov #0,         r4 | ||||||
| mov #1,         r4 | mov #1,         r4 | ||||||
| mov #2,         r4 | mov #2,         r4 | ||||||
| @@ -164,33 +165,33 @@ mov r6,         0(r4) | |||||||
| mov @r6,        0(r4) | mov @r6,        0(r4) | ||||||
| mov @r6+,       0(r4) | mov @r6+,       0(r4) | ||||||
| mov 0(r6),      0(r4) | mov 0(r6),      0(r4) | ||||||
| mov 4141(r6),   0(r4) | mov 0x4141(r6), 0(r4) | ||||||
| mov #-1,        0(r4) | mov #-1,        0(r4) | ||||||
| mov #ffff,      0(r4) | mov #0xffff,    0(r4) | ||||||
| mov #0,         0(r4) | mov #0,         0(r4) | ||||||
| mov #1,         0(r4) | mov #1,         0(r4) | ||||||
| mov #2,         0(r4) | mov #2,         0(r4) | ||||||
| mov #4,         0(r4) | mov #4,         0(r4) | ||||||
| mov #8,         0(r4) | mov #8,         0(r4) | ||||||
| mov r6,         4141(r4) | mov r6,         0x4141(r4) | ||||||
| mov @r6,        4141(r4) | mov @r6,        0x4141(r4) | ||||||
| mov @r6+,       4141(r4) | mov @r6+,       0x4141(r4) | ||||||
| mov 0(r6),      4141(r4) | mov 0(r6),      0x4141(r4) | ||||||
| mov 4141(r6),   4141(r4) | mov 0x4141(r6), 0x4141(r4) | ||||||
| mov #-1,        4141(r4) | mov #-1,        0x4141(r4) | ||||||
| mov #ffff,      4141(r4) | mov #0xffff,    0x4141(r4) | ||||||
| mov #0,         4141(r4) | mov #0,         0x4141(r4) | ||||||
| mov #1,         4141(r4) | mov #1,         0x4141(r4) | ||||||
| mov #2,         4141(r4) | mov #2,         0x4141(r4) | ||||||
| mov #4,         4141(r4) | mov #4,         0x4141(r4) | ||||||
| mov #8,         4141(r4) | mov #8,         0x4141(r4) | ||||||
| mov r6,         #0 | mov r6,         #0 | ||||||
| mov @r6,        #0 | mov @r6,        #0 | ||||||
| mov @r6+,       #0 | mov @r6+,       #0 | ||||||
| mov 0(r6),      #0 | mov 0(r6),      #0 | ||||||
| mov 4141(r6),   #0 | mov 0x4141(r6), #0 | ||||||
| mov #-1,        #0 | mov #-1,        #0 | ||||||
| mov #ffff,      #0 | mov #0xffff,    #0 | ||||||
| mov #0,         #0 | mov #0,         #0 | ||||||
| mov #1,         #0 | mov #1,         #0 | ||||||
| mov #2,         #0 | mov #2,         #0 | ||||||
| @@ -200,9 +201,9 @@ mov r6,         #1 | |||||||
| mov @r6,        #1 | mov @r6,        #1 | ||||||
| mov @r6+,       #1 | mov @r6+,       #1 | ||||||
| mov 0(r6),      #1 | mov 0(r6),      #1 | ||||||
| mov 4141(r6),   #1 | mov 0x4141(r6), #1 | ||||||
| mov #-1,        #1 | mov #-1,        #1 | ||||||
| mov #ffff,      #1 | mov #0xffff,    #1 | ||||||
| mov #0,         #1 | mov #0,         #1 | ||||||
| mov #1,         #1 | mov #1,         #1 | ||||||
| mov #2,         #1 | mov #2,         #1 | ||||||
| @@ -211,14 +212,14 @@ mov #8,         #1 | |||||||
| 
 | 
 | ||||||
| ; Instruction exercise | ; Instruction exercise | ||||||
| ; Jumps | ; Jumps | ||||||
| jne 10 | jne 0x10 | ||||||
| jeq 10 | jeq 0x10 | ||||||
| jlo 10 | jlo 0x10 | ||||||
| jhs 10 | jhs 0x10 | ||||||
| jn  10 | jn  0x10 | ||||||
| jge 10 | jge 0x10 | ||||||
| jl  10 | jl  0x10 | ||||||
| jmp 10 | jmp 0x10 | ||||||
| 
 | 
 | ||||||
| ; Two-ops | ; Two-ops | ||||||
| mov  r14, r15 | mov  r14, r15 | ||||||
| @@ -232,7 +233,7 @@ bit  r14, r15 | |||||||
| bic  r14, r15 | bic  r14, r15 | ||||||
| bis  r14, r15 | bis  r14, r15 | ||||||
| xor  r14, r15 | xor  r14, r15 | ||||||
| and  r14, 10(r15) | and  r14, 0x10(r15) | ||||||
| 
 | 
 | ||||||
| ; One-ops | ; One-ops | ||||||
| rrc  r15 | rrc  r15 | ||||||
| @@ -241,13 +242,14 @@ rra  r15 | |||||||
| sxt  r15 | sxt  r15 | ||||||
| push r15 | push r15 | ||||||
| call r15 | call r15 | ||||||
| reti r15 | ; reti is special | ||||||
|  | reti | ||||||
| 
 | 
 | ||||||
| ; Jump aliases | ; Jump aliases | ||||||
| jnc 10 | jnc 0x10 | ||||||
| jnz 10 | jnz 0x10 | ||||||
| jc  10 | jc  0x10 | ||||||
| jz  10 | jz  0x10 | ||||||
| 
 | 
 | ||||||
| ; "emulated" no-op instructions | ; "emulated" no-op instructions | ||||||
| ret | ret | ||||||
							
								
								
									
										576
									
								
								src/assembler.rs
									
									
									
									
									
								
							
							
						
						
									
										576
									
								
								src/assembler.rs
									
									
									
									
									
								
							| @@ -1,197 +1,425 @@ | |||||||
| // © 2023 John Breaux | // © 2023-2024 John Breaux | ||||||
| //! Traverses an AST, assembling instructions. | //See LICENSE.md for license | ||||||
| //! | //! Assembles a binary using the given [AST](crate::parser::ast) | ||||||
| //! [Assembler] carries *some* state |  | ||||||
|  |  | ||||||
| use crate::parser::preamble::*; | use error::{AResult, ErrorKind::*}; | ||||||
| use error::AssemblyError; |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::path::Path; |  | ||||||
|  |  | ||||||
| pub mod error; | use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, span::Span}; | ||||||
|  |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | use self::error::{Error, ErrorKind}; | ||||||
| pub enum IdentType { |  | ||||||
|     Word, |  | ||||||
|     Jump, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Takes in an AST's [Root], and outputs a sequence of bytes | /// Assembles a binary using the given [Assemble]-able item | ||||||
| #[derive(Clone, Debug, Default, PartialEq, Eq)] | #[derive(Clone, Debug, Default, PartialEq, Eq)] | ||||||
| pub struct Assembler { | pub struct Assembler<'t> { | ||||||
|     out: Vec<u16>, |     /// The assembled output | ||||||
|     /// A map from Labels' [Identifier]s to their location in the binary |     output: Vec<u16>, | ||||||
|     labels: HashMap<Identifier, usize>, |     /// Table of labels, for backpatching | ||||||
|     /// A list of all referenced [Identifier]s in the binary, and their locations |     labels: HashMap<&'t str, usize>, | ||||||
|     identifiers: Vec<(usize, Identifier, IdentType)>, |     /// Backpatch table for jump instructions | ||||||
|  |     jump_queue: Vec<(usize, &'t str)>, | ||||||
|  |     /// Backpatch table for immediate values | ||||||
|  |     expr_queue: Vec<(usize, Expr<'t>)>, | ||||||
|  |     /// Base address from .org directives | ||||||
|  |     org_base: usize, | ||||||
|  |     /// Last seen index in input | ||||||
|  |     loc: Span<usize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Assembler { | impl<'t> Assembler<'t> { | ||||||
|     pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> { |     pub fn new() -> Self { | ||||||
|         let mut out = Self::default(); |         Default::default() | ||||||
|         out.visit_root(r)?; |  | ||||||
|         Ok(out.out) |  | ||||||
|     } |     } | ||||||
|     pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) } |     pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> { | ||||||
|     pub fn out(self) -> Vec<u16> { self.out } |         t.assemble_in(self) | ||||||
|      |  | ||||||
|     fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) } |  | ||||||
|     fn push_default(&mut self) -> usize { |  | ||||||
|         self.out.push(Default::default()); |  | ||||||
|         self.out.len() - 1 |  | ||||||
|     } |     } | ||||||
| } |     /// Gets the address of a label | ||||||
|  |     pub fn addrof(&self, name: &str) -> Option<u16> { | ||||||
| impl Assembler { |         self.labels.get(name).map(|v| *v as u16) | ||||||
|     /// Visits the [Root] node of a parse tree |     } | ||||||
|     fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> { |     /// Gets the value at a label | ||||||
|         // Visit the entire tree |     pub fn valueof(&self, name: &str) -> Option<u16> { | ||||||
|         for (num, line) in r.lines() { |         self.output.get(self.addrof(name)? as usize).copied() | ||||||
|             self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?; |     } | ||||||
|  |     fn push(&mut self, word: u16) { | ||||||
|  |         self.output.push(word) | ||||||
|  |     } | ||||||
|  |     fn error(&self, kind: ErrorKind) -> Error { | ||||||
|  |         Error { span: self.loc, kind } | ||||||
|  |     } | ||||||
|  |     /// Backpatches everything, and yoinks the output buffer. | ||||||
|  |     pub fn out(&mut self) -> AResult<Vec<u16>> { | ||||||
|  |         // Resolve jumps | ||||||
|  |         for (idx, key) in &self.jump_queue { | ||||||
|  |             // eprintln!("Patching jump at {idx} with key {key}"); | ||||||
|  |             match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) { | ||||||
|  |                 None => Err(self.error(UndefinedLabel(key.to_string())))?, | ||||||
|  |                 Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff, | ||||||
|  |                 Some(value) => Err(self.error(LongJump(value)))?, | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|         // Link identifiers |         // Resolve immediates through late expression evaluation. | ||||||
|         for (idx, id, id_type) in self.identifiers.iter() { |         for (idx, expr) in &self.expr_queue { | ||||||
|             let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) }; |             // eprintln!("Patching immediate at {idx} with expression {expr:?}"); | ||||||
|             let offset = (num as isize - *idx as isize) * 2; |             self.output[*idx] = self.eval(expr)?; | ||||||
|             *self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type { |         } | ||||||
|                 IdentType::Word => offset as u16, |         let out = std::mem::take(&mut self.output); | ||||||
|                 IdentType::Jump => JumpTarget::squish(offset)?, |         *self = Default::default(); | ||||||
|             }; |         Ok(out) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn add_label(&mut self, label: &'t str) -> AResult<()> { | ||||||
|  |         if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() { | ||||||
|  |             Err(self.error(RedefinedLabel(label.into())))? | ||||||
|         } |         } | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// visit a [Line] |     /// Appends an expr as an extword, deferring its calculation for later | ||||||
|     fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> { |     pub fn defer_expr(&mut self, e: Expr<'t>) { | ||||||
|         match line { |         self.expr_queue.push((self.output.len(), e)); | ||||||
|             Line::Insn(insn) => self.visit_instruction(insn), |         self.push(0); | ||||||
|             Line::Label(label) => self.visit_label(label), |     } | ||||||
|             Line::Directive(d) => self.visit_directive(d), |     /// Defers resolution of a jump label until output time | ||||||
|             _ => Ok(()), |     /// The jump label will be later resolved to the NEXT word. | ||||||
|  |     pub fn defer_jump(&mut self, label: &'t str) { | ||||||
|  |         self.jump_queue.push((self.output.len(), label)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub trait Assemble<'t> { | ||||||
|  |     fn assemble(&self) -> AResult<Vec<u16>> { | ||||||
|  |         self.assemble_in(&mut Default::default())?.out() | ||||||
|  |     } | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Assemble<'t> for Statements<'t> { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         for stmt in &self.stmts { | ||||||
|  |             stmt.assemble_in(a)?; | ||||||
|  |         } | ||||||
|  |         Ok(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for Statement<'t> { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         match self { | ||||||
|  |             Statement::Label(label) => a.add_label(label).map(|_| a), | ||||||
|  |             Statement::Insn(i) => i.assemble_in(a), | ||||||
|  |             Statement::Directive(d) => d.assemble_in(a), | ||||||
|  |             Statement::Comment(_) => Ok(a), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | } | ||||||
|     /// Visits a [Directive] | impl<'t> Assemble<'t> for Directive<'t> { | ||||||
|     fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> { |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|         match node { |         match self { | ||||||
|             Directive::Org(_) => todo!("{node}"), |             Directive::Define(_) => {} | ||||||
|             Directive::Define(..) => (), |             Directive::Org(base) => a.org_base = a.eval(base)? as usize, | ||||||
|             Directive::Include(r) => self.visit_root(r)?, |             Directive::Word(expr) => a.defer_expr(*expr.clone()), | ||||||
|             Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()), |             Directive::Words(exprs) => { | ||||||
|             Directive::Bytes(words) | Directive::Words(words) => { |                 for expr in exprs { | ||||||
|                 for word in words { |                     a.defer_expr(expr.clone()) | ||||||
|                     self.out.push((*word).into()); |  | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             Directive::String(s) => self.visit_string(s)?, |             Directive::String(str) => { | ||||||
|             Directive::Strings(strs) => { |                 str.assemble_in(a)?; | ||||||
|                 for s in strs { |             } | ||||||
|                     self.visit_string(s)?; |         } | ||||||
|                 } |         Ok(a) | ||||||
|             } |     } | ||||||
|         }; | } | ||||||
|         Ok(()) |  | ||||||
|     } | impl<'t> Assemble<'t> for &'t str { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|     /// Visits a [Label] |         for chunk in self.as_bytes().chunks(2) { | ||||||
|     fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> { |             match chunk.len() { | ||||||
|         // Register the label |                 0 => a.push(0), | ||||||
|         match self.labels.insert(node.0.to_owned(), self.out.len()) { |                 1 => { | ||||||
|             Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())), |                     a.push(chunk[0] as u16); | ||||||
|             _ => Ok(()), |                     return Ok(a); | ||||||
|         } |                 } | ||||||
|     } |                 2 => a.push((chunk[1] as u16) << 8 | chunk[0] as u16), | ||||||
|  |                 n => unreachable!("expected chunks of length 2, got length {n}"), | ||||||
|     /// Visits an [Instruction] |             } | ||||||
|     fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> { |         } | ||||||
|         self.push_default(); |         a.push(0); | ||||||
|         self.visit_opcode(insn.opcode())?; |         Ok(a) | ||||||
|         self.visit_encoding(insn.encoding())?; |     } | ||||||
|         Ok(()) | } | ||||||
|     } |  | ||||||
|  | impl<'t> Assemble<'t> for Instruction<'t> { | ||||||
|     /// Visits an [Opcode] |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|     fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> { |         let Self { span, kind } = self; | ||||||
|         *self.last_mut()? |= *node as u16; |         a.loc = *span; | ||||||
|         Ok(()) |         kind.assemble_in(a) | ||||||
|     } |     } | ||||||
|  | } | ||||||
|     /// Visits an [Encoding] | impl<'t> Assemble<'t> for InstructionKind<'t> { | ||||||
|     fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> { |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|         *self.last_mut()? |= node.word(); |         match self { | ||||||
|         match node { |             InstructionKind::NoEm(v) => v.assemble_in(a), | ||||||
|             Encoding::Single { dst, .. } => { |             InstructionKind::OneEm(v) => v.assemble_in(a), | ||||||
|                 self.visit_primary_operand(dst)?; |             InstructionKind::OneArg(v) => v.assemble_in(a), | ||||||
|             } |             InstructionKind::TwoArg(v) => v.assemble_in(a), | ||||||
|             Encoding::Jump { target } => { |             InstructionKind::Jump(v) => v.assemble_in(a), | ||||||
|                 self.visit_jump_target(target)?; |             InstructionKind::Reti(v) => v.assemble_in(a), | ||||||
|             } |             InstructionKind::Br(v) => v.assemble_in(a), | ||||||
|             Encoding::Double { src, dst, .. } => { |         } | ||||||
|                 self.visit_primary_operand(src)?; |     } | ||||||
|                 self.visit_secondary_operand(dst)?; | } | ||||||
|  | impl<'t> Assemble<'t> for NoEm { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         eprintln!( | ||||||
|  |             "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" | ||||||
|  |         ); | ||||||
|  |         self.clone().to_canonical().assemble_in(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for OneEm<'t> { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         eprintln!( | ||||||
|  |             "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" | ||||||
|  |         ); | ||||||
|  |         self.clone().to_canonical().assemble_in(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for OneArg<'t> { | ||||||
|  |     /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]` | ||||||
|  |     /// `[  0  0  0  1  0  0 [op:3  ] bw [Ad ] [dst_reg:4] ]` | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         let Self { opcode, width, src } = self; | ||||||
|  |         let (src_reg, src_mode, src_ext) = source(src); | ||||||
|  |         a.push( | ||||||
|  |             0b000100 << 10 | one_arg(*opcode) << 7 | (*width as u16) << 6 | src_mode << 4 | src_reg, | ||||||
|  |         ); | ||||||
|  |         if let Some(expr) = src_ext { | ||||||
|  |             a.defer_expr(expr) | ||||||
|  |         } | ||||||
|  |         Ok(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for TwoArg<'t> { | ||||||
|  |     /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]` | ||||||
|  |     /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]` | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         let Self { opcode, width, src, dst } = self; | ||||||
|  |         let (src_reg, src_mode, src_ext) = source(src); | ||||||
|  |         let (dst_reg, dst_mode, dst_ext) = destination(dst); | ||||||
|  |         a.push( | ||||||
|  |             two_arg(*opcode) << 12 | ||||||
|  |                 | src_reg << 8 | ||||||
|  |                 | dst_mode << 7 | ||||||
|  |                 | (*width as u16) << 6 | ||||||
|  |                 | src_mode << 4 | ||||||
|  |                 | dst_reg, | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         if let Some(expr) = src_ext { | ||||||
|  |             a.defer_expr(expr) | ||||||
|  |         } | ||||||
|  |         if let Some(expr) = dst_ext { | ||||||
|  |             a.defer_expr(expr) | ||||||
|  |         } | ||||||
|  |         Ok(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for Jump<'t> { | ||||||
|  |     /// `[ 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 ]` | ||||||
|  |     /// `[  0  0  1 [cond:3] +- [word_offset:10          ] ]` | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         let Self { opcode, dst } = self; | ||||||
|  |         let word = 1 << 13 | ||||||
|  |             | jump(*opcode) << 10 | ||||||
|  |             | match *dst { | ||||||
|  |                 JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))), | ||||||
|  |                 JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => { | ||||||
|  |                     return Err(a.error(LongJump(value))) | ||||||
|  |                 } | ||||||
|  |                 JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff, | ||||||
|  |                 JumpDst::Label(label) => { | ||||||
|  |                     a.defer_jump(label); | ||||||
|  |                     0 | ||||||
|  |                 } | ||||||
|  |             } & 0x3ff; | ||||||
|  |         a.push(word); | ||||||
|  |         Ok(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for Reti { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         a.output.push(0b0001_0011_0000_0000); | ||||||
|  |         Ok(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Assemble<'t> for Br<'t> { | ||||||
|  |     fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> { | ||||||
|  |         eprintln!( | ||||||
|  |             "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead" | ||||||
|  |         ); | ||||||
|  |         self.clone().to_canonical().assemble_in(a) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub fn one_arg(opcode: token::OneArg) -> u16 { | ||||||
|  |     opcode as u16 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub fn two_arg(opcode: token::TwoArg) -> u16 { | ||||||
|  |     opcode as u16 + 4 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub fn jump(opcode: token::Jump) -> u16 { | ||||||
|  |     use token::Jump; | ||||||
|  |     match opcode { | ||||||
|  |         Jump::Jne | Jump::Jnz => 0, | ||||||
|  |         Jump::Jeq | Jump::Jz => 1, | ||||||
|  |         Jump::Jnc | Jump::Jlo => 2, | ||||||
|  |         Jump::Jc | Jump::Jhs => 3, | ||||||
|  |         Jump::Jn => 4, | ||||||
|  |         Jump::Jge => 5, | ||||||
|  |         Jump::Jl => 6, | ||||||
|  |         Jump::Jmp => 7, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Returns a tuple of (Reg, AddrMode, extword) | ||||||
|  | pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) { | ||||||
|  |     use SrcSpecial::*; | ||||||
|  |     match src { | ||||||
|  |         Src::Special(Four) => (2, 2, None), | ||||||
|  |         Src::Special(Eight) => (2, 3, None), | ||||||
|  |         Src::Special(Zero) => (3, 0, None), | ||||||
|  |         Src::Special(One) => (3, 1, None), | ||||||
|  |         Src::Special(Two) => (3, 2, None), | ||||||
|  |         Src::Special(NegOne) => (3, 3, None), | ||||||
|  |         Src::Immediate(e) => (0, 3, Some(*e.clone())), | ||||||
|  |         Src::Absolute(e) => (2, 1, Some(*e.clone())), | ||||||
|  |         Src::Direct(r) => (*r as u16, 0, None), | ||||||
|  |         Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())), | ||||||
|  |         Src::Indirect(r) => (*r as u16, 2, None), | ||||||
|  |         Src::PostInc(r) => (*r as u16, 3, None), | ||||||
|  |         Src::BareExpr(e) => (0, 3, Some(*e.clone())), | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Returns a tuple of (Reg, AddrMode, Extword) | ||||||
|  | pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) { | ||||||
|  |     use DstSpecial::*; | ||||||
|  |     match dst { | ||||||
|  |         Dst::Special(Zero) => (3, 0, None), | ||||||
|  |         Dst::Special(One) => (3, 1, None), | ||||||
|  |         Dst::Absolute(e) => (2, 1, Some(*e.clone())), | ||||||
|  |         Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())), | ||||||
|  |         Dst::Direct(r) => (*r as u16, 0, None), | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Assembler<'t> { | ||||||
|  |     /// Evaluates an [Expr] using labels and constants defined in the current program | ||||||
|  |     fn eval(&self, expr: &Expr) -> AResult<u16> { | ||||||
|  |         match expr { | ||||||
|  |             Expr::Binary(head, tails) => { | ||||||
|  |                 let mut head = self.eval(head)?; | ||||||
|  |                 for (op, tail) in tails { | ||||||
|  |                     let tail = self.eval(tail)?; | ||||||
|  |                     head = match op { | ||||||
|  |                         BinOp::Mul => head.wrapping_mul(tail), | ||||||
|  |                         BinOp::Div => head.wrapping_div(tail), | ||||||
|  |                         BinOp::Rem => head.wrapping_rem(tail), | ||||||
|  |                         BinOp::Add => head.wrapping_add(tail), | ||||||
|  |                         BinOp::Sub => head.wrapping_sub(tail), | ||||||
|  |                         BinOp::Lsh => head.wrapping_shl(tail as u32), | ||||||
|  |                         BinOp::Rsh => head.wrapping_shr(tail as u32), | ||||||
|  |                         BinOp::And => head & tail, | ||||||
|  |                         BinOp::Xor => head ^ tail, | ||||||
|  |                         BinOp::Or => head | tail, | ||||||
|  |                     }; | ||||||
|  |                 } | ||||||
|  |                 Ok(head) | ||||||
|  |             } | ||||||
|  |             Expr::Unary(ops, tail) => { | ||||||
|  |                 let mut tail = self.eval(tail)?; | ||||||
|  |                 for op in ops { | ||||||
|  |                     tail = match op { | ||||||
|  |                         UnOp::Not => !tail, | ||||||
|  |                         UnOp::Neg => 0u16.wrapping_sub(tail), | ||||||
|  |                         UnOp::Deref => *self | ||||||
|  |                             .output | ||||||
|  |                             .get(tail.wrapping_sub(self.org_base as u16) as usize >> 1) | ||||||
|  |                             .ok_or_else(|| self.error(OobRead(tail)))?, | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 Ok(tail) | ||||||
|  |             } | ||||||
|  |             Expr::Group(e) => self.eval(e), | ||||||
|  |             Expr::Number(n) => Ok(*n), | ||||||
|  |             Expr::Ident(name) => { | ||||||
|  |                 self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string()))) | ||||||
|  |             } | ||||||
|  |             Expr::AddrOf(name) => self | ||||||
|  |                 .addrof(name) | ||||||
|  |                 .map(|p| (p << 1).wrapping_add(self.org_base as u16)) | ||||||
|  |                 .ok_or_else(|| self.error(UndefinedLabel(name.to_string()))), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub mod error { | ||||||
|  |     use std::fmt::Display; | ||||||
|  |  | ||||||
|  |     use crate::span::Span; | ||||||
|  |  | ||||||
|  |     pub type AResult<T> = Result<T, Error>; | ||||||
|  |  | ||||||
|  |     #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  |     pub struct Error { | ||||||
|  |         pub span: Span<usize>, | ||||||
|  |         pub kind: ErrorKind, | ||||||
|  |     } | ||||||
|  |     impl std::error::Error for Error {} | ||||||
|  |  | ||||||
|  |     #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  |     pub enum ErrorKind { | ||||||
|  |         #[default] | ||||||
|  |         Todo, | ||||||
|  |         /// A label was used, but not defined | ||||||
|  |         UndefinedLabel(String), | ||||||
|  |         RedefinedLabel(String), | ||||||
|  |         OobRead(u16), | ||||||
|  |         OddJump(i16), | ||||||
|  |         LongJump(i16), | ||||||
|  |         /// A plethora of [Error]s | ||||||
|  |         Errors(Vec<Error>), | ||||||
|  |     } | ||||||
|  |     impl Display for Error { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             let Self { kind, span } = self; | ||||||
|  |             write!(f, "[{span}]: ")?; | ||||||
|  |             write!(f, "Error: {kind}") | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for ErrorKind { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 ErrorKind::Todo => write!(f, "Not yet implemented"), | ||||||
|  |                 ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"), | ||||||
|  |                 ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"), | ||||||
|  |                 ErrorKind::OobRead(addr) => { | ||||||
|  |                     write!(f, "Out of bounds read in constant expression: {addr}") | ||||||
|  |                 } | ||||||
|  |                 ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"), | ||||||
|  |                 ErrorKind::LongJump(to) => { | ||||||
|  |                     write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe") | ||||||
|  |                 } | ||||||
|  |                 ErrorKind::Errors(errors) => { | ||||||
|  |                     writeln!(f, "Could not complete assembly:")?; | ||||||
|  |                     for error in errors { | ||||||
|  |                         writeln!(f, "{error}")?; | ||||||
|  |                     } | ||||||
|  |                     Ok(()) | ||||||
|  |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits a [JumpTarget] |  | ||||||
|     fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> { |  | ||||||
|         match node { |  | ||||||
|             JumpTarget::Number(num) => self.visit_number(num), |  | ||||||
|             JumpTarget::Identifier(id) => { |  | ||||||
|                 self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?; |  | ||||||
|                 Ok(()) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits a [SecondaryOperand] |  | ||||||
|     fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> { |  | ||||||
|         use SecondaryOperand as O; |  | ||||||
|         if let O::Indexed(_, num) | O::Absolute(num) = node { |  | ||||||
|             self.push_default(); |  | ||||||
|             self.visit_number(num)?; |  | ||||||
|         } |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits a [PrimaryOperand] |  | ||||||
|     fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> { |  | ||||||
|         use PrimaryOperand as O; |  | ||||||
|         match node { |  | ||||||
|             O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => { |  | ||||||
|                 self.push_default(); |  | ||||||
|                 self.visit_number(num)?; |  | ||||||
|             } |  | ||||||
|             O::Relative(id) => { |  | ||||||
|                 let addr = self.push_default(); |  | ||||||
|                 self.visit_identifier(id, addr, IdentType::Word)?; |  | ||||||
|             } |  | ||||||
|             _ => (), |  | ||||||
|         } |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits a number and ORs its 16-bit value into the last word of the output buffer |  | ||||||
|     fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> { |  | ||||||
|         *self.last_mut()? |= u16::from(*node); |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits a string and appends its bytes, followed by a NUL byte, to the output buffer, |  | ||||||
|     /// packing two bytes per word (even-indexed byte in the low half, odd-indexed in the high half) |  | ||||||
|     fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> { |  | ||||||
|         for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() { |  | ||||||
|             if idx % 2 == 0 { |  | ||||||
|                 self.push_default(); |  | ||||||
|             } |  | ||||||
|             *self.last_mut()? |= (byte as u16) << (8 * (idx % 2)); |  | ||||||
|         } |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Visits an [Identifier], and registers it to the identifier list |  | ||||||
|     fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> { |  | ||||||
|         self.identifiers.push((addr, node.clone(), ty)); |  | ||||||
|         Ok(()) |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,56 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| use crate::parser::{error::ParseError, preamble::*}; |  | ||||||
| use std::{ |  | ||||||
|     fmt::Display, |  | ||||||
|     path::{Path, PathBuf}, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #[derive(Debug)] |  | ||||||
| pub enum AssemblyError { |  | ||||||
|     UnresolvedIdentifier(Identifier), |  | ||||||
|     RedefinedLabel(Identifier), |  | ||||||
|     JumpedTooFar(Identifier, isize), |  | ||||||
|     ParseError(ParseError), |  | ||||||
|     // TODO: Do this better |  | ||||||
|     Context(Box<AssemblyError>, PathBuf, usize), |  | ||||||
|     EmptyBuffer, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl AssemblyError { |  | ||||||
|     pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self { |  | ||||||
|         Self::Context(self.into(), file.as_ref().into(), line) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<ParseError> for AssemblyError { |  | ||||||
|     fn from(value: ParseError) -> Self { Self::ParseError(value) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for AssemblyError { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::UnresolvedIdentifier(id) => { |  | ||||||
|                 write!(f, "Identifier {id} is undefined, but referenced anyway.") |  | ||||||
|             } |  | ||||||
|             Self::RedefinedLabel(id) => { |  | ||||||
|                 write!(f, "Redefined label '{id}'.") |  | ||||||
|             } |  | ||||||
|             Self::JumpedTooFar(id, num) => { |  | ||||||
|                 write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)") |  | ||||||
|             } |  | ||||||
|             Self::ParseError(e) => Display::fmt(e, f), |  | ||||||
|             Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()), |  | ||||||
|             Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::error::Error for AssemblyError { |  | ||||||
|     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |  | ||||||
|         match self { |  | ||||||
|             Self::ParseError(e) => Some(e), |  | ||||||
|             Self::Context(e, ..) => Some(e), |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
							
								
								
									
										49
									
								
								src/error.rs
									
									
									
									
									
								
							
							
						
						
									
										49
									
								
								src/error.rs
									
									
									
									
									
								
							| @@ -1,49 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! Common error type for [msp430-asm](crate) errors |  | ||||||
|  |  | ||||||
| use super::*; |  | ||||||
| use std::fmt::Display; |  | ||||||
|  |  | ||||||
| #[derive(Debug)] |  | ||||||
| pub enum Error { |  | ||||||
|     /// Produced by [lexer] |  | ||||||
|     LexError(lexer::error::LexError), |  | ||||||
|     /// Produced by [parser] |  | ||||||
|     ParseError(parser::error::ParseError), |  | ||||||
|     /// Produced by [assembler] |  | ||||||
|     AssemblyError(assembler::error::AssemblyError), |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Error {} |  | ||||||
|  |  | ||||||
| impl From<lexer::error::LexError> for Error { |  | ||||||
|     fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<parser::error::ParseError> for Error { |  | ||||||
|     fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<assembler::error::AssemblyError> for Error { |  | ||||||
|     fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Error { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Error::LexError(e) => Display::fmt(e, f), |  | ||||||
|             Error::ParseError(e) => Display::fmt(e, f), |  | ||||||
|             Error::AssemblyError(e) => Display::fmt(e, f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::error::Error for Error { |  | ||||||
|     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |  | ||||||
|         match self { |  | ||||||
|             Error::LexError(e) => Some(e), |  | ||||||
|             Error::ParseError(e) => Some(e), |  | ||||||
|             Error::AssemblyError(e) => Some(e), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
							
								
								
									
										22
									
								
								src/hash.rs
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								src/hash.rs
									
									
									
									
									
								
							| @@ -1,22 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! Convenience functions and traits for dealing with hashable data |  | ||||||
| pub type Hash = u64; |  | ||||||
|  |  | ||||||
| /// Calculates a hash using Rust hashmap's default hasher. |  | ||||||
| pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash { |  | ||||||
|     use std::hash::Hasher; |  | ||||||
|     let mut hasher = std::collections::hash_map::DefaultHasher::new(); |  | ||||||
|     hashable.hash(&mut hasher); |  | ||||||
|     hasher.finish() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| pub trait FromHash: From<Hash> { |  | ||||||
|     /// Hashes anything that implements [std::hash::Hash] using the |  | ||||||
|     /// [DefaultHasher](std::collections::hash_map::DefaultHasher) |  | ||||||
|     fn hash<T: std::hash::Hash>(hashable: T) -> Hash { hash(hashable) } |  | ||||||
|     fn from_hash<T: std::hash::Hash>(hashable: T) -> Self |  | ||||||
|     where Self: Sized { |  | ||||||
|         Self::from(Self::hash(hashable)) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl<T: From<Hash>> FromHash for T {} |  | ||||||
							
								
								
									
										312
									
								
								src/lexer.rs
									
									
									
									
									
								
							
							
						
						
									
										312
									
								
								src/lexer.rs
									
									
									
									
									
								
							| @@ -1,69 +1,269 @@ | |||||||
| // © 2023 John Breaux | // © 2023-2024 John Breaux | ||||||
| //! Iterates over [`&str`](str), producing [`Token`s](Token) | //See LICENSE.md for license | ||||||
|  | //! The [Lexer] turns a [sequence of characters](str) into a stream of | ||||||
|  | //! [lexically-tagged tokens](token) | ||||||
|  |  | ||||||
| pub mod context; |  | ||||||
| pub mod error; |  | ||||||
| pub mod ignore; |  | ||||||
| pub mod preprocessed; |  | ||||||
| pub mod token; | pub mod token; | ||||||
| pub mod token_stream; |  | ||||||
|  |  | ||||||
| use context::Context; | use self::token::{Special, TokenKind, *}; | ||||||
| use error::LexError; | use crate::span::Span; | ||||||
| use token::{Token, Type}; | use std::{ | ||||||
| use token_stream::TokenStream; |     iter::Peekable, | ||||||
|  |     str::{CharIndices, FromStr}, | ||||||
|  | }; | ||||||
|  | use unicode_ident::*; | ||||||
|  |  | ||||||
| /// Iterates over &[str], producing [Token]s | const DEFAULT_BASE: u32 = 10; | ||||||
| #[must_use = "iterators are lazy and do nothing unless consumed"] |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | /// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token). | ||||||
| pub struct Tokenizer<'t> { | /// | ||||||
|  | /// # Examples | ||||||
|  | /// ```rust | ||||||
|  | /// # use libmsp430::lexer::{Lexer, token::*}; | ||||||
|  | /// let text = "mov r14, r15"; | ||||||
|  | /// let mut lexer = Lexer::new(text); | ||||||
|  | /// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov)); | ||||||
|  | /// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14)); | ||||||
|  | /// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma); | ||||||
|  | /// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15)); | ||||||
|  | /// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof); | ||||||
|  | /// ``` | ||||||
|  | #[derive(Clone, Debug)] | ||||||
|  | pub struct Lexer<'t> { | ||||||
|  |     /// Keeps track of the byte offset into the string | ||||||
|  |     iter: Peekable<CharIndices<'t>>, | ||||||
|     text: &'t str, |     text: &'t str, | ||||||
|     idx: usize, |     start: usize, | ||||||
|     context: Context, |     index: usize, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t> Tokenizer<'t> { | impl<'t> Lexer<'t> { | ||||||
|     /// Produces a new [Tokenizer] from a [str]ing slice |     /// Creates a new [Lexer] over some [text](str) | ||||||
|     pub fn new<T>(text: &'t T) -> Self |     pub fn new(text: &'t str) -> Self { | ||||||
|     where T: AsRef<str> + ?Sized { |         Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 } | ||||||
|         Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() } |  | ||||||
|     } |     } | ||||||
|  |     /// Gets the current byte-position | ||||||
|     fn count(&mut self, token: &Token) { |     pub fn location(&self) -> usize { | ||||||
|         // update the context |         self.start | ||||||
|         self.context.count(token); |  | ||||||
|         // advance the index |  | ||||||
|         self.idx += token.len(); |  | ||||||
|     } |     } | ||||||
| } |     /// Internal: Emits a token with the provided [TokenKind], providing its extents. | ||||||
|  |     fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> { | ||||||
| impl<'text> Iterator for Tokenizer<'text> { |         let out = | ||||||
|     type Item = Token<'text>; |             Some(Token::new(self.next_lexeme(), kind, Span { start: self.start, end: self.index })); | ||||||
|  |         self.start = self.index; | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |         out | ||||||
|         if self.idx >= self.text.len() { |     } | ||||||
|             return None; |     fn next_lexeme(&self) -> &'t str { | ||||||
|  |         &self.text[self.start..self.index] | ||||||
|  |     } | ||||||
|  |     fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self { | ||||||
|  |         while let Some(&c) = self.peek() { | ||||||
|  |             if !f(c) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             self.next(); | ||||||
|         } |         } | ||||||
|         let token = Token::from(&self.text[self.idx..]); |         self | ||||||
|         // Process [Type::Directive]s |     } | ||||||
|         // Count the token |     fn space(&mut self) -> Option<&mut Self> { | ||||||
|         self.count(&token); |         while self.peek()?.is_whitespace() && *self.peek()? != '\n' { | ||||||
|         Some(token) |             self.next(); | ||||||
|  |         } | ||||||
|  |         self.start = self.index; | ||||||
|  |         Some(self) | ||||||
|  |     } | ||||||
|  |     /// Consumes a [char] without checking, for ergonomic chaining | ||||||
|  |     fn then(&mut self) -> &mut Self { | ||||||
|  |         self.next(); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |     fn peek(&mut self) -> Option<&char> { | ||||||
|  |         self.iter.peek().map(|(_, c)| c) | ||||||
|  |     } | ||||||
|  |     fn next(&mut self) -> Option<char> { | ||||||
|  |         let (index, c) = self.iter.next()?; | ||||||
|  |         self.index = index + c.len_utf8(); | ||||||
|  |         Some(c) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Scans for the next [Token] in the stream | ||||||
|  |     /// | ||||||
|  |     /// Whitespace other than `'\n'` is skipped first. Once the input is exhausted, an | ||||||
|  |     /// [Eof](TokenKind::Eof) token is emitted. Returns [None] when a sub-lexer cannot | ||||||
|  |     /// produce a token (for example, a number that does not fit in a [u16]). | ||||||
|  |     /// | ||||||
|  |     /// # Panics | ||||||
|  |     /// Panics via [todo!] on a character that starts no known token, and on a lone `<` or `>`. | ||||||
|  |     pub fn scan(&mut self) -> Option<Token<'t>> { | ||||||
|  |         if self.space().is_none() { | ||||||
|  |             return self.emit(TokenKind::Eof); | ||||||
|  |         } | ||||||
|  |         let Some(c) = self.peek() else { | ||||||
|  |             return self.emit(TokenKind::Eof); | ||||||
|  |         }; | ||||||
|  |         match c { | ||||||
|  |             '\n' => self.then().emit(TokenKind::Newline), | ||||||
|  |             '!' => self.then().emit(TokenKind::Bang), | ||||||
|  |             '#' => self.then().emit(TokenKind::Hash), | ||||||
|  |             '$' => self.then().emit(TokenKind::Dollar), | ||||||
|  |             '%' => self.then().emit(TokenKind::Percent), | ||||||
|  |             '&' => self.then().emit(TokenKind::Amp), | ||||||
|  |             '\'' => self.then().char(), | ||||||
|  |             '"' => self.then().string(), | ||||||
|  |             '(' => self.then().emit(TokenKind::OpenParen), | ||||||
|  |             ')' => self.then().emit(TokenKind::CloseParen), | ||||||
|  |             '*' => self.then().emit(TokenKind::Star), | ||||||
|  |             '+' => self.then().emit(TokenKind::Plus), | ||||||
|  |             ',' => self.then().emit(TokenKind::Comma), | ||||||
|  |             '-' => self.then().emit(TokenKind::Minus), | ||||||
|  |             // `directive_or_bw` consumes the leading '.' itself. Consuming it here as well | ||||||
|  |             // made it swallow whatever character followed the dot without checking it. | ||||||
|  |             '.' => self.directive_or_bw(), | ||||||
|  |             '/' => self.then().comment_or_slash(), | ||||||
|  |             '0' => self.then().number_with_base(), | ||||||
|  |             ':' => self.then().emit(TokenKind::Colon), | ||||||
|  |             ';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment), | ||||||
|  |             '<' => self.then().less(), | ||||||
|  |             '>' => self.then().greater(), | ||||||
|  |             '@' => self.then().emit(TokenKind::At), | ||||||
|  |             '[' => self.then().emit(TokenKind::OpenBrace), | ||||||
|  |             ']' => self.then().emit(TokenKind::CloseBrace), | ||||||
|  |             '^' => self.then().emit(TokenKind::Caret), | ||||||
|  |             // `identifier` consumes its own first character (see the XID_Start arm below), | ||||||
|  |             // so the '_' must not be consumed here: otherwise the character after a lone | ||||||
|  |             // '_' (such as the ':' of a label) is pulled into the identifier. | ||||||
|  |             '_' => self.identifier(), | ||||||
|  |             '{' => self.then().emit(TokenKind::OpenCurly), | ||||||
|  |             '|' => self.then().emit(TokenKind::Bar), | ||||||
|  |             '}' => self.then().emit(TokenKind::CloseCurly), | ||||||
|  |             c if c.is_numeric() => self.number::<DEFAULT_BASE>(), | ||||||
|  |             &c if is_xid_start(c) => self.identifier(), | ||||||
|  |             c => todo!("Unrecognized character: {c}"), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn number_with_base(&mut self) -> Option<Token<'t>> { | ||||||
|  |         match self.peek() { | ||||||
|  |             Some('x') => self.then().number::<16>(), | ||||||
|  |             Some('d') => self.then().number::<10>(), | ||||||
|  |             Some('o') => self.then().number::<8>(), | ||||||
|  |             Some('b') => self.then().number::<2>(), | ||||||
|  |             Some(c) if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(), | ||||||
|  |             _ => self.emit(TokenKind::Number(0, 10)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Lexes a run of base-`B` digits into a [Number](TokenKind::Number) token | ||||||
|  |     /// | ||||||
|  |     /// Returns [None] if the next character is not a base-`B` digit, or if the value does | ||||||
|  |     /// not fit in a [u16]. All digits of the run are consumed either way. | ||||||
|  |     fn number<const B: u32>(&mut self) -> Option<Token<'t>> { | ||||||
|  |         let mut num = self.digit::<B>()?; | ||||||
|  |         while let Some(digit) = self.digit::<B>() { | ||||||
|  |             // Saturate instead of overflowing: a very long digit run must stay out of range | ||||||
|  |             // rather than panic (debug) or wrap back into the valid range (release). | ||||||
|  |             num = num.saturating_mul(B).saturating_add(digit); | ||||||
|  |         } | ||||||
|  |         // Anything above u16::MAX (including a saturated value) is rejected | ||||||
|  |         let value = u16::try_from(num).ok()?; | ||||||
|  |         self.emit(TokenKind::Number(value, B as u8)) | ||||||
|  |     } | ||||||
|  |     fn digit<const B: u32>(&mut self) -> Option<u32> { | ||||||
|  |         let digit = self.peek()?.to_digit(B)?; | ||||||
|  |         self.then(); | ||||||
|  |         Some(digit) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn comment_or_slash(&mut self) -> Option<Token<'t>> { | ||||||
|  |         match self.peek() { | ||||||
|  |             Some('/') => self.repeat(|c| c != '\n').emit(TokenKind::Comment), | ||||||
|  |             _ => self.emit(TokenKind::Slash), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn less(&mut self) -> Option<Token<'t>> { | ||||||
|  |         match self.peek() { | ||||||
|  |             Some('<') => self.then().emit(TokenKind::Lsh), | ||||||
|  |             _ => todo!("less"), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn greater(&mut self) -> Option<Token<'t>> { | ||||||
|  |         match self.peek() { | ||||||
|  |             Some('>') => self.then().emit(TokenKind::Rsh), | ||||||
|  |             _ => todo!("greater"), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn identifier(&mut self) -> Option<Token<'t>> { | ||||||
|  |         while let Some(c) = self.then().peek() { | ||||||
|  |             if !is_xid_continue(*c) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let lexeme = self.next_lexeme(); | ||||||
|  |         if let Ok(op) = Reg::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::Reg(op)) | ||||||
|  |         } else if let Ok(op) = NoEm::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::NoEm(op)) | ||||||
|  |         } else if let Ok(op) = OneEm::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::OneEm(op)) | ||||||
|  |         } else if let Ok(op) = Special::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::Special(op)) | ||||||
|  |         } else if let Ok(op) = OneArg::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::OneArg(op)) | ||||||
|  |         } else if let Ok(op) = TwoArg::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::TwoArg(op)) | ||||||
|  |         } else if let Ok(op) = Jump::from_str(lexeme) { | ||||||
|  |             self.emit(TokenKind::Jump(op)) | ||||||
|  |         } else { | ||||||
|  |             self.emit(TokenKind::Identifier) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn directive_or_bw(&mut self) -> Option<Token<'t>> { | ||||||
|  |         while let Some(c) = self.then().peek() { | ||||||
|  |             if !is_xid_continue(*c) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         match self.next_lexeme() { | ||||||
|  |             ".b" => self.emit(TokenKind::Byte), | ||||||
|  |             ".w" => self.emit(TokenKind::Word), | ||||||
|  |             _ => self.emit(TokenKind::Directive), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Todo: Character unescaping in Lexer::string | ||||||
|  |     fn string(&mut self) -> Option<Token<'t>> { | ||||||
|  |         while '"' != self.next()? {} | ||||||
|  |         self.emit(TokenKind::String) | ||||||
|  |     } | ||||||
|  |     fn char(&mut self) -> Option<Token<'t>> { | ||||||
|  |         let out = self.unescape()?; | ||||||
|  |         self.next().filter(|c| *c == '\'').and_then(|_| self.emit(TokenKind::Char(out))) | ||||||
|  |     } | ||||||
|  |     /// Unescape a single character | ||||||
|  |     fn unescape(&mut self) -> Option<char> { | ||||||
|  |         match self.next() { | ||||||
|  |             Some('\\') => (), | ||||||
|  |             other => return other, | ||||||
|  |         } | ||||||
|  |         Some(match self.next()? { | ||||||
|  |             'a' => '\x07', | ||||||
|  |             'b' => '\x08', | ||||||
|  |             'f' => '\x0c', | ||||||
|  |             'n' => '\n', | ||||||
|  |             'r' => '\r', | ||||||
|  |             't' => '\t', | ||||||
|  |             'x' => self.hex_escape()?, | ||||||
|  |             'u' => self.unicode_escape()?, | ||||||
|  |             '0' => '\0', | ||||||
|  |             chr => chr, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |     /// unescape a single 2-digit hex escape | ||||||
|  |     fn hex_escape(&mut self) -> Option<char> { | ||||||
|  |         let out = (self.digit::<16>()? << 4) + self.digit::<16>()?; | ||||||
|  |         char::from_u32(out) //.ok_or(Error::bad_unicode(out, self.line(), self.col())) | ||||||
|  |     } | ||||||
|  |     /// unescape a single \u{} unicode escape | ||||||
|  |     fn unicode_escape(&mut self) -> Option<char> { | ||||||
|  |         let mut out = 0; | ||||||
|  |         let Some('{') = self.peek() else { | ||||||
|  |             return None; //Err(Error::invalid_escape('u', self.line(), self.col())); | ||||||
|  |         }; | ||||||
|  |         self.then(); | ||||||
|  |         while let Some(c) = self.peek() { | ||||||
|  |             match c { | ||||||
|  |                 '}' => { | ||||||
|  |                     self.then(); | ||||||
|  |                     return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col())); | ||||||
|  |                 } | ||||||
|  |                 _ => out = (out << 4) + self.digit::<16>()?, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         None //Err(Error::invalid_escape('u', self.line(), self.col())) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'text> TokenStream<'text> for Tokenizer<'text> { | #[cfg(test)] | ||||||
|     fn context(&self) -> Context { self.context } | mod tests; | ||||||
|     // Tokenizer has access to the source buffer, and can implement expect and peek without cloning |  | ||||||
|     // itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and |  | ||||||
|     // registers are valid identifiers. |  | ||||||
|     fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?; |  | ||||||
|         self.count(&token); |  | ||||||
|         Ok(token) |  | ||||||
|     } |  | ||||||
|     fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) } |  | ||||||
|     fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context())) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -1,38 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [Context] stores contextual information about the current tokenizer state |  | ||||||
| //! |  | ||||||
| //! This data is trivially copyable and can be provided in error messages using the |  | ||||||
| //! [LexError::Contextual] variant |  | ||||||
| use super::*; |  | ||||||
| /// Stores contextual information about the current tokenizer state, useful for printing errors |  | ||||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Context { |  | ||||||
|     line: usize, |  | ||||||
|     position: usize, |  | ||||||
|     tokens: usize, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Context { |  | ||||||
|     pub fn new() -> Self { Default::default() } |  | ||||||
|     pub fn line(&self) -> usize { self.line } |  | ||||||
|     pub fn tokens(&self) -> usize { self.tokens } |  | ||||||
|     pub fn position(&self) -> usize { self.position } |  | ||||||
|     pub(super) fn count(&mut self, t: &Token) { |  | ||||||
|         match t.variant() { |  | ||||||
|             Type::EndOfFile => return, |  | ||||||
|             Type::Endl => { |  | ||||||
|                 self.line += 1; |  | ||||||
|                 self.position = 1; |  | ||||||
|             } |  | ||||||
|             _ => self.position += t.len(), |  | ||||||
|         } |  | ||||||
|         self.tokens += 1; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl Default for Context { |  | ||||||
|     fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::fmt::Display for Context { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) } |  | ||||||
| } |  | ||||||
| @@ -1,68 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| use super::{ |  | ||||||
|     context::Context, |  | ||||||
|     token::{OwnedToken, *}, |  | ||||||
| }; |  | ||||||
| use std::fmt::Display; |  | ||||||
|  |  | ||||||
| #[derive(Debug)] |  | ||||||
| pub enum LexError { |  | ||||||
|     /// Any other error, tagged with [Context]. Created by [`LexError::context()`] |  | ||||||
|     Contextual(Context, Box<Self>), |  | ||||||
|     /// Produced by [Token] when the input is entirely unexpected. |  | ||||||
|     UnexpectedSymbol(String), |  | ||||||
|     /// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type] |  | ||||||
|     UnexpectedToken { expected: Type, got: OwnedToken }, |  | ||||||
|     /// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the |  | ||||||
|     /// expected [Types](Type) |  | ||||||
|     AllExpectationsFailed { expected: Types, got: OwnedToken }, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl LexError { |  | ||||||
|     pub fn context(self, c: Context) -> Self { |  | ||||||
|         match self { |  | ||||||
|             Self::Contextual(..) => self, |  | ||||||
|             _ => Self::Contextual(c, Box::new(self)), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Extracts the root of the error tree |  | ||||||
|     pub fn bare(self) -> Self { |  | ||||||
|         match self { |  | ||||||
|             Self::Contextual(_, bare) => bare.bare(), |  | ||||||
|             _ => self, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self { |  | ||||||
|         match expected.as_ref().len() { |  | ||||||
|             1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() }, |  | ||||||
|             _ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() }, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn mask_expectation(mut self, expected: Type) -> Self { |  | ||||||
|         match self { |  | ||||||
|             LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got }, |  | ||||||
|             LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got }, |  | ||||||
|             LexError::Contextual(context, err) => { |  | ||||||
|                 self = LexError::Contextual(context, Box::new(err.mask_expectation(expected))) |  | ||||||
|             } |  | ||||||
|             _ => (), |  | ||||||
|         } |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for LexError { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"), |  | ||||||
|             LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""), |  | ||||||
|             LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."), |  | ||||||
|             LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::error::Error for LexError {} |  | ||||||
| @@ -1,55 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`] |  | ||||||
| use super::*; |  | ||||||
| #[must_use = "iterators are lazy and do nothing unless consumed"] |  | ||||||
| #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Ignore<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     ignore: Type, |  | ||||||
|     inner: &'t mut T, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> Ignore<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     /// Creates a new [Ignore], which ignores the [ignore Type](Type) |  | ||||||
|     pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } } |  | ||||||
|  |  | ||||||
|     /// Gets a mutable reference to the inner [Iterator] |  | ||||||
|     pub fn inner_mut(&mut self) -> &mut T { self.inner } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> Iterator for Ignore<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     type Item = Token<'t>; |  | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |  | ||||||
|         let next = self.inner.next()?; |  | ||||||
|         // Space tokens are greedy, so the next token shouldn't be a Space |  | ||||||
|         match next.variant() { |  | ||||||
|             Type::Space => self.next(), |  | ||||||
|             _ => Some(next), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> TokenStream<'t> for Ignore<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     fn context(&self) -> Context { self.inner.context() } |  | ||||||
|     fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         self.inner.allow(self.ignore); |  | ||||||
|         self.inner.expect(expected) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn peek(&mut self) -> Self::Item { |  | ||||||
|         self.inner.allow(self.ignore); |  | ||||||
|         self.inner.peek() |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         self.inner.allow(self.ignore); |  | ||||||
|         self.inner.peek_expect(expected) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,174 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band |  | ||||||
| //! ".define" rules |  | ||||||
| use super::*; |  | ||||||
| use std::collections::{HashMap, VecDeque}; |  | ||||||
|  |  | ||||||
| // TODO: Clean this spaghetti mess up |  | ||||||
|  |  | ||||||
| /// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define" |  | ||||||
| /// rules |  | ||||||
| #[must_use = "iterators are lazy and do nothing unless consumed"] |  | ||||||
| #[derive(PartialEq, Eq)] |  | ||||||
| pub struct Preprocessed<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     sub_table: HashMap<Token<'t>, Vec<Token<'t>>>, |  | ||||||
|     sub_types: Vec<Type>, |  | ||||||
|     queue: VecDeque<Token<'t>>, |  | ||||||
|     inner: &'t mut T, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> Iterator for Preprocessed<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     type Item = Token<'t>; |  | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |  | ||||||
|         match self.queue.pop_front() { |  | ||||||
|             Some(token) => Some(token), |  | ||||||
|             None => { |  | ||||||
|                 let next = self.inner.next()?; |  | ||||||
|                 if let Some(subs) = self.sub_table.get(&next) { |  | ||||||
|                     self.queue.extend(subs); |  | ||||||
|                     return self.next(); |  | ||||||
|                 } |  | ||||||
|                 Some(next) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> { |  | ||||||
|     /// Creates a new [Preprocessed] [TokenStream] |  | ||||||
|     pub fn new(inner: &'t mut T) -> Self { |  | ||||||
|         Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Gets a mutable reference to the inner [TokenStream] |  | ||||||
|     pub fn inner_mut(&mut self) -> &mut T { self.inner } |  | ||||||
|  |  | ||||||
|     /// Preserve the next token in the queue |  | ||||||
|     fn enqueue(&mut self, token: Token<'t>) -> Token<'t> { |  | ||||||
|         self.queue.push_back(token); |  | ||||||
|         token |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Process .define directives in the preprocessor |  | ||||||
|     fn define(&mut self, token: Token<'t>) -> Result<(), LexError> { |  | ||||||
|         if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) { |  | ||||||
|             return Ok(()); |  | ||||||
|         } |  | ||||||
|         // Tokenize the subdocument |  | ||||||
|         self.allow(Type::Directive); |  | ||||||
|         self.allow(Type::Space); |  | ||||||
|  |  | ||||||
|         let Some(k) = self.inner.next() else { return Ok(()) }; |  | ||||||
|         if !self.sub_types.contains(&k.variant()) { |  | ||||||
|             self.sub_types.push(k.variant()); |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         self.allow(Type::Space); |  | ||||||
|  |  | ||||||
|         let mut replacement = vec![]; |  | ||||||
|         loop { |  | ||||||
|             match self.inner.peek().variant() { |  | ||||||
|                 Type::Endl | Type::EndOfFile => break, |  | ||||||
|                 Type::Comment | Type::Space => { |  | ||||||
|                     // ignore comments and whitespace |  | ||||||
|                     self.inner.next(); |  | ||||||
|                 } |  | ||||||
|                 _ => { |  | ||||||
|                     let next = self.inner.next().unwrap(); |  | ||||||
|                     replacement.push(self.enqueue(next)); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         self.sub_table.insert(k, replacement); |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Does the preprocessing step |  | ||||||
|     fn preprocess(&mut self, token: Token<'t>) { |  | ||||||
|         if let Some(subs) = self.sub_table.get(&token) { |  | ||||||
|             self.queue.extend(subs); |  | ||||||
|             self.inner.next(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> TokenStream<'t> for Preprocessed<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     fn context(&self) -> Context { self.inner.context() } |  | ||||||
|  |  | ||||||
|     fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         match self.queue.front() { |  | ||||||
|             Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()), |  | ||||||
|             Some(&token) => Err(LexError::expected([expected], token).context(self.context())), |  | ||||||
|             None => { |  | ||||||
|                 // Only resolve defines when expecting, otherwise you'll run into issues. |  | ||||||
|                 if let Ok(next) = self.inner.expect(expected) { |  | ||||||
|                     self.define(next)?; |  | ||||||
|                     return Ok(next); |  | ||||||
|                 } |  | ||||||
|                 if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) { |  | ||||||
|                     if let Some(subs) = self.sub_table.get(&next) { |  | ||||||
|                         self.inner.allow_any_of(&self.sub_types); |  | ||||||
|                         self.queue.extend(subs); |  | ||||||
|                     } |  | ||||||
|                     return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) }; |  | ||||||
|                 } |  | ||||||
|                 Err(LexError::expected([expected], self.inner.peek()).context(self.context())) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn peek(&mut self) -> Self::Item { |  | ||||||
|         match self.queue.front() { |  | ||||||
|             Some(token) => *token, |  | ||||||
|             None => { |  | ||||||
|                 // Only allow substitution when the next token is unexpected |  | ||||||
|                 let old = self.inner.peek(); |  | ||||||
|                 self.preprocess(old); |  | ||||||
|                 match self.queue.front() { |  | ||||||
|                     Some(&new) => new, |  | ||||||
|                     None => old, |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> { |  | ||||||
|         match self.queue.front() { |  | ||||||
|             Some(&token) if token.is_variant(expected) => Ok(token), |  | ||||||
|             Some(&token) => Err(LexError::expected([expected], token).context(self.context())), |  | ||||||
|             None => { |  | ||||||
|                 if let Ok(next) = self.inner.peek_expect(expected) { |  | ||||||
|                     return Ok(next); |  | ||||||
|                 } |  | ||||||
|                 if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) { |  | ||||||
|                     self.preprocess(next); |  | ||||||
|                     return if self.queue.is_empty() { |  | ||||||
|                         self.inner.peek_expect(expected) |  | ||||||
|                     } else { |  | ||||||
|                         self.peek_expect(expected) |  | ||||||
|                     }; |  | ||||||
|                 } |  | ||||||
|                 Err(LexError::expected([expected], self.inner.peek())) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t, T> std::fmt::Debug for Preprocessed<'t, T> |  | ||||||
| where T: TokenStream<'t> |  | ||||||
| { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         f.debug_struct("Preprocessed") |  | ||||||
|             .field("sub_table", &self.sub_table) |  | ||||||
|             .field("sub_types", &self.sub_types) |  | ||||||
|             .field("queue", &self.queue) |  | ||||||
|             .field("context", &self.context()) |  | ||||||
|             .finish_non_exhaustive() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
							
								
								
									
										175
									
								
								src/lexer/tests.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								src/lexer/tests.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,175 @@ | |||||||
|  | use super::*; | ||||||
|  | macro_rules! lex { | ||||||
|  |     (type ($($t:tt)*), $expected:expr) => { | ||||||
|  |         let token = Lexer::new(stringify!($($t)*)).scan().expect(stringify!($($t:tt)* should yield a valid token)); | ||||||
|  |         assert_eq!(token.kind, $expected); | ||||||
|  |     }; | ||||||
|  |     (str $t:literal, $expected:expr) => { | ||||||
|  |         let token = Lexer::new($t).scan().expect(stringify!($t:tt should yield a valid token)); | ||||||
|  |         assert_eq!(token.kind, $expected); | ||||||
|  |     }; | ||||||
|  |     ({ $($t:tt)* }) => { | ||||||
|  |         Lexer::new(stringify!($($t)*)) | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn ascii_char() { | ||||||
|  |     lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char | ||||||
|  |     lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn unicode_escape_char() { | ||||||
|  |     lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈 | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn number_with_base() { | ||||||
|  |     lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number | ||||||
|  |     lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number | ||||||
|  |     lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number | ||||||
|  |     lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number | ||||||
|  |     lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number | ||||||
|  |     lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn no_operand_emulated() { | ||||||
|  |     lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm | ||||||
|  |     lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm | ||||||
|  |     lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm | ||||||
|  |     lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm | ||||||
|  |     lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm | ||||||
|  |     lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm | ||||||
|  |     lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm | ||||||
|  |     lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm | ||||||
|  |     lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm | ||||||
|  |     lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn br() { | ||||||
|  |     lex!(type (br), TokenKind::Special(Special::Br)); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn one_operand_emulated() { | ||||||
|  |     lex!(type (pop), TokenKind::OneEm(OneEm::Pop)); | ||||||
|  |     lex!(type (rla), TokenKind::OneEm(OneEm::Rla)); | ||||||
|  |     lex!(type (rlc), TokenKind::OneEm(OneEm::Rlc)); | ||||||
|  |     lex!(type (inv), TokenKind::OneEm(OneEm::Inv)); | ||||||
|  |     lex!(type (clr), TokenKind::OneEm(OneEm::Clr)); | ||||||
|  |     lex!(type (tst), TokenKind::OneEm(OneEm::Tst)); | ||||||
|  |     lex!(type (dec), TokenKind::OneEm(OneEm::Dec)); | ||||||
|  |     lex!(type (decd), TokenKind::OneEm(OneEm::Decd)); | ||||||
|  |     lex!(type (inc), TokenKind::OneEm(OneEm::Inc)); | ||||||
|  |     lex!(type (incd), TokenKind::OneEm(OneEm::Incd)); | ||||||
|  |     lex!(type (adc), TokenKind::OneEm(OneEm::Adc)); | ||||||
|  |     lex!(type (dadc), TokenKind::OneEm(OneEm::Dadc)); | ||||||
|  |     lex!(type (sbc), TokenKind::OneEm(OneEm::Sbc)); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn one_operand() { | ||||||
|  |     lex!(type (rrc), TokenKind::OneArg(OneArg::Rrc)); | ||||||
|  |     lex!(type (swpb), TokenKind::OneArg(OneArg::Swpb)); | ||||||
|  |     lex!(type (rra), TokenKind::OneArg(OneArg::Rra)); | ||||||
|  |     lex!(type (sxt), TokenKind::OneArg(OneArg::Sxt)); | ||||||
|  |     lex!(type (push), TokenKind::OneArg(OneArg::Push)); | ||||||
|  |     lex!(type (call), TokenKind::OneArg(OneArg::Call)); | ||||||
|  |     lex!(type (reti), TokenKind::OneArg(OneArg::Reti)); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn two_operand() { | ||||||
|  |     lex!(type (mov), TokenKind::TwoArg(TwoArg::Mov)); | ||||||
|  |     lex!(type (add), TokenKind::TwoArg(TwoArg::Add)); | ||||||
|  |     lex!(type (addc), TokenKind::TwoArg(TwoArg::Addc)); | ||||||
|  |     lex!(type (subc), TokenKind::TwoArg(TwoArg::Subc)); | ||||||
|  |     lex!(type (sub), TokenKind::TwoArg(TwoArg::Sub)); | ||||||
|  |     lex!(type (cmp), TokenKind::TwoArg(TwoArg::Cmp)); | ||||||
|  |     lex!(type (dadd), TokenKind::TwoArg(TwoArg::Dadd)); | ||||||
|  |     lex!(type (bit), TokenKind::TwoArg(TwoArg::Bit)); | ||||||
|  |     lex!(type (bic), TokenKind::TwoArg(TwoArg::Bic)); | ||||||
|  |     lex!(type (bis), TokenKind::TwoArg(TwoArg::Bis)); | ||||||
|  |     lex!(type (xor), TokenKind::TwoArg(TwoArg::Xor)); | ||||||
|  |     lex!(type (and), TokenKind::TwoArg(TwoArg::And)); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn jump() { | ||||||
|  |     lex!(type (jne), TokenKind::Jump(Jump::Jne)); | ||||||
|  |     lex!(type (jnz), TokenKind::Jump(Jump::Jnz)); | ||||||
|  |     lex!(type (jeq), TokenKind::Jump(Jump::Jeq)); | ||||||
|  |     lex!(type (jz), TokenKind::Jump(Jump::Jz)); | ||||||
|  |     lex!(type (jnc), TokenKind::Jump(Jump::Jnc)); | ||||||
|  |     lex!(type (jlo), TokenKind::Jump(Jump::Jlo)); | ||||||
|  |     lex!(type (jc), TokenKind::Jump(Jump::Jc)); | ||||||
|  |     lex!(type (jhs), TokenKind::Jump(Jump::Jhs)); | ||||||
|  |     lex!(type (jn), TokenKind::Jump(Jump::Jn)); | ||||||
|  |     lex!(type (jge), TokenKind::Jump(Jump::Jge)); | ||||||
|  |     lex!(type (jl), TokenKind::Jump(Jump::Jl)); | ||||||
|  |     lex!(type (jmp), TokenKind::Jump(Jump::Jmp)); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn registers() { | ||||||
|  |     lex!(type (pc), TokenKind::Reg(Reg::PC)); | ||||||
|  |     lex!(type (sp), TokenKind::Reg(Reg::SP)); | ||||||
|  |     lex!(type (sr), TokenKind::Reg(Reg::SR)); | ||||||
|  |     lex!(type (cg), TokenKind::Reg(Reg::CG)); | ||||||
|  |     lex!(type (r0), TokenKind::Reg(Reg::PC)); | ||||||
|  |     lex!(type (r1), TokenKind::Reg(Reg::SP)); | ||||||
|  |     lex!(type (r2), TokenKind::Reg(Reg::SR)); | ||||||
|  |     lex!(type (r3), TokenKind::Reg(Reg::CG)); | ||||||
|  |     lex!(type (r4), TokenKind::Reg(Reg::R4)); | ||||||
|  |     lex!(type (r5), TokenKind::Reg(Reg::R5)); | ||||||
|  |     lex!(type (r6), TokenKind::Reg(Reg::R6)); | ||||||
|  |     lex!(type (r7), TokenKind::Reg(Reg::R7)); | ||||||
|  |     lex!(type (r8), TokenKind::Reg(Reg::R8)); | ||||||
|  |     lex!(type (r9), TokenKind::Reg(Reg::R9)); | ||||||
|  |     lex!(type (r10), TokenKind::Reg(Reg::R10)); | ||||||
|  |     lex!(type (r11), TokenKind::Reg(Reg::R11)); | ||||||
|  |     lex!(type (r12), TokenKind::Reg(Reg::R12)); | ||||||
|  |     lex!(type (r13), TokenKind::Reg(Reg::R13)); | ||||||
|  |     lex!(type (r14), TokenKind::Reg(Reg::R14)); | ||||||
|  |     lex!(type (r15), TokenKind::Reg(Reg::R15)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn delimiters() { | ||||||
|  |     lex!(str "", TokenKind::Eof); | ||||||
|  |     lex!(str "\n", TokenKind::Newline); | ||||||
|  |     lex!(str "(", TokenKind::OpenParen); | ||||||
|  |     lex!(str ")", TokenKind::CloseParen); | ||||||
|  |     lex!(str "{", TokenKind::OpenCurly); | ||||||
|  |     lex!(str "}", TokenKind::CloseCurly); | ||||||
|  |     lex!(str "[", TokenKind::OpenBrace); | ||||||
|  |     lex!(str "]", TokenKind::CloseBrace); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn comment() { | ||||||
|  |     lex!(str "; this is a comment!\n\n", TokenKind::Comment); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn other() { | ||||||
|  |     // lex!(type (), TokenKind::) | ||||||
|  |     lex!(type (,), TokenKind::Comma); | ||||||
|  |     lex!(type (:), TokenKind::Colon); | ||||||
|  |     lex!(type (!), TokenKind::Bang); | ||||||
|  |     lex!(type (@), TokenKind::At); | ||||||
|  |     lex!(type (&), TokenKind::Amp); | ||||||
|  |     lex!(type (|), TokenKind::Bar); | ||||||
|  |     lex!(type (^), TokenKind::Caret); | ||||||
|  |     lex!(type (*), TokenKind::Star); | ||||||
|  |     lex!(type (#), TokenKind::Hash); | ||||||
|  |     lex!(type ($), TokenKind::Dollar); | ||||||
|  |     lex!(type (%), TokenKind::Percent); | ||||||
|  |     lex!(type (+), TokenKind::Plus); | ||||||
|  |     lex!(type (-), TokenKind::Minus); | ||||||
|  |     lex!(type (/), TokenKind::Slash); | ||||||
|  |     lex!(type (<<), TokenKind::Lsh); | ||||||
|  |     lex!(type (>>), TokenKind::Rsh); | ||||||
|  |     lex!(type (.directive), TokenKind::Directive); | ||||||
|  |     lex!(type (identifier), TokenKind::Identifier); | ||||||
|  |     lex!(type (.b), TokenKind::Byte); | ||||||
|  |     lex!(type (.w), TokenKind::Word); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn ignores_leading_whitespace() { | ||||||
|  |     lex!(str " \u{a0}\t\t\t\t\t\t\t-", TokenKind::Minus); | ||||||
|  | } | ||||||
| @@ -1,335 +1,480 @@ | |||||||
| // © 2023 John Breaux | // © 2023-2024 John Breaux | ||||||
| //! A [Token] is a [semantically tagged](Type) sequence of characters. | //See LICENSE.md for license | ||||||
|  | //! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span] | ||||||
| //! | //! | ||||||
| //! Token, and the tokenizer, intend to copy as little as possible. | //! [Tokens](Token) are borrowed, and cannot outlive their source slice (lifetime `'t`) | ||||||
|  | use crate::span::Span; | ||||||
| use super::error::LexError; | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
| use regex::Regex; | pub struct Token<'t> { | ||||||
| use std::{ |     pub lexeme: &'t str, | ||||||
|     fmt::{Debug, Display}, |     pub kind: TokenKind, | ||||||
|     sync::OnceLock, |     pub pos: Span<usize>, | ||||||
| }; | } | ||||||
|  | impl<'t> Token<'t> { | ||||||
| /// Implements regex matching functions on [`Token`] for each [`Type`], |     pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self { | ||||||
| /// and implements [`From<&str>`] for [`Token`] |         Self { lexeme, kind, pos } | ||||||
| macro_rules! regex_impl { |  | ||||||
| (<$t:lifetime> $type:ty {$( |  | ||||||
|     $(#[$meta:meta])* |  | ||||||
|     pub fn $func:ident (text: &str) -> Option<Self> { |  | ||||||
|         regex!($out:path = $re:literal) |  | ||||||
|     } |     } | ||||||
| )*}) => { |     pub fn kind(&self) -> TokenKind { | ||||||
| impl<$t> $type { |         self.kind | ||||||
|     /// Lexes a token only for the expected `variant` |  | ||||||
|     /// |  | ||||||
|     /// Warning: This bypasses precedence rules. Only use for specific patterns. |  | ||||||
|     pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> { |  | ||||||
|         match expected {$( |  | ||||||
|             $out => Self::$func(text), |  | ||||||
|         )*}.ok_or(LexError::UnexpectedToken { |  | ||||||
|             expected, |  | ||||||
|             got: Self::from(text).into(), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
|     $( |  | ||||||
|     $(#[$meta])* |  | ||||||
|     /// Tries to read [` |  | ||||||
|     #[doc = stringify!($out)] |  | ||||||
|     /// `] from `text` |  | ||||||
|     pub fn $func(text: &$t str) -> Option<Self> { |  | ||||||
|         static RE: OnceLock<Regex> = OnceLock::new(); |  | ||||||
|         let lexeme = RE.get_or_init(|| Regex::new($re).unwrap()) |  | ||||||
|             .find(text)?.into(); |  | ||||||
|         Some(Self { variant: $out, lexeme }) |  | ||||||
|     })* |  | ||||||
| } |  | ||||||
| impl<$t> From<&$t str> for $type { |  | ||||||
|     fn from (value: &$t str) -> Self { |  | ||||||
|         $( |  | ||||||
|             if let Some(token) = Self::$func(value) { |  | ||||||
|                 token |  | ||||||
|             } else |  | ||||||
|         )* |  | ||||||
|         {todo!("Unexpected input: {value:#?} (Tokenization failure)")} |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// A [Token] is a [semantically tagged](Type) sequence of characters |  | ||||||
| #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Token<'text> { |  | ||||||
|     /// The type of this token |  | ||||||
|     variant: Type, |  | ||||||
|     /// The sub[str]ing corresponding to this token |  | ||||||
|     lexeme: &'text str, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'text> Token<'text> { |  | ||||||
|     /// Returns the [Type] of this [Token] |  | ||||||
|     pub fn variant(&self) -> Type { self.variant } |  | ||||||
|  |  | ||||||
|     /// Returns the lexeme (originating string slice) of this token |  | ||||||
|     pub fn lexeme(&self) -> &'text str { self.lexeme } |  | ||||||
|  |  | ||||||
|     /// Parses this [Token] into another type |  | ||||||
|     pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err> |  | ||||||
|     where F: std::str::FromStr { |  | ||||||
|         self.lexeme.parse() |  | ||||||
|     } |  | ||||||
|     /// Returns whether the Lexeme is the expected [Type] |  | ||||||
|     pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected } |  | ||||||
|  |  | ||||||
|     /// Returns the length of [Self::lexeme] in bytes. |  | ||||||
|     pub fn len(&self) -> usize { self.lexeme.len() } |  | ||||||
|  |  | ||||||
|     /// Returns `true` if [Self::lexeme] has a length of zero bytes. |  | ||||||
|     pub fn is_empty(&self) -> bool { self.lexeme.is_empty() } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'text> Debug for Token<'text> { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         f.debug_list().entry(&self.variant).entry(&self.lexeme).finish() |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'text> Display for Token<'text> { | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | pub enum TokenKind { | ||||||
|         match self.variant { |     Eof, | ||||||
|             Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f), |     Newline,    // \n | ||||||
|             v => write!(f, "{v} \"{}\"", self.lexeme), |     OpenParen,  // ( | ||||||
|  |     CloseParen, // ) | ||||||
|  |     OpenCurly,  // { | ||||||
|  |     CloseCurly, // } | ||||||
|  |     OpenBrace,  // [ | ||||||
|  |     CloseBrace, // ] | ||||||
|  |     Comma,      // , | ||||||
|  |     Colon,      // : | ||||||
|  |     Bang,       // ! | ||||||
|  |     At,         // @ | ||||||
|  |     Amp,        // & | ||||||
|  |     Bar,        // | | ||||||
|  |     Caret,      // ^ | ||||||
|  |     Star,       // * | ||||||
|  |     Hash,       // # | ||||||
|  |     Dollar,     // $ | ||||||
|  |     Percent,    // % | ||||||
|  |     Plus,       // + | ||||||
|  |     Minus,      // - | ||||||
|  |     Slash,      // / | ||||||
|  |     Lsh,        // << | ||||||
|  |     Rsh,        // >> | ||||||
|  |  | ||||||
|  |     Comment,         // (';' | '//') .* '\n' | | ||||||
|  |     Directive,       // '.' XID_CONTINUE* | ||||||
|  |     Identifier,      // XID_START XID_CONTINUE* | ||||||
|  |     Number(u16, u8), // varies depending on base | ||||||
|  |     Char(char),      // '\'' ('\' Escape | .) '\'' | ||||||
|  |     String,          // '"' .* '"' | ||||||
|  |     Reg(Reg), | ||||||
|  |     NoEm(NoEm), | ||||||
|  |     OneEm(OneEm), | ||||||
|  |     Special(Special), | ||||||
|  |     OneArg(OneArg), | ||||||
|  |     TwoArg(TwoArg), | ||||||
|  |     Jump(Jump), | ||||||
|  |  | ||||||
|  |     Byte, // .b | ||||||
|  |     Word, // .w | ||||||
|  | } | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum Reg { | ||||||
|  |     PC, | ||||||
|  |     SP, | ||||||
|  |     SR, | ||||||
|  |     CG, | ||||||
|  |     R4, | ||||||
|  |     R5, | ||||||
|  |     R6, | ||||||
|  |     R7, | ||||||
|  |     R8, | ||||||
|  |     R9, | ||||||
|  |     R10, | ||||||
|  |     R11, | ||||||
|  |     R12, | ||||||
|  |     R13, | ||||||
|  |     R14, | ||||||
|  |     R15, | ||||||
|  | } | ||||||
|  | /// Fake instructions of the form `opcode` | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum NoEm { | ||||||
|  |     Nop, | ||||||
|  |     Ret, | ||||||
|  |     Clrc, | ||||||
|  |     Clrz, | ||||||
|  |     Clrn, | ||||||
|  |     Setc, | ||||||
|  |     Setz, | ||||||
|  |     Setn, | ||||||
|  |     Dint, | ||||||
|  |     Eint, | ||||||
|  | } | ||||||
|  | /// Fake instructions of the form `opcode dst` | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum OneEm { | ||||||
|  |     Pop, | ||||||
|  |     Rla, | ||||||
|  |     Rlc, | ||||||
|  |     Inv, | ||||||
|  |     Clr, | ||||||
|  |     Tst, | ||||||
|  |     Dec, | ||||||
|  |     Decd, | ||||||
|  |     Inc, | ||||||
|  |     Incd, | ||||||
|  |     Adc, | ||||||
|  |     Dadc, | ||||||
|  |     Sbc, | ||||||
|  | } | ||||||
|  | /// These opcodes have bespoke grammatical rules | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum Special { | ||||||
|  |     /// Br = "br" Src | ||||||
|  |     Br, | ||||||
|  | } | ||||||
|  | /// Real instructions of the form `opcode src` | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum OneArg { | ||||||
|  |     Rrc, | ||||||
|  |     Swpb, | ||||||
|  |     Rra, | ||||||
|  |     Sxt, | ||||||
|  |     Push, | ||||||
|  |     Call, | ||||||
|  |     Reti, | ||||||
|  | } | ||||||
|  | /// Real instructions of the form `opcode src, dst` | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum TwoArg { | ||||||
|  |     Mov, | ||||||
|  |     Add, | ||||||
|  |     Addc, | ||||||
|  |     Subc, | ||||||
|  |     Sub, | ||||||
|  |     Cmp, | ||||||
|  |     Dadd, | ||||||
|  |     Bit, | ||||||
|  |     Bic, | ||||||
|  |     Bis, | ||||||
|  |     Xor, | ||||||
|  |     And, | ||||||
|  | } | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum Jump { | ||||||
|  |     Jne, | ||||||
|  |     Jnz, | ||||||
|  |     Jeq, | ||||||
|  |     Jz, | ||||||
|  |     Jnc, | ||||||
|  |     Jlo, | ||||||
|  |     Jc, | ||||||
|  |     Jhs, | ||||||
|  |     Jn, | ||||||
|  |     Jge, | ||||||
|  |     Jl, | ||||||
|  |     Jmp, | ||||||
|  | } | ||||||
|  | mod convert { | ||||||
|  |     //! Implementations of [FromStr] for [token](super) types. | ||||||
|  |     use super::*; | ||||||
|  |     use std::str::FromStr; | ||||||
|  |  | ||||||
|  |     impl FromStr for Reg { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "pc" => Reg::PC, | ||||||
|  |                 "sp" => Reg::SP, | ||||||
|  |                 "sr" => Reg::SR, | ||||||
|  |                 "cg" => Reg::CG, | ||||||
|  |                 "r0" => Reg::PC, | ||||||
|  |                 "r1" => Reg::SP, | ||||||
|  |                 "r2" => Reg::SR, | ||||||
|  |                 "r3" => Reg::CG, | ||||||
|  |                 "r4" => Reg::R4, | ||||||
|  |                 "r5" => Reg::R5, | ||||||
|  |                 "r6" => Reg::R6, | ||||||
|  |                 "r7" => Reg::R7, | ||||||
|  |                 "r8" => Reg::R8, | ||||||
|  |                 "r9" => Reg::R9, | ||||||
|  |                 "r10" => Reg::R10, | ||||||
|  |                 "r11" => Reg::R11, | ||||||
|  |                 "r12" => Reg::R12, | ||||||
|  |                 "r13" => Reg::R13, | ||||||
|  |                 "r14" => Reg::R14, | ||||||
|  |                 "r15" => Reg::R15, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for NoEm { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "nop" => NoEm::Nop, | ||||||
|  |                 "ret" => NoEm::Ret, | ||||||
|  |                 "clrc" => NoEm::Clrc, | ||||||
|  |                 "clrz" => NoEm::Clrz, | ||||||
|  |                 "clrn" => NoEm::Clrn, | ||||||
|  |                 "setc" => NoEm::Setc, | ||||||
|  |                 "setz" => NoEm::Setz, | ||||||
|  |                 "setn" => NoEm::Setn, | ||||||
|  |                 "dint" => NoEm::Dint, | ||||||
|  |                 "eint" => NoEm::Eint, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for OneEm { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "pop" => OneEm::Pop, | ||||||
|  |                 "rla" => OneEm::Rla, | ||||||
|  |                 "rlc" => OneEm::Rlc, | ||||||
|  |                 "inv" => OneEm::Inv, | ||||||
|  |                 "clr" => OneEm::Clr, | ||||||
|  |                 "tst" => OneEm::Tst, | ||||||
|  |                 "dec" => OneEm::Dec, | ||||||
|  |                 "decd" => OneEm::Decd, | ||||||
|  |                 "inc" => OneEm::Inc, | ||||||
|  |                 "incd" => OneEm::Incd, | ||||||
|  |                 "adc" => OneEm::Adc, | ||||||
|  |                 "dadc" => OneEm::Dadc, | ||||||
|  |                 "sbc" => OneEm::Sbc, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for Special { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "br" => Special::Br, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for OneArg { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "rrc" => OneArg::Rrc, | ||||||
|  |                 "swpb" => OneArg::Swpb, | ||||||
|  |                 "rra" => OneArg::Rra, | ||||||
|  |                 "sxt" => OneArg::Sxt, | ||||||
|  |                 "push" => OneArg::Push, | ||||||
|  |                 "call" => OneArg::Call, | ||||||
|  |                 "reti" => OneArg::Reti, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for TwoArg { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "mov" => TwoArg::Mov, | ||||||
|  |                 "add" => TwoArg::Add, | ||||||
|  |                 "addc" => TwoArg::Addc, | ||||||
|  |                 "subc" => TwoArg::Subc, | ||||||
|  |                 "sub" => TwoArg::Sub, | ||||||
|  |                 "cmp" => TwoArg::Cmp, | ||||||
|  |                 "dadd" => TwoArg::Dadd, | ||||||
|  |                 "bit" => TwoArg::Bit, | ||||||
|  |                 "bic" => TwoArg::Bic, | ||||||
|  |                 "bis" => TwoArg::Bis, | ||||||
|  |                 "xor" => TwoArg::Xor, | ||||||
|  |                 "and" => TwoArg::And, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl FromStr for Jump { | ||||||
|  |         type Err = (); | ||||||
|  |         fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||||
|  |             Ok(match s { | ||||||
|  |                 "jne" => Jump::Jne, | ||||||
|  |                 "jnz" => Jump::Jnz, | ||||||
|  |                 "jeq" => Jump::Jeq, | ||||||
|  |                 "jz" => Jump::Jz, | ||||||
|  |                 "jnc" => Jump::Jnc, | ||||||
|  |                 "jlo" => Jump::Jlo, | ||||||
|  |                 "jc" => Jump::Jc, | ||||||
|  |                 "jhs" => Jump::Jhs, | ||||||
|  |                 "jn" => Jump::Jn, | ||||||
|  |                 "jge" => Jump::Jge, | ||||||
|  |                 "jl" => Jump::Jl, | ||||||
|  |                 "jmp" => Jump::Jmp, | ||||||
|  |                 _ => Err(())?, | ||||||
|  |             }) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | mod display { | ||||||
| /// A [token Type](Type) is a semantic tag for a sequence of characters |     //! Implementations of [Display] for [token](super) types. | ||||||
| #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] |     use super::*; | ||||||
| pub enum Type { |     use std::fmt::Display; | ||||||
|     /// contiguous whitespace, excluding newline |     impl<'t> Display for Token<'t> { | ||||||
|     Space, |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|     /// newline and contiguous whitespace |             let Self { lexeme, kind, pos: _ } = self; | ||||||
|     Endl, |             match kind { | ||||||
|     /// A line-comment |                 TokenKind::Comment | ||||||
|     Comment, |                 | TokenKind::Directive | ||||||
|     /// Jump label *definition* |                 | TokenKind::Identifier | ||||||
|     Label, |                 | TokenKind::String => { | ||||||
|     /// Instructions |                     write!(f, "{}", lexeme) | ||||||
|     Insn, |                 } | ||||||
|     /// Operand width is byte |                 ty => ty.fmt(f), | ||||||
|     ByteWidth, |             } | ||||||
|     /// Operand width is word |         } | ||||||
|     WordWidth, |     } | ||||||
|     /// Register mnemonic (i.e. `pc`, `r14`) |     impl Display for TokenKind { | ||||||
|     Register, |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|     /// Marker for base-10 |             match self { | ||||||
|     RadixMarkerDec, |                 TokenKind::Eof => write!(f, "[EOF]"), | ||||||
|     /// Marker for base-16 |                 TokenKind::Newline => writeln!(f), | ||||||
|     RadixMarkerHex, |                 TokenKind::OpenParen => write!(f, "("), | ||||||
|     /// Marker for base-8 |                 TokenKind::CloseParen => write!(f, ")"), | ||||||
|     RadixMarkerOct, |                 TokenKind::OpenCurly => write!(f, "{{"), | ||||||
|     /// Marker for base-2 |                 TokenKind::CloseCurly => write!(f, "}}"), | ||||||
|     RadixMarkerBin, |                 TokenKind::OpenBrace => write!(f, "["), | ||||||
|     /// 1-4 hexadigit numbers only |                 TokenKind::CloseBrace => write!(f, "]"), | ||||||
|     Number, |                 TokenKind::Comma => write!(f, ","), | ||||||
|     /// Negative number marker |                 TokenKind::Colon => write!(f, ":"), | ||||||
|     Minus, |                 TokenKind::Bang => write!(f, "!"), | ||||||
|     /// post-increment mode marker |                 TokenKind::At => write!(f, "@"), | ||||||
|     Plus, |                 TokenKind::Amp => write!(f, "&"), | ||||||
|     /// Open-Indexed-Mode marker |                 TokenKind::Bar => write!(f, "|"), | ||||||
|     LParen, |                 TokenKind::Caret => write!(f, "^"), | ||||||
|     /// Close-Indexed-Mode marker |                 TokenKind::Star => write!(f, "*"), | ||||||
|     RParen, |                 TokenKind::Hash => write!(f, "#"), | ||||||
|     /// Open Square Bracket |                 TokenKind::Dollar => write!(f, "$"), | ||||||
|     LBracket, |                 TokenKind::Percent => write!(f, "%"), | ||||||
|     /// Closed Square Bracket |                 TokenKind::Plus => write!(f, "+"), | ||||||
|     RBracket, |                 TokenKind::Minus => write!(f, "-"), | ||||||
|     /// Indirect mode marker |                 TokenKind::Slash => write!(f, "/"), | ||||||
|     Indirect, |                 TokenKind::Lsh => write!(f, "<<"), | ||||||
|     /// absolute address marker |                 TokenKind::Rsh => write!(f, ">>"), | ||||||
|     Absolute, |                 TokenKind::Comment => write!(f, "; "), | ||||||
|     /// immediate value marker |                 TokenKind::Directive => write!(f, "."), | ||||||
|     Immediate, |                 TokenKind::Identifier => write!(f, "Identifier"), | ||||||
|     /// Valid identifier. Identifiers must start with a Latin alphabetic character or underscore |                 TokenKind::Number(val, 2) => write!(f, "0b{val:b}"), | ||||||
|     Identifier, |                 TokenKind::Number(val, 8) => write!(f, "0o{val:o}"), | ||||||
|     /// A string, encased in "quotes" |                 TokenKind::Number(val, 16) => write!(f, "0x{val:x}"), | ||||||
|     String, |                 TokenKind::Number(val, _) => write!(f, "{val}"), | ||||||
|     /// Assembler directive |                 TokenKind::Char(c) => write!(f, "'{c}'"), | ||||||
|     Directive, |                 TokenKind::String => write!(f, "\"String\""), | ||||||
|     /// Separator (comma) |                 TokenKind::Reg(kw) => write!(f, "{kw}"), | ||||||
|     Separator, |                 TokenKind::NoEm(kw) => write!(f, "{kw}"), | ||||||
|     /// End of File marker |                 TokenKind::OneEm(kw) => write!(f, "{kw}"), | ||||||
|     #[default] |                 TokenKind::Special(kw) => write!(f, "{kw}"), | ||||||
|     EndOfFile, |                 TokenKind::OneArg(kw) => write!(f, "{kw}"), | ||||||
|     /// Invalid token |                 TokenKind::TwoArg(kw) => write!(f, "{kw}"), | ||||||
|     Invalid, |                 TokenKind::Jump(kw) => write!(f, "{kw}"), | ||||||
| } |                 TokenKind::Byte => write!(f, ".b"), | ||||||
|  |                 TokenKind::Word => write!(f, ".w"), | ||||||
| regex_impl! {<'text> Token<'text> { |             } | ||||||
|     pub fn expect_space(text: &str) -> Option<Self> { |         } | ||||||
|         regex!(Type::Space = r"^[\s--\n]+") |     } | ||||||
|     } |     impl Display for Reg { | ||||||
|     pub fn expect_endl(text: &str) -> Option<Self> { |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|         regex!(Type::Endl = r"^\n[\s--\n]*") |             match self { | ||||||
|     } |                 Reg::PC => "pc".fmt(f), | ||||||
|     pub fn expect_comment(text: &str) -> Option<Self> { |                 Reg::SP => "sp".fmt(f), | ||||||
|         regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*") |                 Reg::SR => "sr".fmt(f), | ||||||
|     } |                 Reg::CG => "cg".fmt(f), | ||||||
|     pub fn expect_label(text: &str) -> Option<Self> { |                 Reg::R4 => "r4".fmt(f), | ||||||
|         regex!(Type::Label = r"^:") |                 Reg::R5 => "r5".fmt(f), | ||||||
|     } |                 Reg::R6 => "r6".fmt(f), | ||||||
|     pub fn expect_insn(text: &str) -> Option<Self> { |                 Reg::R7 => "r7".fmt(f), | ||||||
|         regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)") |                 Reg::R8 => "r8".fmt(f), | ||||||
|     } |                 Reg::R9 => "r9".fmt(f), | ||||||
|     pub fn expect_byte_width(text: &str) -> Option<Self> { |                 Reg::R10 => "r10".fmt(f), | ||||||
|         regex!(Type::ByteWidth = r"(?i)^\.b") |                 Reg::R11 => "r11".fmt(f), | ||||||
|     } |                 Reg::R12 => "r12".fmt(f), | ||||||
|     pub fn expect_word_width(text: &str) -> Option<Self> { |                 Reg::R13 => "r13".fmt(f), | ||||||
|         regex!(Type::WordWidth = r"(?i)^\.w") |                 Reg::R14 => "r14".fmt(f), | ||||||
|     } |                 Reg::R15 => "r15".fmt(f), | ||||||
|     pub fn expect_register(text: &str) -> Option<Self> { |             } | ||||||
|         // old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)") |         } | ||||||
|         regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)") |     } | ||||||
|     } |     impl Display for NoEm { | ||||||
|     pub fn expect_radix_marker_dec(text: &str) -> Option<Self> { |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|         regex!(Type::RadixMarkerDec = r"(?i)^0d") |             match self { | ||||||
|     } |                 NoEm::Nop => "nop".fmt(f), | ||||||
|     pub fn expect_radix_marker_hex(text: &str) -> Option<Self> { |                 NoEm::Ret => "ret".fmt(f), | ||||||
|         regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)") |                 NoEm::Clrc => "clrc".fmt(f), | ||||||
|     } |                 NoEm::Clrz => "clrz".fmt(f), | ||||||
|     pub fn expect_radix_marker_oct(text: &str) -> Option<Self> { |                 NoEm::Clrn => "clrn".fmt(f), | ||||||
|         regex!(Type::RadixMarkerOct = r"(?i)^0o") |                 NoEm::Setc => "setc".fmt(f), | ||||||
|     } |                 NoEm::Setz => "setz".fmt(f), | ||||||
|     pub fn expect_radix_marker_bin(text: &str) -> Option<Self> { |                 NoEm::Setn => "setn".fmt(f), | ||||||
|         regex!(Type::RadixMarkerBin = r"(?i)^0b") |                 NoEm::Dint => "dint".fmt(f), | ||||||
|     } |                 NoEm::Eint => "eint".fmt(f), | ||||||
|     pub fn expect_number(text: &str) -> Option<Self> { |             } | ||||||
|         regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)") |         } | ||||||
|     } |     } | ||||||
|     pub fn expect_minus(text: &str) -> Option<Self> { |     impl Display for OneEm { | ||||||
|         regex!(Type::Minus = r"^-") |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|     } |             match self { | ||||||
|     pub fn expect_plus(text: &str) -> Option<Self> { |                 OneEm::Pop => "pop".fmt(f), | ||||||
|         regex!(Type::Plus = r"^\+") |                 OneEm::Rla => "rla".fmt(f), | ||||||
|     } |                 OneEm::Rlc => "rlc".fmt(f), | ||||||
|     pub fn expect_l_paren(text: &str) -> Option<Self> { |                 OneEm::Inv => "inv".fmt(f), | ||||||
|         regex!(Type::LParen = r"^\(") |                 OneEm::Clr => "clr".fmt(f), | ||||||
|     } |                 OneEm::Tst => "tst".fmt(f), | ||||||
|     pub fn expect_r_paren(text: &str) -> Option<Self> { |                 OneEm::Dec => "dec".fmt(f), | ||||||
|         regex!(Type::RParen = r"^\)") |                 OneEm::Decd => "decd".fmt(f), | ||||||
|     } |                 OneEm::Inc => "inc".fmt(f), | ||||||
|     pub fn expect_l_bracket(text: &str) -> Option<Self> { |                 OneEm::Incd => "incd".fmt(f), | ||||||
|         regex!(Type::LBracket = r"^\[") |                 OneEm::Adc => "adc".fmt(f), | ||||||
|     } |                 OneEm::Dadc => "dadc".fmt(f), | ||||||
|     pub fn expect_r_bracket(text: &str) -> Option<Self> { |                 OneEm::Sbc => "sbc".fmt(f), | ||||||
|         regex!(Type::RBracket = r"^]") |             } | ||||||
|     } |         } | ||||||
|     pub fn expect_indrect(text: &str) -> Option<Self> { |     } | ||||||
|         regex!(Type::Indirect = r"^@") |     impl Display for Special { | ||||||
|     } |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|     pub fn expect_absolute(text: &str) -> Option<Self> { |             match self { | ||||||
|         regex!(Type::Absolute = r"^&") |                 Special::Br => "br".fmt(f), | ||||||
|     } |             } | ||||||
|     pub fn expect_immediate(text: &str) -> Option<Self> { |         } | ||||||
|         regex!(Type::Immediate = r"^#") |     } | ||||||
|     } |     impl Display for OneArg { | ||||||
|     pub fn expect_string(text: &str) -> Option<Self> { |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|         regex!(Type::String = r#"^"[^"]*""#) |             match self { | ||||||
|     } |                 OneArg::Rrc => "rrc".fmt(f), | ||||||
|     pub fn expect_directive(text: &str) -> Option<Self> { |                 OneArg::Swpb => "swpb".fmt(f), | ||||||
|         regex!(Type::Directive = r"^\.\w+") |                 OneArg::Rra => "rra".fmt(f), | ||||||
|     } |                 OneArg::Sxt => "sxt".fmt(f), | ||||||
|     pub fn expect_identifier(text: &str) -> Option<Self> { |                 OneArg::Push => "push".fmt(f), | ||||||
|         regex!(Type::Identifier = r"^[A-Za-z_]\w*") |                 OneArg::Call => "call".fmt(f), | ||||||
|     } |                 OneArg::Reti => "reti".fmt(f), | ||||||
|     pub fn expect_separator(text: &str) -> Option<Self> { |             } | ||||||
|         regex!(Type::Separator = r"^,") |         } | ||||||
|     } |     } | ||||||
|     pub fn expect_end_of_file(text: &str) -> Option<Self> { |     impl Display for TwoArg { | ||||||
|         regex!(Type::EndOfFile = r"^$") |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|     } |             match self { | ||||||
|     pub fn expect_anything(text: &str) -> Option<Self> { |                 TwoArg::Mov => "mov".fmt(f), | ||||||
|         regex!(Type::Invalid = r"^.*") |                 TwoArg::Add => "add".fmt(f), | ||||||
|     } |                 TwoArg::Addc => "addc".fmt(f), | ||||||
| }} |                 TwoArg::Subc => "subc".fmt(f), | ||||||
|  |                 TwoArg::Sub => "sub".fmt(f), | ||||||
| impl Display for Type { |                 TwoArg::Cmp => "cmp".fmt(f), | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |                 TwoArg::Dadd => "dadd".fmt(f), | ||||||
|         match self { |                 TwoArg::Bit => "bit".fmt(f), | ||||||
|             Self::Space => Display::fmt("space", f), |                 TwoArg::Bic => "bic".fmt(f), | ||||||
|             Self::Endl => Display::fmt("newline", f), |                 TwoArg::Bis => "bis".fmt(f), | ||||||
|             Self::Comment => Display::fmt("comment", f), |                 TwoArg::Xor => "xor".fmt(f), | ||||||
|             Self::Label => Display::fmt("label definition", f), |                 TwoArg::And => "and".fmt(f), | ||||||
|             Self::Insn => Display::fmt("opcode", f), |             } | ||||||
|             Self::ByteWidth => Display::fmt("byte-width", f), |         } | ||||||
|             Self::WordWidth => Display::fmt("word-width", f), |     } | ||||||
|             Self::Register => Display::fmt("register", f), |     impl Display for Jump { | ||||||
|             Self::RadixMarkerDec => Display::fmt("decimal marker", f), |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|             Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f), |             match self { | ||||||
|             Self::RadixMarkerOct => Display::fmt("octal marker", f), |                 Jump::Jne => "jne".fmt(f), | ||||||
|             Self::RadixMarkerBin => Display::fmt("binary marker", f), |                 Jump::Jnz => "jnz".fmt(f), | ||||||
|             Self::Number => Display::fmt("number", f), |                 Jump::Jeq => "jeq".fmt(f), | ||||||
|             Self::Minus => Display::fmt("minus sign", f), |                 Jump::Jz => "jz".fmt(f), | ||||||
|             Self::Plus => Display::fmt("plus sign", f), |                 Jump::Jnc => "jnc".fmt(f), | ||||||
|             Self::LParen => Display::fmt("left parenthesis", f), |                 Jump::Jlo => "jlo".fmt(f), | ||||||
|             Self::RParen => Display::fmt("right parenthesis", f), |                 Jump::Jc => "jc".fmt(f), | ||||||
|             Self::LBracket => Display::fmt("left bracket", f), |                 Jump::Jhs => "jhs".fmt(f), | ||||||
|             Self::RBracket => Display::fmt("right bracket", f), |                 Jump::Jn => "jn".fmt(f), | ||||||
|             Self::Indirect => Display::fmt("indirect", f), |                 Jump::Jge => "jge".fmt(f), | ||||||
|             Self::Absolute => Display::fmt("absolute", f), |                 Jump::Jl => "jl".fmt(f), | ||||||
|             Self::Immediate => Display::fmt("immediate", f), |                 Jump::Jmp => "jmp".fmt(f), | ||||||
|             Self::Identifier => Display::fmt("identifier", f), |  | ||||||
|             Self::String => Display::fmt("string", f), |  | ||||||
|             Self::Directive => Display::fmt("directive", f), |  | ||||||
|             Self::Separator => Display::fmt("comma", f), |  | ||||||
|             Self::EndOfFile => Display::fmt("EOF", f), |  | ||||||
|             Self::Invalid => Display::fmt("invalid token", f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// A [Token] which can outlive its parent buffer |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct OwnedToken { |  | ||||||
|     /// The type of this token |  | ||||||
|     variant: Type, |  | ||||||
|     /// The sub[String] corresponding to this token |  | ||||||
|     lexeme: String, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for OwnedToken { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'t> From<&'t OwnedToken> for Token<'t> { |  | ||||||
|     fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Token<'_>> for OwnedToken { |  | ||||||
|     fn from(value: Token<'_>) -> Self { |  | ||||||
|         let Token { variant, lexeme } = value; |  | ||||||
|         OwnedToken { variant, lexeme: lexeme.to_owned() } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// [Types] are an owned array of [types](Type), with a custom [Display] implementation |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Types(Vec<Type>); |  | ||||||
|  |  | ||||||
| impl<T: AsRef<[Type]>> From<T> for Types { |  | ||||||
|     // TODO: Possibly bad. Check out in rust playground. |  | ||||||
|     fn from(value: T) -> Self { Self(value.as_ref().to_owned()) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Types { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         for (idx, t) in self.0.iter().enumerate() { |  | ||||||
|             Display::fmt(t, f)?; |  | ||||||
|             match idx { |  | ||||||
|                 i if i < self.0.len() - 2 => Display::fmt(", ", f)?, |  | ||||||
|                 i if i < self.0.len() - 1 => Display::fmt(" or ", f)?, |  | ||||||
|                 _ => (), |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         Ok(()) |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,85 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A TokenStream is a specialized [Iterator] which produces [Tokens](Token) |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| use super::ignore::Ignore; |  | ||||||
| use super::preprocessed::Preprocessed; |  | ||||||
|  |  | ||||||
| /// A TokenStream is a specialized [Iterator] which produces [Tokens](Token) |  | ||||||
| pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug { |  | ||||||
|     /// Gets this stream's [Context] |  | ||||||
|     fn context(&self) -> Context; |  | ||||||
|  |  | ||||||
|     /// Creates an iterator that skips [Tokens](Token) of the given [Type] in the input |  |
|     #[inline] |  | ||||||
|     fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self> |  | ||||||
|     where Self: Sized { |  | ||||||
|         Ignore::new(variant, self) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Creates a [TokenStream] that performs live substitution of the input |  | ||||||
|     #[inline] |  | ||||||
|     fn preprocessed(&'text mut self) -> Preprocessed<'text, Self> |  | ||||||
|     where Self: Sized { |  | ||||||
|         Preprocessed::new(self) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Returns the next [Token] without advancing |  | ||||||
|     fn peek(&mut self) -> Self::Item; |  | ||||||
|  |  | ||||||
|     /// Returns the next [Token] if it is of the expected [Type], without advancing |  | ||||||
|     fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>; |  | ||||||
|  |  | ||||||
|     /// Consumes and returns a [Token] if it is the expected [Type] |  | ||||||
|     /// |  | ||||||
|     /// Otherwise, does not consume a [Token] |  | ||||||
|     fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>; |  | ||||||
|  |  | ||||||
|     /// Ignores a [Token] of the expected [Type], propagating errors. |  |
|     #[inline] |  | ||||||
|     fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) } |  | ||||||
|  |  | ||||||
|     /// Ignores a [Token] of the expected [Type], discarding errors. |  | ||||||
|     #[inline] |  | ||||||
|     fn allow(&mut self, expected: Type) { let _ = self.expect(expected); } |  | ||||||
|  |  | ||||||
|     /// Runs a function on each expected [Type] in order, returning the first success |  |
|     fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError> |  | ||||||
|     where T: AsRef<[Type]> { |  | ||||||
|         for &expected in expected.as_ref() { |  | ||||||
|             match f(self, expected).map_err(|e| e.bare()) { |  | ||||||
|                 Ok(t) => return Ok(t), |  | ||||||
|                 Err(LexError::UnexpectedToken { .. }) => continue, |  | ||||||
|                 Err(e) => return Err(e.context(self.context())), |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         Err(LexError::expected(expected, self.peek()).context(self.context())) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Returns the next [Token] if it matches any of the expected [Types](Type), without advancing |  |
|     #[inline] |  | ||||||
|     fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError> |  | ||||||
|     where T: AsRef<[Type]> { |  | ||||||
|         self.any_of(Self::peek_expect, expected) |  | ||||||
|     } |  | ||||||
|     /// Consumes and returns a [Token] if it matches any of the expected [Types](Type) |  | ||||||
|     /// |  | ||||||
|     /// Otherwise, does not consume a [Token] |  | ||||||
|     #[inline] |  | ||||||
|     fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError> |  | ||||||
|     where T: AsRef<[Type]> { |  | ||||||
|         self.any_of(Self::expect, expected) |  | ||||||
|     } |  | ||||||
|     /// Ignores a [Token] of any expected [Type], discarding errors. |  | ||||||
|     #[inline] |  | ||||||
|     fn allow_any_of<T>(&mut self, expected: T) |  | ||||||
|     where T: AsRef<[Type]> { |  | ||||||
|         let _ = self.expect_any_of(expected); |  | ||||||
|     } |  | ||||||
|     /// Ignores a [Token] of any expected [Type], propagating errors. |  |
|     #[inline] |  | ||||||
|     fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError> |  | ||||||
|     where T: AsRef<[Type]> { |  | ||||||
|         self.any_of(Self::require, expected) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
							
								
								
									
										141
									
								
								src/lib.rs
									
									
									
									
									
								
							
							
						
						
									
										141
									
								
								src/lib.rs
									
									
									
									
									
								
							| @@ -1,76 +1,95 @@ | |||||||
| // © 2023 John Breaux | // © 2023-2024 John Breaux | ||||||
|  | // See LICENSE.md for license | ||||||
| //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption | //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption | ||||||
| //! | //! | ||||||
| //! This project aims to assemble any valid msp430 instructions, while being lenient about the | //! This project aims to assemble any valid msp430 instructions, while including important quality | ||||||
| //! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it | //! of life features such as constant expression evaluation. | ||||||
| //! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a |  | ||||||
| //! useful message to the author.) |  | ||||||
| //! | //! | ||||||
| //! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, | //! ## Tokenization | ||||||
| //! unless syntactically relevant. It will also discard comma-separators between operands of a | //! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer, | ||||||
| //! two-operand instruction. | //! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind). | ||||||
| //! | //! | ||||||
| //! It returns an AST structured as follows | //! ## Preprocessing | ||||||
|  | //! The [`Preprocessor`](preprocessor::Preprocessor) will filter | ||||||
|  | //! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive. | ||||||
|  | //! | ||||||
|  | //! ## Parsing | ||||||
|  | //! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer) | ||||||
|  | //! and returns an [AST](parser::ast) structured roughly as follows: | ||||||
| //! ```text | //! ```text | ||||||
| //! Root | //! Statements | ||||||
| //! ├─ Line | //! ├─ Stmt | ||||||
| //! │  └─ Empty |  | ||||||
| //! ├─ Line |  | ||||||
| //! │  └─ Comment | //! │  └─ Comment | ||||||
| //! ├─ Line | //! ├─ Stmt | ||||||
| //! │  └─ Directive                 // Pre- or Post-processor directive | //! │  └─ Directive                 // Pre- or Post-processor directive | ||||||
| //! ├─ Line | //! ├─ Stmt | ||||||
| //! │  └─ Label                     // Label definition | //! │  └─ Label                     // Label definition | ||||||
| //! ├─ Line | //! ├─ Stmt | ||||||
| //! │  └─ Instruction | //! │  └─ Insn | ||||||
| //! │     ├─ Opcode | //! │     └─ NoEm                   // A zero-operand "emulated" instruction | ||||||
| //! │     └─ Encoding::Single | //! ├─ Stmt | ||||||
|  | //! │  └─ Insn | ||||||
|  | //! │     └─ OneEm                  // A one-operand "emulated" instruction | ||||||
|  | //! │        ├─ Opcode | ||||||
| //! │        ├─ Width | //! │        ├─ Width | ||||||
| //! │        └─ PrimaryOperand | //! │        └─ Dst                 // A destination register has several addressing modes: | ||||||
| //! │           ├─ Identifier       // Label, for relative-addressed data/code | //! │           └─ Direct           // - The contents of a register | ||||||
| //! │           ├─ Register         // Direct, indexed, indirect or indirect-post-increment register. | //! │           ╶─ Indexed          // - The register, as a pointer, plus a byte index | ||||||
| //! │           └─ Number           // Index, absolute address or immediate value. | //! │           ╶─ Absolute         // - An immediate absolute address | ||||||
| //! ├─ Line | //! │           ╶─ Special          // - A so-called "special" immediate (#0 or #1) - these are joke encodings. | ||||||
| //! │  └─ Instruction | //! ├─ Stmt | ||||||
| //! │     ├─ Opcode | //! │  └─ Insn | ||||||
| //! │     └─ Encoding::Double | //! │     └─ OneArg                 // A one-operand instruction | ||||||
|  | //! │        ├─ Opcode | ||||||
| //! │        ├─ Width | //! │        ├─ Width | ||||||
| //! │        ├─ PrimaryOperand | //! │        └─ Src                 // A source register has even more addressing modes: | ||||||
| //! │           ├─ Identifier       // Label, for relative-addressed data/code | //! │           └─ Direct           // - The contents of a register | ||||||
| //! │        │  ├─ Register         // Direct, indexed, indirect or indirect-post-increment register. | //! │           ╶─ Indexed          // - The register, as a pointer, plus a byte index | ||||||
| //! │        │  └─ Number           // Index, absolute address or immediate value. | //! │           ╶─ Indirect         // - The word at the address stored in the register | ||||||
| //! │        └─ SecondaryOperand | //! │                               //   (like Indexed, but without an extension word.) | ||||||
| //! │           ├─ Identifier       // Label, for relative-addressed data/code | //! │           ╶─ PostIncrement    // - Indirect, but the register is post-incremented by 1 for byte | ||||||
| //! │           ├─ Register         // Direct or indexed register | //! │                               //   operations, or by 2 for word operations (always 2 for PC/SP) | ||||||
| //! │           └─ Number           // Index or absolute address | //! │           ╶─ Absolute         // - An immediate absolute address | ||||||
| //! ├─ Line | //! │           ╶─ Immediate        // - An immediate 16-bit number | ||||||
| //! │  └─ Instruction | //! │           ╶─ Special          // - A so-called "special" immediate (#0 or #1) - these are joke encodings. | ||||||
| //! │     ├─ Opcode | //! ├─ Stmt | ||||||
| //! │     └─ Encoding::Jump | //! │  └─ Insn | ||||||
| //! │        └─ JumpTarget | //! │     └─ TwoArg                 // A two-operand instruction | ||||||
| //! │           ├─ Identifier       // Label | //! │        ├─ Opcode | ||||||
| //! │           └─ Number           // Even, PC-relative offset in range (-1024..=1022) | //! │        ├─ Width | ||||||
| //! └─ Line | //! │        ├─ Src | ||||||
| //!    └─ EndOfFile | //! │        └─ Dst | ||||||
|  | //! └─ Stmt | ||||||
|  | //!    └─ Insn | ||||||
|  | //!       └─ Jump                   // A relative jump instruction | ||||||
|  | //!          ├─ Opcode              // The jump condition | ||||||
|  | //!          └─ JumpDst             // A jump instruction's destination can be either: | ||||||
|  | //!             └─ Rel              // - An even, signed 11-bit offset | ||||||
|  | //!             ╶─ Label            // - A label to jump to | ||||||
| //! ``` | //! ``` | ||||||
|  | //! | ||||||
|  | //! ## Canonicalization | ||||||
|  | //! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize): | ||||||
|  | //! - Expressions which act exclusively on numbers are eagerly evaluated | ||||||
|  | //!   - Expressions which begin with a numeric part are repacked for late evaluation | ||||||
|  | //! - "Emulated" instructions are desugared into their canonical counterparts | ||||||
|  | //! | ||||||
|  | //! ## Assembly | ||||||
|  | //! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and | ||||||
|  | //! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words | ||||||
|  | //! 2. Records all jump labels, for backpatching | ||||||
|  | //! 3. Records all expressions, for late evaluation | ||||||
|  | //! 4. Performs late evaluation and backpatching | ||||||
|  | //! | ||||||
|  | //! If a non-canonical instruction is found, the assembler will print a warning, | ||||||
|  | //! and canonicalize it. | ||||||
|  |  | ||||||
| pub mod preamble { | pub mod span; | ||||||
|     //! Common imports for msp430-asm |  | ||||||
|     use super::*; |  | ||||||
|     pub use assembler::Assembler; |  | ||||||
|     pub use error::Error; |  | ||||||
|     pub use lexer::{ |  | ||||||
|         context::Context, |  | ||||||
|         token::{Token, Type}, |  | ||||||
|         token_stream::TokenStream, |  | ||||||
|         Tokenizer, |  | ||||||
|     }; |  | ||||||
|     pub use parser::Parser; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| use preamble::*; | pub mod lexer; | ||||||
| pub mod error; |  | ||||||
|  | pub mod preprocessor; | ||||||
|  |  | ||||||
|  | pub mod parser; | ||||||
|  |  | ||||||
| pub mod assembler; | pub mod assembler; | ||||||
| pub mod lexer; |  | ||||||
| pub mod parser; |  | ||||||
|   | |||||||
							
								
								
									
										667
									
								
								src/parser.rs
									
									
									
									
									
								
							
							
						
						
									
										667
									
								
								src/parser.rs
									
									
									
									
									
								
							| @@ -1,81 +1,598 @@ | |||||||
| // © 2023 John Breaux | // © 2023-2024 John Breaux | ||||||
| //! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root) | // See LICENSE.md for license | ||||||
|  | //! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast) | ||||||
|  | pub mod ast; | ||||||
|  |  | ||||||
| use crate::{TokenStream, Type}; | use self::error::{ | ||||||
| use error::ParseError; |     Error, | ||||||
| use preamble::*; |     ErrorKind::{self, *}, | ||||||
| use std::{ |     PResult, Parsing, | ||||||
|     fmt::{Debug, Display}, |  | ||||||
|     path::Path, |  | ||||||
| }; | }; | ||||||
|  | use crate::{ | ||||||
|  |     lexer::{ | ||||||
|  |         token::{Reg, Special, Token, TokenKind as Kind}, | ||||||
|  |         Lexer, | ||||||
|  |     }, | ||||||
|  |     preprocessor::Preprocessor, | ||||||
|  |     span::Span, | ||||||
|  | }; | ||||||
|  | use ast::*; | ||||||
|  |  | ||||||
| pub mod preamble { | #[derive(Clone, Debug)] | ||||||
|     //! All the different AST node types | pub struct Parser<'t> { | ||||||
|     use super::*; |     lexer: Preprocessor<'t>, | ||||||
|     // Traits |     next: Option<Token<'t>>, | ||||||
|     pub use parsable::Parsable; |     loc: Span<usize>, | ||||||
|     // Nodes | } | ||||||
|     pub use comment::Comment; |  | ||||||
|     pub use directive::Directive; | impl<'t> Parser<'t> { | ||||||
|     pub use identifier::Identifier; |     /// Creates a new [Parser] | ||||||
|     pub use instruction::{ |     pub fn new(text: &'t str) -> Self { | ||||||
|         encoding::{ |         let lexer = Preprocessor::new(text); | ||||||
|             encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand, |         Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } | ||||||
|             register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding, |     } | ||||||
|         }, |     /// Createes a new [Parser] from an existing [Lexer] | ||||||
|         opcode::Opcode, |     pub fn with_lexer(lexer: Lexer<'t>) -> Self { | ||||||
|  |         let lexer = Preprocessor::with_lexer(lexer); | ||||||
|  |         Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> { | ||||||
|  |         Parsable::parse_with(self) | ||||||
|  |     } | ||||||
|  |     pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error { | ||||||
|  |         Error { parsing, kind, loc: self.loc } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Peeks at the next token, scanning it from the lexer if none is buffered | ||||||
|  |     pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> { | ||||||
|  |         if self.next.is_none() { | ||||||
|  |             self.next = self.lexer.scan(); | ||||||
|  |         } | ||||||
|  |         self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p)) | ||||||
|  |     } | ||||||
|  |     pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> { | ||||||
|  |         Ok(match self.take() { | ||||||
|  |             Some(token) => token, | ||||||
|  |             None => { | ||||||
|  |                 self.peek(p)?; | ||||||
|  |                 self.take().expect("should have been populated by peek") | ||||||
|  |             } | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |     /// Consumes the next token if it is `expect`, or returns an `Unexpected` error | ||||||
|  |     pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> { | ||||||
|  |         match self.peek(p)?.kind { | ||||||
|  |             kind if kind == expect => { | ||||||
|  |                 self.take(); | ||||||
|  |                 Ok(self) | ||||||
|  |             } | ||||||
|  |             kind => Err(self.error(Unexpected(kind), p)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Consumes the next token without checking it | ||||||
|  |     pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> { | ||||||
|  |         self.next(p)?; | ||||||
|  |         Ok(self) | ||||||
|  |     } | ||||||
|  |     /// Takes the buffered (last peeked) token, if there is one | ||||||
|  |     pub fn take(&mut self) -> Option<Token<'t>> { | ||||||
|  |         self.next.take() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Expressions | ||||||
|  | impl<'t> Parser<'t> { | ||||||
|  |     /// Parses an expression | ||||||
|  |     pub fn expr(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         self.term() | ||||||
|  |     } | ||||||
|  |     /// Parses a term-expression (binary `*`mul, `/`div, `%`rem) | ||||||
|  |     pub fn term(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let a = self.factor()?; | ||||||
|  |         let mut other = vec![]; | ||||||
|  |         loop { | ||||||
|  |             match self.peek(p)?.kind { | ||||||
|  |                 Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)), | ||||||
|  |                 Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)), | ||||||
|  |                 Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)), | ||||||
|  |                 _ if other.is_empty() => break Ok(a), | ||||||
|  |                 _ => break Ok(Expr::Binary(a.into(), other)), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Parses a factor expression (binary `+`add, `-`sub) | ||||||
|  |     pub fn factor(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let a = self.shift()?; | ||||||
|  |         let mut other = vec![]; | ||||||
|  |         loop { | ||||||
|  |             match self.peek(p)?.kind { | ||||||
|  |                 Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)), | ||||||
|  |                 Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)), | ||||||
|  |                 _ if other.is_empty() => break Ok(a), | ||||||
|  |                 _ => break Ok(Expr::Binary(a.into(), other)), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right) | ||||||
|  |     pub fn shift(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let a = self.bin()?; | ||||||
|  |         let mut other = vec![]; | ||||||
|  |         loop { | ||||||
|  |             match self.peek(p)?.kind { | ||||||
|  |                 Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)), | ||||||
|  |                 Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)), | ||||||
|  |                 _ if other.is_empty() => break Ok(a), | ||||||
|  |                 _ => break Ok(Expr::Binary(a.into(), other)), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     pub fn bin(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let a = self.unary()?; | ||||||
|  |         let mut other = vec![]; | ||||||
|  |         loop { | ||||||
|  |             match self.peek(p)?.kind { | ||||||
|  |                 Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)), | ||||||
|  |                 Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)), | ||||||
|  |                 Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)), | ||||||
|  |                 _ if other.is_empty() => break Ok(a), | ||||||
|  |                 _ => break Ok(Expr::Binary(a.into(), other)), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Parses a unary expression (`*`deref, `-`negate, `!`invert) | ||||||
|  |     pub fn unary(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let mut ops = vec![]; | ||||||
|  |         loop { | ||||||
|  |             match self.peek(p)?.kind { | ||||||
|  |                 Kind::Star => ops.push(UnOp::Deref), | ||||||
|  |                 Kind::Minus => ops.push(UnOp::Neg), | ||||||
|  |                 Kind::Bang => ops.push(UnOp::Not), | ||||||
|  |                 _ if ops.is_empty() => break Ok(self.primary()?), | ||||||
|  |                 _ => break Ok(Expr::Unary(ops, self.primary()?.into())), | ||||||
|  |             } | ||||||
|  |             self.take(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier | ||||||
|  |     pub fn primary(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let Token { lexeme, kind, .. } = *self.peek(p)?; | ||||||
|  |         Ok(match kind { | ||||||
|  |             Kind::OpenParen => { | ||||||
|  |                 let out = Expr::Group(self.then(p)?.parse()?); | ||||||
|  |                 self.assert(Kind::CloseParen, p)?; | ||||||
|  |                 out | ||||||
|  |             } | ||||||
|  |             Kind::Number(n, _) => { | ||||||
|  |                 self.take(); | ||||||
|  |                 Expr::Number(n) | ||||||
|  |             } | ||||||
|  |             Kind::Identifier => { | ||||||
|  |                 self.take(); | ||||||
|  |                 Expr::Ident(lexeme) | ||||||
|  |             } | ||||||
|  |             Kind::Amp => self.then(p)?.addrof()?, | ||||||
|  |             ty => Err(self.error(NonNumeric(ty), p))?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |     pub fn addrof(&mut self) -> PResult<Expr<'t>> { | ||||||
|  |         let p = Parsing::Expr; | ||||||
|  |         let token = self.peek(p)?; | ||||||
|  |         let out = match token.kind { | ||||||
|  |             Kind::Identifier => Expr::AddrOf(token.lexeme), | ||||||
|  |             Kind::Number(n, _) => Expr::Number(n), | ||||||
|  |             ty => Err(self.error(Unexpected(ty), p))?, | ||||||
|  |         }; | ||||||
|  |         self.take(); | ||||||
|  |         Ok(out) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub trait Parsable<'t>: Sized { | ||||||
|  |     fn parse(text: &'t str) -> PResult<Self> { | ||||||
|  |         Self::parse_with(&mut Parser::new(text)) | ||||||
|  |     } | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self>; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Parsable<'t> for Statements<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let mut stmts = vec![]; | ||||||
|  |         while p.peek(Parsing::File)?.kind != Kind::Eof { | ||||||
|  |             stmts.push(p.parse()?) | ||||||
|  |         } | ||||||
|  |         Ok(Self { stmts }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Parsable<'t> for Statement<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let token = *p.peek(Parsing::Stmt)?; | ||||||
|  |         Ok(match token.kind { | ||||||
|  |             Kind::Comment => { | ||||||
|  |                 p.take(); | ||||||
|  |                 Statement::Comment(token.lexeme) | ||||||
|  |             } | ||||||
|  |             Kind::Directive => Statement::Directive(p.parse()?), | ||||||
|  |             Kind::Identifier => Statement::Label(p.label()?), | ||||||
|  |             _ => Statement::Insn(p.parse()?), | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Directive<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Directive; | ||||||
|  |         let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?; | ||||||
|  |         let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) }; | ||||||
|  |         p.take(); | ||||||
|  |         Ok(match lexeme { | ||||||
|  |             ".define" => Directive::Define(p.parse()?), | ||||||
|  |             ".org" => Directive::Org(p.expr()?.into()), | ||||||
|  |             ".word" => Directive::Word(p.parse()?), | ||||||
|  |             ".words" => Directive::Words(p.parse()?), | ||||||
|  |             ".string" => Directive::String(p.string()?), | ||||||
|  |             _ => Err(p.error(Unexpected(Kind::Directive), parsing))?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Vec<Token<'t>> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Directive; | ||||||
|  |         let mut tokens = vec![]; | ||||||
|  |         loop { | ||||||
|  |             if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             tokens.push(p.next(parsing)?) | ||||||
|  |         } | ||||||
|  |         p.take(); | ||||||
|  |         Ok(tokens) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Instruction<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let start = p.peek(Parsing::Instruction)?.pos.start; | ||||||
|  |         Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for InstructionKind<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         use crate::lexer::token::OneArg; | ||||||
|  |         // an instruction starts with an opcode | ||||||
|  |         Ok(match p.peek(Parsing::Instruction)?.kind() { | ||||||
|  |             Kind::NoEm(_) => Self::NoEm(p.parse()?), | ||||||
|  |             Kind::OneEm(_) => Self::OneEm(p.parse()?), | ||||||
|  |             Kind::Special(Special::Br) => Self::Br(p.parse()?), | ||||||
|  |             Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?), | ||||||
|  |             Kind::OneArg(_) => Self::OneArg(p.parse()?), | ||||||
|  |             Kind::TwoArg(_) => Self::TwoArg(p.parse()?), | ||||||
|  |             Kind::Jump(_) => Self::Jump(p.parse()?), | ||||||
|  |             ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for NoEm { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         match p.next(Parsing::NoEm)?.kind { | ||||||
|  |             Kind::NoEm(opcode) => Ok(Self { opcode }), | ||||||
|  |             ty => Err(p.error(Unexpected(ty), Parsing::NoEm)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for OneEm<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         Ok(Self { | ||||||
|  |             opcode: match p.next(Parsing::OneEm)?.kind { | ||||||
|  |                 Kind::OneEm(opcode) => opcode, | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?, | ||||||
|  |             }, | ||||||
|  |             width: p.parse()?, | ||||||
|  |             dst: p.parse()?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for OneArg<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         Ok(Self { | ||||||
|  |             opcode: match p.next(Parsing::OneArg)?.kind { | ||||||
|  |                 Kind::OneArg(opcode) => opcode, | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?, | ||||||
|  |             }, | ||||||
|  |             width: p.parse()?, | ||||||
|  |             src: p.parse()?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for TwoArg<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::TwoArg; | ||||||
|  |         Ok(Self { | ||||||
|  |             opcode: match p.next(parsing)?.kind { | ||||||
|  |                 Kind::TwoArg(opcode) => opcode, | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), parsing))?, | ||||||
|  |             }, | ||||||
|  |             width: p.parse()?, | ||||||
|  |             src: p.parse()?, | ||||||
|  |             dst: p.assert(Kind::Comma, parsing)?.parse()?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Jump<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Jump; | ||||||
|  |         Ok(Self { | ||||||
|  |             opcode: match p.next(parsing)?.kind { | ||||||
|  |                 Kind::Jump(opcode) => opcode, | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), parsing))?, | ||||||
|  |             }, | ||||||
|  |             dst: p.parse()?, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Reti { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         use crate::lexer::token::OneArg; | ||||||
|  |         p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?; | ||||||
|  |         Ok(Reti) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Br<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         p.assert(Kind::Special(Special::Br), Parsing::Br)?; | ||||||
|  |         Ok(Self { src: p.parse()? }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Parsable<'t> for Src<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Src; | ||||||
|  |         Ok(match p.peek(parsing)?.kind { | ||||||
|  |             Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special | ||||||
|  |             Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?),   // &addr | ||||||
|  |             Kind::At => { | ||||||
|  |                 let reg = match p.then(parsing)?.next(parsing)?.kind { | ||||||
|  |                     Kind::Reg(r) => r, | ||||||
|  |                     ty => Err(p.error(Unexpected(ty), parsing))?, | ||||||
|  |                 }; | ||||||
|  |                 if let Kind::Plus = p.peek(parsing)?.kind { | ||||||
|  |                     p.take(); | ||||||
|  |                     Src::PostInc(reg) | ||||||
|  |                 } else { | ||||||
|  |                     Src::Indirect(reg) | ||||||
|  |                 } | ||||||
|  |             } // @reg+, @reg | ||||||
|  |             Kind::Reg(_) => Src::Direct(p.parse()?), | ||||||
|  |             _ => { | ||||||
|  |                 let expr = p.parse()?; | ||||||
|  |                 match p.peek(parsing)?.kind { | ||||||
|  |                     Kind::OpenParen => Src::Indexed(expr, { | ||||||
|  |                         let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; | ||||||
|  |                         p.assert(Kind::CloseParen, parsing)?; | ||||||
|  |                         reg | ||||||
|  |                     }), | ||||||
|  |                     _ => Src::BareExpr(expr), | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Dst<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Dst; | ||||||
|  |         Ok(match p.peek(parsing)?.kind { | ||||||
|  |             Kind::Hash => match p.then(parsing)?.next(parsing)?.kind { | ||||||
|  |                 Kind::Number(0, _) => Dst::Special(DstSpecial::Zero), | ||||||
|  |                 Kind::Number(1, _) => Dst::Special(DstSpecial::One), | ||||||
|  |                 Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?, | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), parsing))?, | ||||||
|  |             }, | ||||||
|  |             Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?), | ||||||
|  |             Kind::Reg(_) => Dst::Direct(p.parse()?), | ||||||
|  |             _ => Dst::Indexed(p.expr()?.into(), { | ||||||
|  |                 let reg = p.assert(Kind::OpenParen, parsing)?.reg()?; | ||||||
|  |                 p.assert(Kind::CloseParen, parsing)?; | ||||||
|  |                 reg | ||||||
|  |             }), | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for JumpDst<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Jump; | ||||||
|  |         let mut neg = false; | ||||||
|  |         let out = loop { | ||||||
|  |             let token = p.peek(parsing)?; | ||||||
|  |             match token.kind { | ||||||
|  |                 Kind::Minus => { | ||||||
|  |                     neg = !neg; | ||||||
|  |                 } | ||||||
|  |                 Kind::Plus => {} | ||||||
|  |                 Kind::Identifier => break Self::Label(token.lexeme), | ||||||
|  |                 Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }), | ||||||
|  |                 ty => Err(p.error(Unexpected(ty), parsing))?, | ||||||
|  |             } | ||||||
|  |             p.take(); | ||||||
|  |         }; | ||||||
|  |         p.take(); | ||||||
|  |         Ok(out) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Width { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let out = match p.peek(Parsing::Width)?.kind() { | ||||||
|  |             Kind::Byte => Width::Byte, | ||||||
|  |             Kind::Word => Width::Word, | ||||||
|  |             _ => return Ok(Width::Word), | ||||||
|  |         }; | ||||||
|  |         p.take(); | ||||||
|  |         Ok(out) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Reg { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let out = match p.peek(Parsing::Reg)?.kind { | ||||||
|  |             Kind::Reg(r) => r, | ||||||
|  |             ty => Err(p.error(Unexpected(ty), Parsing::Reg))?, | ||||||
|  |         }; | ||||||
|  |         p.take(); | ||||||
|  |         Ok(out) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t> Parsable<'t> for Expr<'t> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         p.expr() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         Ok(Box::new(p.parse()?)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> { | ||||||
|  |     fn parse_with(p: &mut Parser<'t>) -> PResult<Self> { | ||||||
|  |         let parsing = Parsing::Vec; | ||||||
|  |         p.assert(Kind::OpenBrace, parsing)?; | ||||||
|  |         let mut out = vec![]; | ||||||
|  |         while Kind::CloseBrace != p.peek(parsing)?.kind { | ||||||
|  |             out.push(p.parse()?) | ||||||
|  |         } | ||||||
|  |         p.assert(Kind::CloseBrace, parsing)?; | ||||||
|  |         Ok(out) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Context-sensitive parsing rules | ||||||
|  | impl<'t> Parser<'t> { | ||||||
|  |     pub fn string(&mut self) -> PResult<&'t str> { | ||||||
|  |         let token = *self.peek(Parsing::Directive)?; | ||||||
|  |         match token.kind { | ||||||
|  |             Kind::String => { | ||||||
|  |                 self.take(); | ||||||
|  |                 Ok(&token.lexeme[1..token.lexeme.len() - 1]) | ||||||
|  |             } | ||||||
|  |             ty => Err(self.error(Unexpected(ty), Parsing::Directive)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     pub fn label(&mut self) -> PResult<&'t str> { | ||||||
|  |         let p = Parsing::Label; | ||||||
|  |         let token = self.next(p)?; | ||||||
|  |         assert_eq!(Kind::Identifier, token.kind); | ||||||
|  |         self.assert(Kind::Colon, p)?; | ||||||
|  |         Ok(token.lexeme) | ||||||
|  |     } | ||||||
|  |     pub fn reg(&mut self) -> PResult<Reg> { | ||||||
|  |         match self.peek(Parsing::Reg)?.kind { | ||||||
|  |             Kind::Reg(r) => { | ||||||
|  |                 self.take(); | ||||||
|  |                 Ok(r) | ||||||
|  |             } | ||||||
|  |             ty => Err(self.error(Unexpected(ty), Parsing::Reg)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub mod error { | ||||||
|  |     use super::Kind; | ||||||
|  |     use crate::span::Span; | ||||||
|  |     use std::{fmt::Display, num::TryFromIntError}; | ||||||
|  |  | ||||||
|  |     pub type PResult<T> = Result<T, Error>; | ||||||
|  |  | ||||||
|  |     #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||||
|  |     pub struct Error { | ||||||
|  |         pub parsing: Parsing, | ||||||
|  |         pub kind: ErrorKind, | ||||||
|  |         pub loc: Span<usize>, | ||||||
|  |     } | ||||||
|  |     #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||||
|  |     pub enum ErrorKind { | ||||||
|  |         LexError, | ||||||
|  |         /// Returned when [Parsing::Expr] fails without consuming | ||||||
|  |         NotExpr, | ||||||
|  |         DivZero, | ||||||
|  |         NonNumeric(Kind), | ||||||
|  |         BadIntForDst(u16), | ||||||
|  |         TryFromIntError(TryFromIntError), | ||||||
|  |         Unexpected(Kind), | ||||||
|  |         BufEmpty, | ||||||
|  |         Todo, | ||||||
|  |     } | ||||||
|  |     #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  |     pub enum Parsing { | ||||||
|  |         File, | ||||||
|  |         Stmt, | ||||||
|  |  | ||||||
|  |         Label, | ||||||
|  |         Directive, | ||||||
|         Instruction, |         Instruction, | ||||||
|     }; |  | ||||||
|     pub use label::Label; |         NoEm, | ||||||
|     pub use line::Line; |         OneEm, | ||||||
|     pub use root::Root; |         Reti, | ||||||
|     // Error |         Br, | ||||||
|     pub use error::ParseError; |         OneArg, | ||||||
|  |         TwoArg, | ||||||
|  |         Jump, | ||||||
|  |  | ||||||
|  |         Width, | ||||||
|  |         Src, | ||||||
|  |         Dst, | ||||||
|  |         Reg, | ||||||
|  |  | ||||||
|  |         Expr, | ||||||
|  |         Vec, | ||||||
|  |     } | ||||||
|  |     impl Display for Error { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for ErrorKind { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 ErrorKind::LexError => write!(f, "lexical error"), | ||||||
|  |                 ErrorKind::TryFromIntError(e) => write!(f, "{e}"), | ||||||
|  |                 ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"), | ||||||
|  |                 ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"), | ||||||
|  |                 ErrorKind::DivZero => write!(f, "Division by zero"), | ||||||
|  |                 ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"), | ||||||
|  |                 ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"), | ||||||
|  |                 ErrorKind::BufEmpty => write!(f, "Peek buffer empty"), | ||||||
|  |                 ErrorKind::Todo => write!(f, "Not yet implemented"), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Names the construct that was being parsed, with its article ("a file", "an instruction"). | ||||||
|  |     impl Display for Parsing { | ||||||
|  |         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |             let description = match self { | ||||||
|  |                 Self::File => "a file", | ||||||
|  |                 Self::Stmt => "a line", | ||||||
|  |                 Self::Label => "a label", | ||||||
|  |                 Self::Directive => "a directive", | ||||||
|  |                 Self::Instruction => "an instruction", | ||||||
|  |                 Self::NoEm => "a no-operand emulated instruction", | ||||||
|  |                 Self::OneEm => "a one-operand emulated instruction", | ||||||
|  |                 Self::Reti => "a `reti` instruction", | ||||||
|  |                 Self::Br => "a `br` instruction", | ||||||
|  |                 Self::OneArg => "a one-operand instruction", | ||||||
|  |                 Self::TwoArg => "a two-operand instruction", | ||||||
|  |                 Self::Jump => "a jump instruction", | ||||||
|  |                 Self::Width => "an instruction width", | ||||||
|  |                 Self::Src => "a source", | ||||||
|  |                 Self::Dst => "a destination", | ||||||
|  |                 Self::Reg => "a register", | ||||||
|  |                 Self::Expr => "a constant expression", | ||||||
|  |                 Self::Vec => "a list", | ||||||
|  |             }; | ||||||
|  |             // `pad` is what `str`'s own Display impl calls, so width/fill flags still apply | ||||||
|  |             f.pad(description) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     // Marker impl: every method of `std::error::Error` keeps its default implementation. | ||||||
|  |     impl std::error::Error for Error {} | ||||||
| } | } | ||||||
|  |  | ||||||
| pub mod parsable; | #[cfg(test)] | ||||||
|  | mod tests; | ||||||
| pub mod comment; |  | ||||||
| pub mod directive; |  | ||||||
| pub mod error; |  | ||||||
| pub mod identifier; |  | ||||||
| pub mod instruction; |  | ||||||
| pub mod label; |  | ||||||
| pub mod line; |  | ||||||
| pub mod root; |  | ||||||
|  |  | ||||||
| /// Parser configuration; turns text or a token stream into a [Root] or a single [Line]. |  | ||||||
| pub struct Parser { |  | ||||||
|     /// Default radix for number conversion; [Default] sets it to 16. |  | ||||||
|     radix: u32, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parser { |  | ||||||
|     /// Parses a [Root] from an existing token stream, ignoring [Type::Space] tokens |  | ||||||
|     pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> { |  | ||||||
|         Root::parse(&self, &mut stream.ignore(Type::Space)) |  | ||||||
|     } |  | ||||||
|     /// Tokenizes and preprocesses `input`, then parses it as a [Root] |  | ||||||
|     pub fn parse<T>(self, input: &T) -> Result<Root, ParseError> |  | ||||||
|     where T: AsRef<str> + ?Sized { |  | ||||||
|         Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) |  | ||||||
|     } |  | ||||||
|     /// Reads the file at `path`, parses its contents as a [Root], and records `path` on the |  | ||||||
|     /// result. A failed read is propagated as a [ParseError] |  | ||||||
|     pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError> |  | ||||||
|     where P: AsRef<Path> + ?Sized { |  | ||||||
|         self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into())) |  | ||||||
|     } |  | ||||||
|     /// Tokenizes and preprocesses `input`, then parses a single [Line] from it |  | ||||||
|     pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError> |  | ||||||
|     where T: AsRef<str> + ?Sized { |  | ||||||
|         Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space)) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number] |  | ||||||
|     /// conversion, and returns the updated parser so the call can be chained. |  | ||||||
|     /// |  | ||||||
|     /// The parser is taken by value, so it has to be handed back: without the return |  | ||||||
|     /// value the new radix would be dropped together with `self`. |  | ||||||
|     pub fn radix(mut self, radix: u32) -> Self { |  | ||||||
|         self.radix = radix; |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Default for Parser { |  | ||||||
|     /// Creates a parser whose default radix is 16 |  | ||||||
|     fn default() -> Self { |  | ||||||
|         Parser { radix: 16 } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Debug for Parser { |  | ||||||
|     /// Shows the `radix` field and marks the struct as non-exhaustive |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let mut builder = f.debug_struct("Parser"); |  | ||||||
|         builder.field("radix", &self.radix); |  | ||||||
|         builder.finish_non_exhaustive() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|   | |||||||
							
								
								
									
										680
									
								
								src/parser/ast.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										680
									
								
								src/parser/ast.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,680 @@ | |||||||
|  | // © 2023-2024 John Breaux | ||||||
|  | // See LICENSE.md for license | ||||||
|  | //! Syntax tree types representing MSP430 statements, instructions, directives, and expressions. | ||||||
|  | use crate::{ | ||||||
|  |     lexer::token::{self, Reg, Token}, | ||||||
|  |     span::Span, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /// A sequence of [Statement]s. Its `Display` impl writes one statement per line. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct Statements<'t> { | ||||||
|  |     /// The statements, in order. | ||||||
|  |     pub stmts: Vec<Statement<'t>>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// One statement: a label, an instruction, a directive, or a comment. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Statement<'t> { | ||||||
|  |     /// A label name; displayed with a trailing `:`. | ||||||
|  |     Label(&'t str), | ||||||
|  |     /// An [Instruction]. This is the only variant changed by canonicalization. | ||||||
|  |     Insn(Instruction<'t>), | ||||||
|  |     /// A [Directive]. | ||||||
|  |     Directive(Directive<'t>), | ||||||
|  |     /// Comment text; displayed exactly as stored. | ||||||
|  |     Comment(&'t str), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A directive statement. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Directive<'t> { | ||||||
|  |     /// TODO: Store define as a vec of tokens. This will require help from the | ||||||
|  |     /// [preprocessor](crate::preprocessor) | ||||||
|  |     // NOTE(review): Display prints this variant as the fixed text `.directive` and drops the | ||||||
|  |     // tokens; confirm whether that is a placeholder. | ||||||
|  |     Define(Vec<Token<'t>>), | ||||||
|  |     /// Displayed as `.org <expr>`. | ||||||
|  |     Org(Box<Expr<'t>>), | ||||||
|  |     /// Displayed as `.word <expr>`. | ||||||
|  |     Word(Box<Expr<'t>>), | ||||||
|  |     /// Displayed as `.words [ <expr> <expr> ... ]`. | ||||||
|  |     Words(Vec<Expr<'t>>), | ||||||
|  |     /// Displayed as `.string "<text>"`. | ||||||
|  |     String(&'t str), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// An instruction together with a [Span]. `Display` prints only the `kind`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct Instruction<'t> { | ||||||
|  |     /// NOTE(review): presumably where the instruction sits in the source text; confirm | ||||||
|  |     /// against [Span]. Canonicalization carries it over unchanged. | ||||||
|  |     pub span: Span<usize>, | ||||||
|  |     /// Which instruction this is, including its operands. | ||||||
|  |     pub kind: InstructionKind<'t>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// The shapes an instruction can take. Canonicalization rewrites `NoEm`, `OneEm`, and `Br` | ||||||
|  | /// as `TwoArg`; the other variants keep their kind. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum InstructionKind<'t> { | ||||||
|  |     /// An emulated instruction with no operands. | ||||||
|  |     NoEm(NoEm), | ||||||
|  |     /// An emulated instruction with one (destination) operand. | ||||||
|  |     OneEm(OneEm<'t>), | ||||||
|  |     /// A one-operand instruction. | ||||||
|  |     OneArg(OneArg<'t>), | ||||||
|  |     /// A two-operand instruction. | ||||||
|  |     TwoArg(TwoArg<'t>), | ||||||
|  |     /// A jump instruction. | ||||||
|  |     Jump(Jump<'t>), | ||||||
|  |     /// The `reti` instruction. | ||||||
|  |     Reti(Reti), | ||||||
|  |     /// The `br` instruction. | ||||||
|  |     Br(Br<'t>), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// An emulated instruction with no operands; displayed as just its opcode. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct NoEm { | ||||||
|  |     /// The opcode token. | ||||||
|  |     pub opcode: token::NoEm, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// An emulated instruction with one operand; displayed as `<opcode><width><TAB><dst>`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct OneEm<'t> { | ||||||
|  |     /// The opcode token. | ||||||
|  |     pub opcode: token::OneEm, | ||||||
|  |     /// Operand width. | ||||||
|  |     pub width: Width, | ||||||
|  |     /// The destination operand. | ||||||
|  |     pub dst: Dst<'t>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A one-operand instruction; displayed as `<opcode><width><TAB><src>`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct OneArg<'t> { | ||||||
|  |     /// The opcode token. | ||||||
|  |     pub opcode: token::OneArg, | ||||||
|  |     /// Operand width. Canonicalization forces this to [Width::Word] for `Call`. | ||||||
|  |     pub width: Width, | ||||||
|  |     /// The operand, stored in source form. | ||||||
|  |     pub src: Src<'t>, | ||||||
|  | } | ||||||
|  | /// A two-operand instruction; displayed as `<opcode><width><TAB><src>, <dst>`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct TwoArg<'t> { | ||||||
|  |     /// The opcode token. | ||||||
|  |     pub opcode: token::TwoArg, | ||||||
|  |     /// Operand width. | ||||||
|  |     pub width: Width, | ||||||
|  |     /// The source operand. | ||||||
|  |     pub src: Src<'t>, | ||||||
|  |     /// The destination operand. | ||||||
|  |     pub dst: Dst<'t>, | ||||||
|  | } | ||||||
|  | /// A jump instruction; displayed as `<opcode><TAB><dst>`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct Jump<'t> { | ||||||
|  |     /// The opcode token. | ||||||
|  |     pub opcode: token::Jump, | ||||||
|  |     /// Where the jump goes. | ||||||
|  |     pub dst: JumpDst<'t>, | ||||||
|  | } | ||||||
|  | /// The `reti` instruction. It has no operands and is its own canonical form. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct Reti; | ||||||
|  | /// The `br` instruction; displayed as `br<TAB><src>`. Canonicalization turns it into a | ||||||
|  | /// word-sized `Mov` of `src` into `Reg::PC`. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub struct Br<'t> { | ||||||
|  |     /// The branch target operand. | ||||||
|  |     pub src: Src<'t>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Operand width of an instruction. | ||||||
|  | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Width { | ||||||
|  |     /// The default width; displayed as no suffix at all. | ||||||
|  |     #[default] | ||||||
|  |     Word, | ||||||
|  |     /// Displayed as the suffix `.b`. | ||||||
|  |     Byte, | ||||||
|  | } | ||||||
|  | /// A source operand. Each variant notes how `Display` prints it. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Src<'t> { | ||||||
|  |     /// `<reg>` | ||||||
|  |     Direct(Reg), | ||||||
|  |     /// `<expr>(<reg>)` | ||||||
|  |     Indexed(Box<Expr<'t>>, Reg), | ||||||
|  |     /// `@<reg>` | ||||||
|  |     Indirect(Reg), | ||||||
|  |     /// `@<reg>+` | ||||||
|  |     PostInc(Reg), | ||||||
|  |     /// `&<expr>` | ||||||
|  |     Absolute(Box<Expr<'t>>), | ||||||
|  |     /// `#<expr>` | ||||||
|  |     Immediate(Box<Expr<'t>>), | ||||||
|  |     /// `#<value>`, for one of the [SrcSpecial] values. Canonicalization produces this from | ||||||
|  |     /// an `Immediate` whose expression reduces to a matching number. | ||||||
|  |     Special(SrcSpecial), | ||||||
|  |     /// `<expr>` with no prefix. Canonicalization leaves this variant untouched. | ||||||
|  |     BareExpr(Box<Expr<'t>>), | ||||||
|  | } | ||||||
|  | /// The fixed values a [Src::Special] operand can hold. | ||||||
|  | // The derived ordering follows declaration order, so `Four` sorts before `Two`. | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum SrcSpecial { | ||||||
|  |     /// Displayed as `0`. | ||||||
|  |     Zero, | ||||||
|  |     /// Displayed as `1`. | ||||||
|  |     One, | ||||||
|  |     /// Displayed as `4`. | ||||||
|  |     Four, | ||||||
|  |     /// Displayed as `2`. | ||||||
|  |     Two, | ||||||
|  |     /// Displayed as `8`. | ||||||
|  |     Eight, | ||||||
|  |     /// Displayed as `-1`; canonicalization maps the immediate `0xffff` here. | ||||||
|  |     NegOne, | ||||||
|  | } | ||||||
|  | /// A destination operand. Every variant has a same-named [Src] counterpart (see | ||||||
|  | /// `From<Dst> for Src`). | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Dst<'t> { | ||||||
|  |     /// `<reg>` | ||||||
|  |     Direct(Reg), | ||||||
|  |     /// `<expr>(<reg>)` | ||||||
|  |     Indexed(Box<Expr<'t>>, Reg), | ||||||
|  |     /// `&<expr>` | ||||||
|  |     Absolute(Box<Expr<'t>>), | ||||||
|  |     /// `#<value>`, for one of the [DstSpecial] values. | ||||||
|  |     Special(DstSpecial), | ||||||
|  | } | ||||||
|  | /// The fixed values a [Dst::Special] operand can hold. | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum DstSpecial { | ||||||
|  |     /// Displayed as `0`. | ||||||
|  |     Zero, | ||||||
|  |     /// Displayed as `1`. | ||||||
|  |     One, | ||||||
|  | } | ||||||
|  | /// The target of a [Jump]: either a numeric offset or a label name. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum JumpDst<'t> { | ||||||
|  |     /// A relative offset, nominally an even number from -0x400..=0x3fe | ||||||
|  |     Rel(i16), | ||||||
|  |     /// A label name; displayed exactly as stored. | ||||||
|  |     Label(&'t str), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// An expression tree. Each variant notes how `Display` prints it. | ||||||
|  | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum Expr<'t> { | ||||||
|  |     /// A first operand followed by `(operator, operand)` pairs; printed in that order with | ||||||
|  |     /// no spaces in between. | ||||||
|  |     Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>), | ||||||
|  |     /// Prefix operators followed by the operand they apply to; the operators are printed | ||||||
|  |     /// in stored order. | ||||||
|  |     Unary(Vec<UnOp>, Box<Expr<'t>>), | ||||||
|  |     /// A parenthesized expression: `(<expr>)`. | ||||||
|  |     Group(Box<Expr<'t>>), | ||||||
|  |     /// A 16-bit number; printed in lowercase hexadecimal with no prefix. | ||||||
|  |     Number(u16), | ||||||
|  |     /// An identifier; printed exactly as stored. | ||||||
|  |     Ident(&'t str), | ||||||
|  |     /// `&<name>` | ||||||
|  |     AddrOf(&'t str), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Binary operators usable in an [Expr::Binary]. | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum BinOp { | ||||||
|  |     /// `*` | ||||||
|  |     Mul, | ||||||
|  |     /// `/` | ||||||
|  |     Div, | ||||||
|  |     /// `%` | ||||||
|  |     Rem, | ||||||
|  |     /// `+` | ||||||
|  |     Add, | ||||||
|  |     /// `-` | ||||||
|  |     Sub, | ||||||
|  |     /// `<<` | ||||||
|  |     Lsh, | ||||||
|  |     /// `>>` | ||||||
|  |     Rsh, | ||||||
|  |     /// `&` | ||||||
|  |     And, | ||||||
|  |     /// `^` | ||||||
|  |     Xor, | ||||||
|  |     /// `|` | ||||||
|  |     Or, | ||||||
|  | } | ||||||
|  | /// Prefix operators usable in an [Expr::Unary]. | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] | ||||||
|  | pub enum UnOp { | ||||||
|  |     /// `*` | ||||||
|  |     Deref, | ||||||
|  |     /// `!` | ||||||
|  |     Not, | ||||||
|  |     /// `-` | ||||||
|  |     Neg, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub mod conv { | ||||||
|  |     //! Conversions between [ast](super) types, via [From], or via `new` constructor | ||||||
|  |     use super::{InstructionKind as Ik, *}; | ||||||
|  |  | ||||||
|  |     /// Generates `impl From<$from> for $target` for each listed type, wrapping the value | ||||||
|  |     /// with the given constructor. | ||||||
|  |     // The `'t` written at the call site has to resolve to the `impl<'t>` declared in here, | ||||||
|  |     // which relies on macros not being hygienic over lifetimes. | ||||||
|  |     macro_rules! impl_from { | ||||||
|  |         ($target:ty { $($from:ty => $ctor:expr),* $(,)? }) => { | ||||||
|  |             $( | ||||||
|  |                 impl<'t> From<$from> for $target { | ||||||
|  |                     fn from(inner: $from) -> Self { | ||||||
|  |                         $ctor(inner) | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             )* | ||||||
|  |         }; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Every instruction struct converts into the matching `InstructionKind` variant | ||||||
|  |     impl_from! { Ik<'t> { | ||||||
|  |         NoEm => Ik::NoEm, | ||||||
|  |         OneEm<'t> => Ik::OneEm, | ||||||
|  |         OneArg<'t> => Ik::OneArg, | ||||||
|  |         TwoArg<'t> => Ik::TwoArg, | ||||||
|  |         Jump<'t> => Ik::Jump, | ||||||
|  |         Reti => Ik::Reti, | ||||||
|  |         Br<'t> => Ik::Br, | ||||||
|  |     }} | ||||||
|  |     // A bare number converts into a numeric expression | ||||||
|  |     impl_from! { Expr<'t> { | ||||||
|  |         u16 => Expr::Number, | ||||||
|  |     }} | ||||||
|  |  | ||||||
|  |     impl<'t> From<Dst<'t>> for Src<'t> { | ||||||
|  |         /// Reuses a destination operand as the same-named source operand. | ||||||
|  |         fn from(dst: Dst<'t>) -> Self { | ||||||
|  |             match dst { | ||||||
|  |                 Dst::Direct(reg) => Src::Direct(reg), | ||||||
|  |                 Dst::Indexed(offset, reg) => Src::Indexed(offset, reg), | ||||||
|  |                 Dst::Absolute(addr) => Src::Absolute(addr), | ||||||
|  |                 Dst::Special(special) => Src::Special(SrcSpecial::from(special)), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl From<DstSpecial> for SrcSpecial { | ||||||
|  |         /// Maps each destination special value onto the source value of the same name. | ||||||
|  |         fn from(special: DstSpecial) -> Self { | ||||||
|  |             match special { | ||||||
|  |                 DstSpecial::One => SrcSpecial::One, | ||||||
|  |                 DstSpecial::Zero => SrcSpecial::Zero, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t> TwoArg<'t> { | ||||||
|  |         /// Builds a two-operand instruction from its four parts. | ||||||
|  |         pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self { | ||||||
|  |             TwoArg { opcode, width, src, dst } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | pub mod display { | ||||||
|  |     //! [Display] implementations, which print [ast](super) nodes back out as text | ||||||
|  |     use super::*; | ||||||
|  |     use std::fmt::{self, Display}; | ||||||
|  |  | ||||||
|  |     impl<'t> Display for Statements<'t> { | ||||||
|  |         /// Writes each statement followed by a newline, stopping at the first write error. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             self.stmts.iter().try_for_each(|stmt| writeln!(f, "{stmt}")) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Statement<'t> { | ||||||
|  |         /// Labels get a trailing `:`; every other statement prints its payload unchanged. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 Self::Label(name) => write!(f, "{name}:"), | ||||||
|  |                 Self::Insn(insn) => write!(f, "{insn}"), | ||||||
|  |                 Self::Directive(directive) => write!(f, "{directive}"), | ||||||
|  |                 Self::Comment(text) => write!(f, "{text}"), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Directive<'t> { | ||||||
|  |         /// Prints the directive keyword followed by its argument(s). | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 // The stored tokens are not printed | ||||||
|  |                 Self::Define(_) => f.write_str(".directive"), | ||||||
|  |                 Self::Org(origin) => write!(f, ".org {origin}"), | ||||||
|  |                 Self::Word(word) => write!(f, ".word {word}"), | ||||||
|  |                 Self::Words(words) => { | ||||||
|  |                     f.write_str(".words [ ")?; | ||||||
|  |                     words.iter().try_for_each(|word| write!(f, "{word} "))?; | ||||||
|  |                     f.write_str("]") | ||||||
|  |                 } | ||||||
|  |                 Self::String(text) => write!(f, ".string \"{text}\""), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Instruction<'t> { | ||||||
|  |         /// Prints the instruction kind; the span is not shown. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}", self.kind) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for InstructionKind<'t> { | ||||||
|  |         /// Hands the formatter to whichever instruction the variant wraps. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 Self::NoEm(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::OneEm(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::OneArg(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::TwoArg(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::Jump(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::Reti(insn) => Display::fmt(insn, f), | ||||||
|  |                 Self::Br(insn) => Display::fmt(insn, f), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for NoEm { | ||||||
|  |         /// Prints just the opcode. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}", self.opcode) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for OneEm<'t> { | ||||||
|  |         /// Prints opcode and width, a tab, then the destination. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}{}\t{}", self.opcode, self.width, self.dst) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for OneArg<'t> { | ||||||
|  |         /// Prints opcode and width, a tab, then the operand. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}{}\t{}", self.opcode, self.width, self.src) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for TwoArg<'t> { | ||||||
|  |         /// Prints opcode and width, a tab, then `<src>, <dst>`. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}{}\t{}, {}", self.opcode, self.width, self.src, self.dst) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Jump<'t> { | ||||||
|  |         /// Prints the opcode, a tab, then the jump target. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "{}\t{}", self.opcode, self.dst) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for Reti { | ||||||
|  |         /// Always prints `reti`. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             f.write_str("reti") | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Br<'t> { | ||||||
|  |         /// Prints `br`, a tab, then the operand. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             write!(f, "br\t{}", self.src) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t> Display for Src<'t> { | ||||||
|  |         /// Prints the operand with the prefix/suffix of its addressing form. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 // Register forms | ||||||
|  |                 Self::Direct(reg) => write!(f, "{reg}"), | ||||||
|  |                 Self::Indirect(reg) => write!(f, "@{reg}"), | ||||||
|  |                 Self::PostInc(reg) => write!(f, "@{reg}+"), | ||||||
|  |                 Self::Indexed(offset, reg) => write!(f, "{offset}({reg})"), | ||||||
|  |                 // Expression forms | ||||||
|  |                 Self::Absolute(addr) => write!(f, "&{addr}"), | ||||||
|  |                 Self::Immediate(value) => write!(f, "#{value}"), | ||||||
|  |                 Self::Special(value) => write!(f, "#{value}"), | ||||||
|  |                 Self::BareExpr(expr) => write!(f, "{expr}"), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for SrcSpecial { | ||||||
|  |         /// Prints the number the variant stands for. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             f.write_str(match self { | ||||||
|  |                 Self::Zero => "0", | ||||||
|  |                 Self::One => "1", | ||||||
|  |                 Self::Two => "2", | ||||||
|  |                 Self::Four => "4", | ||||||
|  |                 Self::Eight => "8", | ||||||
|  |                 Self::NegOne => "-1", | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Dst<'t> { | ||||||
|  |         /// Prints the operand with the prefix/suffix of its addressing form. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 Self::Direct(reg) => write!(f, "{reg}"), | ||||||
|  |                 Self::Indexed(offset, reg) => write!(f, "{offset}({reg})"), | ||||||
|  |                 Self::Absolute(addr) => write!(f, "&{addr}"), | ||||||
|  |                 Self::Special(value) => write!(f, "#{value}"), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for DstSpecial { | ||||||
|  |         /// Prints the number the variant stands for. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             f.write_str(match self { | ||||||
|  |                 Self::Zero => "0", | ||||||
|  |                 Self::One => "1", | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for JumpDst<'t> { | ||||||
|  |         /// Prints the offset in decimal, or the label name as stored. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 Self::Rel(offset) => write!(f, "{offset}"), | ||||||
|  |                 Self::Label(name) => write!(f, "{name}"), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Display for Expr<'t> { | ||||||
|  |         /// Prints the expression with no whitespace between operators and operands. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 // Leaves | ||||||
|  |                 Self::Number(value) => write!(f, "{value:x}"), | ||||||
|  |                 Self::Ident(name) => write!(f, "{name}"), | ||||||
|  |                 Self::AddrOf(name) => write!(f, "&{name}"), | ||||||
|  |                 // Compound expressions | ||||||
|  |                 Self::Group(inner) => write!(f, "({inner})"), | ||||||
|  |                 Self::Unary(ops, operand) => { | ||||||
|  |                     ops.iter().try_for_each(|op| write!(f, "{op}"))?; | ||||||
|  |                     write!(f, "{operand}") | ||||||
|  |                 } | ||||||
|  |                 Self::Binary(first, rest) => { | ||||||
|  |                     write!(f, "{first}")?; | ||||||
|  |                     rest.iter().try_for_each(|(op, operand)| write!(f, "{op}{operand}")) | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for BinOp { | ||||||
|  |         /// Prints the operator symbol. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             f.write_str(match self { | ||||||
|  |                 Self::Mul => "*", | ||||||
|  |                 Self::Div => "/", | ||||||
|  |                 Self::Rem => "%", | ||||||
|  |                 Self::Add => "+", | ||||||
|  |                 Self::Sub => "-", | ||||||
|  |                 Self::Lsh => "<<", | ||||||
|  |                 Self::Rsh => ">>", | ||||||
|  |                 Self::And => "&", | ||||||
|  |                 Self::Xor => "^", | ||||||
|  |                 Self::Or => "|", | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for UnOp { | ||||||
|  |         /// Prints the operator symbol. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             f.write_str(match self { | ||||||
|  |                 Self::Deref => "*", | ||||||
|  |                 Self::Not => "!", | ||||||
|  |                 Self::Neg => "-", | ||||||
|  |             }) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Display for Width { | ||||||
|  |         /// Prints `.b` for byte width and nothing at all for word width. | ||||||
|  |         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |             match self { | ||||||
|  |                 Self::Byte => f.write_str(".b"), | ||||||
|  |                 Self::Word => Ok(()), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | pub mod canonical { | ||||||
|  |     use std::iter; | ||||||
|  |  | ||||||
|  |     use super::*; | ||||||
|  |     use token::TwoArg::*; | ||||||
|  |     /// Conversion of an [ast](super) node into its canonical form. The node is consumed. | ||||||
|  |     pub trait Canonicalize { | ||||||
|  |         /// The output after canonicalization | ||||||
|  |         type Output; | ||||||
|  |         /// Transmutes Self into its "canonical" form. "Emulated" instructions are converted | ||||||
|  |         /// into their respective non-emulated forms. | ||||||
|  |         fn to_canonical(self) -> Self::Output; | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Statements<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes every statement, keeping their order. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             let stmts = self.stmts.into_iter().map(Statement::to_canonical).collect(); | ||||||
|  |             Self { stmts } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Statement<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes instructions; every other kind of statement is returned as-is. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             match self { | ||||||
|  |                 Statement::Insn(insn) => Statement::Insn(insn.to_canonical()), | ||||||
|  |                 other => other, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Instruction<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes the instruction kind and keeps the span unchanged. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             let Self { span, kind } = self; | ||||||
|  |             Self { span, kind: kind.to_canonical() } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for InstructionKind<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes the wrapped instruction and re-wraps the result. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             match self { | ||||||
|  |                 // Emulated instructions and `br` come back as two-operand instructions | ||||||
|  |                 InstructionKind::NoEm(insn) => InstructionKind::TwoArg(insn.to_canonical()), | ||||||
|  |                 InstructionKind::OneEm(insn) => InstructionKind::TwoArg(insn.to_canonical()), | ||||||
|  |                 InstructionKind::Br(insn) => InstructionKind::TwoArg(insn.to_canonical()), | ||||||
|  |                 // Everything else keeps its kind | ||||||
|  |                 InstructionKind::OneArg(insn) => InstructionKind::OneArg(insn.to_canonical()), | ||||||
|  |                 InstructionKind::TwoArg(insn) => InstructionKind::TwoArg(insn.to_canonical()), | ||||||
|  |                 InstructionKind::Jump(insn) => InstructionKind::Jump(insn.to_canonical()), | ||||||
|  |                 InstructionKind::Reti(insn) => InstructionKind::Reti(insn.to_canonical()), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Canonicalize for NoEm { | ||||||
|  |         type Output = TwoArg<'static>; | ||||||
|  |         /// Expands a no-operand emulated instruction into a word-sized two-operand | ||||||
|  |         /// instruction whose destination is a register. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             // (two-operand opcode, source operand, destination register) for each opcode | ||||||
|  |             let (opcode, src, reg) = match self.opcode { | ||||||
|  |                 token::NoEm::Nop => (Mov, Src::Direct(Reg::CG), Reg::CG), | ||||||
|  |                 token::NoEm::Ret => (Mov, Src::PostInc(Reg::SP), Reg::PC), | ||||||
|  |                 token::NoEm::Clrc => (Bic, Src::Special(SrcSpecial::One), Reg::SR), | ||||||
|  |                 token::NoEm::Clrz => (Bic, Src::Special(SrcSpecial::Two), Reg::SR), | ||||||
|  |                 token::NoEm::Clrn => (Bic, Src::Special(SrcSpecial::Four), Reg::SR), | ||||||
|  |                 token::NoEm::Setc => (Bis, Src::Special(SrcSpecial::One), Reg::SR), | ||||||
|  |                 token::NoEm::Setz => (Bis, Src::Special(SrcSpecial::Two), Reg::SR), | ||||||
|  |                 token::NoEm::Setn => (Bis, Src::Special(SrcSpecial::Four), Reg::SR), | ||||||
|  |                 token::NoEm::Dint => (Bic, Src::Special(SrcSpecial::Eight), Reg::SR), | ||||||
|  |                 token::NoEm::Eint => (Bis, Src::Special(SrcSpecial::Eight), Reg::SR), | ||||||
|  |             }; | ||||||
|  |             TwoArg::new(opcode, Width::Word, src, Dst::Direct(reg)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for OneEm<'t> { | ||||||
|  |         type Output = TwoArg<'t>; | ||||||
|  |         /// Expands a one-operand emulated instruction into the two-operand instruction it | ||||||
|  |         /// stands for, keeping the written width. | ||||||
|  |         /// | ||||||
|  |         /// The destination is canonicalized as well, so the result matches what | ||||||
|  |         /// canonicalizing the equivalent hand-written [TwoArg] would give. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             use SrcSpecial::*; | ||||||
|  |             let Self { opcode, width, dst } = self; | ||||||
|  |             // Canonicalize first: `rla`/`rlc` copy the destination into the source slot, and | ||||||
|  |             // both copies should be in canonical form. | ||||||
|  |             let dst = dst.to_canonical(); | ||||||
|  |             match opcode { | ||||||
|  |                 token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst), | ||||||
|  |                 token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst), | ||||||
|  |                 token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst), | ||||||
|  |                 token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst), | ||||||
|  |                 token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst), | ||||||
|  |                 token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst), | ||||||
|  |                 token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst), | ||||||
|  |                 token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst), | ||||||
|  |                 token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst), | ||||||
|  |                 token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst), | ||||||
|  |                 token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst), | ||||||
|  |                 token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst), | ||||||
|  |                 token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for OneArg<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes the operand. `Call` is always given word width. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             let Self { opcode, width, src } = self; | ||||||
|  |             let width = if matches!(opcode, token::OneArg::Call) { Width::Word } else { width }; | ||||||
|  |             Self { opcode, width, src: src.to_canonical() } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for TwoArg<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes both operands; opcode and width are kept. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             TwoArg { | ||||||
|  |                 opcode: self.opcode, | ||||||
|  |                 width: self.width, | ||||||
|  |                 src: self.src.to_canonical(), | ||||||
|  |                 dst: self.dst.to_canonical(), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Jump<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Replaces `Jnz`, `Jz`, `Jnc`, and `Jc` with `Jne`, `Jeq`, `Jlo`, and `Jhs` | ||||||
|  |         /// respectively; any other opcode is kept. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             let Self { opcode, dst } = self; | ||||||
|  |             let opcode = match opcode { | ||||||
|  |                 token::Jump::Jnz => token::Jump::Jne, | ||||||
|  |                 token::Jump::Jz => token::Jump::Jeq, | ||||||
|  |                 token::Jump::Jnc => token::Jump::Jlo, | ||||||
|  |                 token::Jump::Jc => token::Jump::Jhs, | ||||||
|  |                 other => other, | ||||||
|  |             }; | ||||||
|  |             Self { opcode, dst: dst.to_canonical() } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl Canonicalize for Reti { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Returns `self` unchanged. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             self | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Br<'t> { | ||||||
|  |         type Output = TwoArg<'t>; | ||||||
|  |         /// Rewrites `br <src>` as a word-sized `Mov` of `src` into `Reg::PC`. | ||||||
|  |         /// | ||||||
|  |         /// The operand is canonicalized as well, so the result matches what canonicalizing | ||||||
|  |         /// the equivalent hand-written [TwoArg] would give. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             let Self { src } = self; | ||||||
|  |             TwoArg::new(Mov, Width::Word, src.to_canonical(), Dst::Direct(Reg::PC)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl<'t> Canonicalize for Src<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes any expression inside the operand. An immediate that reduces to | ||||||
|  |         /// 0, 1, 2, 4, 8, or 0xffff becomes the matching [Src::Special]. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             match self { | ||||||
|  |                 Src::Immediate(value) => match value.to_canonical() { | ||||||
|  |                     Expr::Number(0) => Src::Special(SrcSpecial::Zero), | ||||||
|  |                     Expr::Number(1) => Src::Special(SrcSpecial::One), | ||||||
|  |                     Expr::Number(2) => Src::Special(SrcSpecial::Two), | ||||||
|  |                     Expr::Number(4) => Src::Special(SrcSpecial::Four), | ||||||
|  |                     Expr::Number(8) => Src::Special(SrcSpecial::Eight), | ||||||
|  |                     Expr::Number(0xffff) => Src::Special(SrcSpecial::NegOne), | ||||||
|  |                     other => Src::Immediate(Box::new(other)), | ||||||
|  |                 }, | ||||||
|  |                 Src::Indexed(offset, reg) => Src::Indexed(Box::new(offset.to_canonical()), reg), | ||||||
|  |                 Src::Absolute(addr) => Src::Absolute(Box::new(addr.to_canonical())), | ||||||
|  |                 // Register-only and already-special operands have nothing to reduce, and | ||||||
|  |                 // bare expressions are deliberately passed through | ||||||
|  |                 Src::Direct(_) | ||||||
|  |                 | Src::Indirect(_) | ||||||
|  |                 | Src::PostInc(_) | ||||||
|  |                 | Src::Special(_) | ||||||
|  |                 | Src::BareExpr(_) => self, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Dst<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes the expression inside an indexed or absolute destination. | ||||||
|  |         /// Direct and special destinations are returned unchanged. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             match self { | ||||||
|  |                 Dst::Indexed(offset, reg) => Dst::Indexed(offset.to_canonical().into(), reg), | ||||||
|  |                 Dst::Absolute(addr) => Dst::Absolute(addr.to_canonical().into()), | ||||||
|  |                 Dst::Direct(_) | Dst::Special(_) => self, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for JumpDst<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Returns `self` unchanged. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             self | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     impl<'t> Canonicalize for Expr<'t> { | ||||||
|  |         type Output = Self; | ||||||
|  |         /// Canonicalizes an [Expr] by constant-folding it with wrapping 16-bit arithmetic. | ||||||
|  |         /// | ||||||
|  |         /// If all leaves are of type [Expr::Number], this returns a single [Expr::Number]. | ||||||
|  |         /// If not, it evaluates until it runs into an unevaluatable leaf, and returns the | ||||||
|  |         /// partially folded expression with its remaining operators intact. | ||||||
|  |         /// | ||||||
|  |         /// Folding also halts at a dereference, and at a division or remainder by zero. | ||||||
|  |         fn to_canonical(self) -> Self::Output { | ||||||
|  |             match self { | ||||||
|  |                 Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self, | ||||||
|  |                 Expr::Group(e) => e.to_canonical(), | ||||||
|  |                 Expr::Unary(ops, tail) => { | ||||||
|  |                     let mut tail = match tail.to_canonical() { | ||||||
|  |                         Expr::Number(n) => n, | ||||||
|  |                         // The operand can't be folded. Keep the operators attached to it, | ||||||
|  |                         // otherwise `-foo` would silently turn into `foo` | ||||||
|  |                         other => return Expr::Unary(ops, Box::new(other)), | ||||||
|  |                     }; | ||||||
|  |                     // If the tail is dereferenced, canonicalization must halt, | ||||||
|  |                     // since we have no knowledge of memory layout | ||||||
|  |                     let mut ops = ops.into_iter(); | ||||||
|  |                     for op in ops.by_ref() { | ||||||
|  |                         tail = match op { | ||||||
|  |                             UnOp::Deref => { | ||||||
|  |                                 return Expr::Unary( | ||||||
|  |                                     iter::once(op).chain(ops).collect(), | ||||||
|  |                                     Box::new(tail.into()), | ||||||
|  |                                 ) | ||||||
|  |                             } | ||||||
|  |                             UnOp::Not => !tail, | ||||||
|  |                             UnOp::Neg => 0u16.wrapping_sub(tail), | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     Expr::Number(tail) | ||||||
|  |                 } | ||||||
|  |                 Expr::Binary(head, tails) => { | ||||||
|  |                     let mut head = match head.to_canonical() { | ||||||
|  |                         Expr::Number(n) => n, | ||||||
|  |                         head => return Expr::Binary(head.into(), tails), | ||||||
|  |                     }; | ||||||
|  |                     let mut tails = tails.into_iter(); | ||||||
|  |                     for (op, tail) in &mut tails { | ||||||
|  |                         let tail = tail.to_canonical(); | ||||||
|  |                         // If the canonical tail isn't a number, rebuild and return | ||||||
|  |                         let Expr::Number(tail) = tail else { | ||||||
|  |                             return Expr::Binary( | ||||||
|  |                                 Box::new(head.into()), | ||||||
|  |                                 iter::once((op, tail)).chain(tails).collect(), | ||||||
|  |                             ); | ||||||
|  |                         }; | ||||||
|  |                         // `wrapping_div` and `wrapping_rem` panic on a zero divisor, and | ||||||
|  |                         // there is no value to fold to: rebuild and return instead | ||||||
|  |                         if tail == 0 && matches!(op, BinOp::Div | BinOp::Rem) { | ||||||
|  |                             return Expr::Binary( | ||||||
|  |                                 Box::new(head.into()), | ||||||
|  |                                 iter::once((op, Expr::Number(tail))).chain(tails).collect(), | ||||||
|  |                             ); | ||||||
|  |                         } | ||||||
|  |                         head = match op { | ||||||
|  |                             BinOp::Mul => head.wrapping_mul(tail), | ||||||
|  |                             BinOp::Div => head.wrapping_div(tail), | ||||||
|  |                             BinOp::Rem => head.wrapping_rem(tail), | ||||||
|  |                             BinOp::Add => head.wrapping_add(tail), | ||||||
|  |                             BinOp::Sub => head.wrapping_sub(tail), | ||||||
|  |                             BinOp::Lsh => head.wrapping_shl(tail as u32), | ||||||
|  |                             BinOp::Rsh => head.wrapping_shr(tail as u32), | ||||||
|  |                             BinOp::And => head & tail, | ||||||
|  |                             BinOp::Xor => head ^ tail, | ||||||
|  |                             BinOp::Or => head | tail, | ||||||
|  |                         }; | ||||||
|  |                     } | ||||||
|  |                     Expr::Number(head) | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,15 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//` |  | ||||||
| use super::*; |  | ||||||
| /// The text of one comment token, stored as an owned copy of its lexeme |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Comment(pub String); |  | ||||||
|  |  | ||||||
| impl Parsable for Comment { |  | ||||||
|     /// Expects a [`Type::Comment`] token and copies its lexeme into a new [`Comment`] |  | ||||||
|     fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let token = stream.expect(Type::Comment)?; |  | ||||||
|         let text = token.lexeme().to_string(); |  | ||||||
|         Ok(Self(text)) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl Display for Comment { |  | ||||||
|     /// Writes the stored text exactly as it was captured |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let Self(text) = self; |  | ||||||
|         Display::fmt(text, f) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,90 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and |  | ||||||
| //! [Linker](crate::Linker) |  | ||||||
|  |  | ||||||
| use std::path::PathBuf; |  | ||||||
|  |  | ||||||
| use super::*; |  | ||||||
| use crate::lexer::token::OwnedToken; |  | ||||||
|  |  | ||||||
| // TODO: Parse each kind of *postprocessor* directive into an AST node |  | ||||||
| //  - .org 8000:                Directive::Org { base: Number } |  | ||||||
| //  - .define ident tt...       Directive::Define { } ; should this be in the AST? How do I put this |  | ||||||
| //    in the AST? |  | ||||||
| //  - .include "<filename>"     Directive::Include { Root } ; should this include an entire AST in |  | ||||||
| //    the AST? |  | ||||||
| //  - .word 8000                Directive::Word(Number) |  | ||||||
| //  - .words dead beef          Directive::Words(Vec<u16>|Vec<Number>) |  | ||||||
| //  - .byte ff                  Directive::Byte(Number) |  | ||||||
| //  - .bytes de, ad, be, ef     Directive::Bytes(Vec<u8>) |  | ||||||
| //  - .string "string"          Directive::String(String) |  | ||||||
| //  - .ascii "string"           Directive::Ascii(Vec<u8>) |  | ||||||
|  |  | ||||||
| /// One assembler directive together with its parsed operands. |  | ||||||
| /// |  | ||||||
| /// Each variant is produced by [`Directive::parse`](Parsable::parse) from the directive |  | ||||||
| /// lexeme named in its documentation. |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub enum Directive { |  | ||||||
|     /// `.org`: a single [Number] |  | ||||||
|     Org(Number), |  | ||||||
|     /// `.define`: every token up to the end of the line or file |  | ||||||
|     Define(Vec<OwnedToken>), |  | ||||||
|     /// `.include`: the [Root] parsed from the file at the given path |  | ||||||
|     Include(Root), |  | ||||||
|     /// `.byte`: a single [Number] |  | ||||||
|     Byte(Number), |  | ||||||
|     /// `.bytes`: a list of [Number]s |  | ||||||
|     Bytes(Vec<Number>), |  | ||||||
|     /// `.word`: a single [Number] |  | ||||||
|     Word(Number), |  | ||||||
|     /// `.words`: a list of [Number]s |  | ||||||
|     Words(Vec<Number>), |  | ||||||
|     /// `.string`: a single [String] |  | ||||||
|     String(String), |  | ||||||
|     /// `.strings`: a list of [String]s |  | ||||||
|     Strings(Vec<String>), |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // No inherent methods yet |  | ||||||
| impl Directive {} |  | ||||||
|  |  | ||||||
| impl Parsable for Directive { |  | ||||||
|     /// Expects a [`Type::Directive`] token, then parses whichever operands its lexeme calls for. |  | ||||||
|     /// |  | ||||||
|     /// # Errors |  | ||||||
|     /// Returns [`ParseError::UnrecognizedDirective`] when the lexeme is not a known directive, |  | ||||||
|     /// and passes on any error from parsing the operands or the included file. |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let directive = stream.expect(Type::Directive)?; |  | ||||||
|         let parsed = match directive.lexeme() { |  | ||||||
|             ".org" => Self::Org(Number::parse(p, stream)?), |  | ||||||
|             ".define" => { |  | ||||||
|                 // Take every token up to, but not including, the end of the line or file |  | ||||||
|                 let mut replacement = Vec::new(); |  | ||||||
|                 while !matches!(stream.peek().variant(), Type::Endl | Type::EndOfFile) { |  | ||||||
|                     replacement.push(stream.next().unwrap_or_default().into()); |  | ||||||
|                 } |  | ||||||
|                 Self::Define(replacement) |  | ||||||
|             } |  | ||||||
|             ".include" => { |  | ||||||
|                 // The included file is parsed by a fresh, default-configured Parser |  | ||||||
|                 let includer = Parser::default(); |  | ||||||
|                 let path = PathBuf::parse(p, stream)?; |  | ||||||
|                 Self::Include(includer.parse_file(&path)?) |  | ||||||
|             } |  | ||||||
|             ".byte" => Self::Byte(Number::parse(p, stream)?), |  | ||||||
|             ".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?), |  | ||||||
|             ".word" => Self::Word(Number::parse(p, stream)?), |  | ||||||
|             ".words" => Self::Words(Vec::<Number>::parse(p, stream)?), |  | ||||||
|             ".string" => Self::String(String::parse(p, stream)?), |  | ||||||
|             ".strings" => Self::Strings(Vec::<String>::parse(p, stream)?), |  | ||||||
|             unknown => return Err(ParseError::UnrecognizedDirective(unknown.into())), |  | ||||||
|         }; |  | ||||||
|         Ok(parsed) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Directive { |  | ||||||
|     /// Writes the directive under the same name that [`Directive::parse`](Parsable::parse) |  | ||||||
|     /// accepts for it, followed by its operands. |  | ||||||
|     /// |  | ||||||
|     /// [`Directive::Include`] is the exception: it writes the included [Root] itself. |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Directive::Org(num) => write!(f, ".org {num}"), |  | ||||||
|             Directive::Define(rep) => { |  | ||||||
|                 write!(f, ".define")?; |  | ||||||
|                 for t in rep { |  | ||||||
|                     write!(f, " {t}")?; |  | ||||||
|                 } |  | ||||||
|                 Ok(()) |  | ||||||
|             } |  | ||||||
|             Directive::Include(r) => Display::fmt(r, f), |  | ||||||
|             // Each directive is labelled with its own name, not `.org`, `.bytes` or `.string` |  | ||||||
|             Directive::Byte(num) => write!(f, ".byte {num}"), |  | ||||||
|             Directive::Bytes(v) => write!(f, ".bytes {v:?}"), |  | ||||||
|             Directive::Word(num) => write!(f, ".word {num}"), |  | ||||||
|             Directive::Words(v) => write!(f, ".words {v:?}"), |  | ||||||
|             Directive::String(s) => write!(f, ".string \"{s}\""), |  | ||||||
|             Directive::Strings(s) => write!(f, ".strings \"{s:?}\""), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,74 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| use super::*; |  | ||||||
| use crate::lexer::error::LexError; |  | ||||||
|  |  | ||||||
| /// Every error that parsing can produce |  | ||||||
| #[derive(Debug)] |  | ||||||
| pub enum ParseError { |  | ||||||
|     /// Produced by [lexer](crate::lexer) |  | ||||||
|     LexError(LexError), |  | ||||||
|     /// Produced by [std::io] |  | ||||||
|     IoError(std::io::Error), |  | ||||||
|     /// Produced by [Number](Number)[::parse()](Parsable::parse()) |  | ||||||
|     /// when the parsed number contains digits too high for the specified radix. |  | ||||||
|     /// Holds the offending lexeme and the radix it was parsed with. |  | ||||||
|     UnexpectedDigits(String, u32), |  | ||||||
|     /// Produced by [Opcode](Opcode)[::parse()](Parsable::parse()) |  | ||||||
|     /// when the opcode passed lexing but did not match recognized opcodes. |  | ||||||
|     /// |  | ||||||
|     /// This is always a lexer bug. |  | ||||||
|     UnrecognizedOpcode(String), |  | ||||||
|     /// Produced by [Directive](Directive)[::parse()](Parsable::parse()) |  | ||||||
|     /// when an unknown or unimplemented directive is used |  | ||||||
|     UnrecognizedDirective(String), |  | ||||||
|     /// Produced by [Register] when attempting to convert from a [str] |  | ||||||
|     /// that isn't a register (pc, sp, sr, cg, or r{number}) |  | ||||||
|     NotARegister(String), |  | ||||||
|     /// Produced by [Register] when the r{number} is outside the range 0-15 |  | ||||||
|     RegisterTooHigh(u16), |  | ||||||
|     /// Produced by [SecondaryOperand] when the joke "secondary immediate" form |  | ||||||
|     /// is out of range 0..=1 |  | ||||||
|     FatSecondaryImmediate(isize), |  | ||||||
|     /// Produced by a [Number] too wide to fit in 16 bits, signed or unsigned |  | ||||||
|     /// (outside the range `-2^15 ..= 2^16-1`) |  | ||||||
|     NumberTooWide(isize), |  | ||||||
|     /// Produced by [JumpTarget](parser::preamble::JumpTarget) |  | ||||||
|     /// when the jump offset is outside the range `-1022..=1024` |  | ||||||
|     JumpedTooFar(isize), |  | ||||||
|     /// Produced by [JumpTarget](parser::preamble::JumpTarget) |  | ||||||
|     /// when the jump offset is odd |  | ||||||
|     JumpedOdd(isize), |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<LexError> for ParseError { |  | ||||||
|     /// Wraps the lexer error in [`ParseError::LexError`], so `?` can propagate it |  | ||||||
|     fn from(value: LexError) -> Self { Self::LexError(value) } |  | ||||||
| } |  | ||||||
| impl From<std::io::Error> for ParseError { |  | ||||||
|     /// Wraps the I/O error in [`ParseError::IoError`], so `?` can propagate it |  | ||||||
|     fn from(value: std::io::Error) -> Self { Self::IoError(value) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for ParseError { |  | ||||||
|     /// Writes a one-line, human-readable description of the error. |  | ||||||
|     /// |  | ||||||
|     /// The two wrapper variants defer to the [Display] of the error they hold. |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::LexError(error) => Display::fmt(error, f), |  | ||||||
|             Self::IoError(error) => Display::fmt(error, f), |  | ||||||
|             Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."), |  | ||||||
|             Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"), |  | ||||||
|             Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."), |  | ||||||
|             Self::NotARegister(reg) => write!(f, "{reg} is not a register"), |  | ||||||
|             Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"), |  | ||||||
|             Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"), |  | ||||||
|             Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"), |  | ||||||
|             Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"), |  | ||||||
|             Self::JumpedOdd(num) => { |  | ||||||
|                 write!(f, "Jump targets only encode even numbers: {num} must not be odd.") |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl std::error::Error for ParseError { |  | ||||||
|     /// Returns the wrapped lexer or I/O error, or `None` for every other variant |  | ||||||
|     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |  | ||||||
|         match self { |  | ||||||
|             Self::LexError(e) => Some(e), |  | ||||||
|             Self::IoError(e) => Some(e), |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,26 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! An [Identifier] stores the text of an identifier in a reference-counted string |  | ||||||
| use super::*; |  | ||||||
| use std::rc::Rc; |  | ||||||
| /// The text of an identifier, held in an [`Rc<str>`] |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Identifier { |  | ||||||
|     str: Rc<str>, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Identifier { |  | ||||||
|     /// Copies the text of `s` into a new [Identifier] |  | ||||||
|     fn str<T: AsRef<str>>(s: T) -> Self { |  | ||||||
|         let text: Rc<str> = Rc::from(s.as_ref()); |  | ||||||
|         Self { str: text } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for Identifier { |  | ||||||
|     /// Expects a [`Type::Identifier`] token and copies its lexeme into a new [Identifier] |  | ||||||
|     fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let token = stream.expect(Type::Identifier)?; |  | ||||||
|         // `expect` has already checked the variant, so the other branch signals a bug |  | ||||||
|         if let Type::Identifier = token.variant() { |  | ||||||
|             Ok(Self::str(token.lexeme())) |  | ||||||
|         } else { |  | ||||||
|             unreachable!("Expected identifier, got {token:?}") |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl Display for Identifier { |  | ||||||
|     /// Writes the identifier's text |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let Self { str: text } = self; |  | ||||||
|         Display::fmt(text, f) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,52 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430 |  | ||||||
| //! instruction |  | ||||||
| //! |  | ||||||
| //! |  | ||||||
| //! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent |  | ||||||
| //! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an |  | ||||||
| //! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on |  | ||||||
| //! my own. Really, there's about 9 valid classes of instruction, some of which are only used for |  | ||||||
| //! one or two of the MSP430's instructions. |  | ||||||
|  |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| pub mod encoding; |  | ||||||
| pub mod opcode; |  | ||||||
|  |  | ||||||
| /// Pairs an [Opcode] with the [Encoding] of its arguments, for a single msp430 instruction |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub struct Instruction(Opcode, Encoding); |  | ||||||
|  |  | ||||||
| impl Instruction { |  | ||||||
|     /// Borrows the [Opcode] |  | ||||||
|     pub fn opcode(&self) -> &Opcode { &self.0 } |  | ||||||
|     /// Borrows the [Encoding] |  | ||||||
|     pub fn encoding(&self) -> &Encoding { &self.1 } |  | ||||||
|     /// Gets the Instruction as a [u16]: the opcode's bits or-ed with the encoding's bits |  | ||||||
|     pub fn word(&self) -> u16 { |  | ||||||
|         let Self(opcode, encoding) = self; |  | ||||||
|         *opcode as u16 | encoding.word() |  | ||||||
|     } |  | ||||||
|     /// Gets the extension words reported by the [Encoding] |  | ||||||
|     pub fn ext_words(&self) -> [Option<u16>; 2] { self.encoding().extwords() } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for Instruction { |  | ||||||
|     /// Parses an [Opcode], resolves it to a final opcode and an encoding parser, |  | ||||||
|     /// then lets that parser read the arguments from the stream |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where |  | ||||||
|         Self: Sized, |  | ||||||
|         T: crate::TokenStream<'text>, |  | ||||||
|     { |  | ||||||
|         let (opcode, arguments) = Opcode::parse(p, stream)?.resolve(); |  | ||||||
|         let encoding = arguments.parse(p, stream)?; |  | ||||||
|         Ok(Self(opcode, encoding)) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Instruction> for u16 { |  | ||||||
|     /// Same as [`Instruction::word`] |  | ||||||
|     fn from(value: Instruction) -> Self { value.word() } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Instruction { |  | ||||||
|     /// Writes the opcode immediately followed by the encoding, with nothing in between |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let Self(opcode, encoding) = self; |  | ||||||
|         write!(f, "{}{}", opcode, encoding) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,81 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode) |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| pub mod number; |  | ||||||
| pub mod register; |  | ||||||
| pub mod width; |  | ||||||
|  |  | ||||||
| pub mod jump_target; |  | ||||||
| pub mod primary_operand; |  | ||||||
| pub mod secondary_operand; |  | ||||||
|  |  | ||||||
| mod builder; |  | ||||||
| pub mod encoding_parser; |  | ||||||
|  |  | ||||||
| use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder}; |  | ||||||
| use encoding_parser::EncodingParser; |  | ||||||
|  |  | ||||||
| /// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html) |  | ||||||
| /// |  | ||||||
| /// # Examples |  | ||||||
| /// ```rust |  | ||||||
| /// use msp430_asm::{preamble::*, parser::preamble::*}; |  | ||||||
| /// // Create a token sequence |  | ||||||
| /// let asm_file = r".b 8000(r15)"; |  | ||||||
| /// // Create a single-operand encoding parser |  | ||||||
| /// let single: EncodingParser = Encoding::single().end(); |  | ||||||
| /// // Parse an Encoding from it |  | ||||||
| /// let encoding: Encoding = single |  | ||||||
| ///     .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces()) |  | ||||||
| ///     .unwrap(); |  | ||||||
| /// // Print the Encoding |  | ||||||
| /// println!("{encoding}"); |  | ||||||
| /// ``` |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub enum Encoding { |  | ||||||
|     /// One operand: a width and a destination |  | ||||||
|     Single { width: Width, dst: PrimaryOperand }, |  | ||||||
|     /// A jump: only a target |  | ||||||
|     Jump { target: JumpTarget }, |  | ||||||
|     /// Two operands: a width, a source and a destination |  | ||||||
|     Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand }, |  | ||||||
| } |  | ||||||
| impl Encoding { |  | ||||||
|     /// Returns a builder for [Encoding::Single] |  | ||||||
|     pub fn single() -> SingleBuilder { SingleBuilder::default() } |  | ||||||
|     /// Returns a builder for [Encoding::Jump] |  | ||||||
|     pub fn jump() -> JumpBuilder { JumpBuilder::default() } |  | ||||||
|     /// Returns a builder for [Encoding::Double] |  | ||||||
|     pub fn double() -> DoubleBuilder { DoubleBuilder::default() } |  | ||||||
|     /// Returns a builder for [Encoding::Double] |  | ||||||
|     /// |  | ||||||
|     /// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and |  | ||||||
|     /// dst are the same |  | ||||||
|     pub fn reflexive() -> ReflexiveBuilder { ReflexiveBuilder::default() } |  | ||||||
|     /// Returns the bits this [Encoding] contributes to the instruction word |  | ||||||
|     /// |  | ||||||
|     /// A [Jump](Encoding::Jump) whose target has no word yet contributes zero |  | ||||||
|     pub fn word(&self) -> u16 { |  | ||||||
|         match self { |  | ||||||
|             Self::Single { width, dst } => { |  | ||||||
|                 let width_bits = u16::from(*width); |  | ||||||
|                 width_bits | dst.mode() | dst.register() as u16 |  | ||||||
|             } |  | ||||||
|             Self::Jump { target } => target.word().unwrap_or_default(), |  | ||||||
|             Self::Double { width, src, dst } => { |  | ||||||
|                 let width_bits = u16::from(*width); |  | ||||||
|                 // The source register sits 8 bits above the destination register |  | ||||||
|                 let registers = dst.register() as u16 | ((src.register() as u16) << 8); |  | ||||||
|                 width_bits | src.mode() | dst.mode() | registers |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     /// Returns extwords for instruction: the source's first, then the destination's |  | ||||||
|     pub fn extwords(&self) -> [Option<u16>; 2] { |  | ||||||
|         match self { |  | ||||||
|             Self::Jump { .. } => [None, None], |  | ||||||
|             Self::Single { dst, .. } => [dst.ext_word(), None], |  | ||||||
|             Self::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()], |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Encoding { |  | ||||||
|     /// Writes the width (when the variant has one), a space, then the operands. |  | ||||||
|     /// The operands of a [Double](Encoding::Double) are separated by `", "`. |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Encoding::Single { width, dst } => write!(f, "{width} {dst}"), |  | ||||||
|             Encoding::Jump { target } => write!(f, " {target}"), |  | ||||||
|             Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,76 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! Builder API for [`EncodingParser`] |  | ||||||
| use super::*; |  | ||||||
| /// Collects the fields to preset on an [EncodingParser::Single] |  | ||||||
| #[derive(Debug, Default)] |  | ||||||
| pub struct SingleBuilder { |  | ||||||
|     width: Option<Width>, |  | ||||||
|     dst: Option<PrimaryOperand>, |  | ||||||
| } |  | ||||||
| impl SingleBuilder { |  | ||||||
|     /// Sets the [Width] field, converted from a [bool] |  | ||||||
|     pub fn width(self, width: bool) -> Self { Self { width: Some(width.into()), ..self } } |  | ||||||
|     /// Sets the [PrimaryOperand] field |  | ||||||
|     pub fn operand(self, dst: PrimaryOperand) -> Self { Self { dst: Some(dst), ..self } } |  | ||||||
|     /// Finishes the builder; fields that were never set stay `None` |  | ||||||
|     pub fn end(self) -> EncodingParser { |  | ||||||
|         let Self { width, dst } = self; |  | ||||||
|         EncodingParser::Single { width, dst } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Collects the field to preset on an [EncodingParser::Jump] |  | ||||||
| #[derive(Debug, Default)] |  | ||||||
| pub struct JumpBuilder { |  | ||||||
|     target: Option<JumpTarget>, |  | ||||||
| } |  | ||||||
| impl JumpBuilder { |  | ||||||
|     /// Sets the [JumpTarget] field |  | ||||||
|     pub fn target(self, target: JumpTarget) -> Self { Self { target: Some(target) } } |  | ||||||
|     /// Finishes the builder; the target stays `None` if it was never set |  | ||||||
|     pub fn end(self) -> EncodingParser { |  | ||||||
|         let Self { target } = self; |  | ||||||
|         EncodingParser::Jump { target } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Collects the fields to preset on an [EncodingParser::Double] |  | ||||||
| #[derive(Debug, Default)] |  | ||||||
| pub struct DoubleBuilder { |  | ||||||
|     width: Option<Width>, |  | ||||||
|     src: Option<PrimaryOperand>, |  | ||||||
|     dst: Option<SecondaryOperand>, |  | ||||||
| } |  | ||||||
| impl DoubleBuilder { |  | ||||||
|     /// Sets the [Width] field, converted from a [bool] |  | ||||||
|     pub fn width(self, width: bool) -> Self { Self { width: Some(width.into()), ..self } } |  | ||||||
|     /// Sets the source [PrimaryOperand] field |  | ||||||
|     pub fn src(self, src: PrimaryOperand) -> Self { Self { src: Some(src), ..self } } |  | ||||||
|     /// Sets the destination [SecondaryOperand] field |  | ||||||
|     pub fn dst(self, dst: SecondaryOperand) -> Self { Self { dst: Some(dst), ..self } } |  | ||||||
|     /// Finishes the builder; fields that were never set stay `None` |  | ||||||
|     pub fn end(self) -> EncodingParser { |  | ||||||
|         let Self { width, src, dst } = self; |  | ||||||
|         EncodingParser::Double { width, src, dst } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Collects the fields to preset on an [EncodingParser::Reflexive] |  | ||||||
| #[derive(Debug, Default)] |  | ||||||
| pub struct ReflexiveBuilder { |  | ||||||
|     width: Option<Width>, |  | ||||||
|     reg: Option<SecondaryOperand>, |  | ||||||
| } |  | ||||||
| impl ReflexiveBuilder { |  | ||||||
|     /// Sets the [Width] field, converted from a [bool] |  | ||||||
|     pub fn width(self, width: bool) -> Self { Self { width: Some(width.into()), ..self } } |  | ||||||
|     /// Sets the [SecondaryOperand] that serves as both source and destination |  | ||||||
|     pub fn reg(self, reg: SecondaryOperand) -> Self { Self { reg: Some(reg), ..self } } |  | ||||||
|     /// Finishes the builder; fields that were never set stay `None` |  | ||||||
|     pub fn end(self) -> EncodingParser { |  | ||||||
|         let Self { width, reg } = self; |  | ||||||
|         EncodingParser::Reflexive { width, reg } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,37 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`] |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| #[derive(Clone, Debug)] |  | ||||||
| /// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream] |  | ||||||
| /// |  | ||||||
| /// Every field is optional: a field that is `Some` is used as-is, and a field that is |  | ||||||
| /// `None` is a hole to be filled from the stream by [`EncodingParser::parse`]. |  | ||||||
| pub enum EncodingParser { |  | ||||||
|     /// Produces an [Encoding::Single] |  | ||||||
|     Single { width: Option<Width>, dst: Option<PrimaryOperand> }, |  | ||||||
|     /// Produces an [Encoding::Jump] |  | ||||||
|     Jump { target: Option<JumpTarget> }, |  | ||||||
|     /// Produces an [Encoding::Double] |  | ||||||
|     Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> }, |  | ||||||
|     /// Produces an [Encoding::Double] whose source and destination are the same operand |  | ||||||
|     Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> }, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl EncodingParser { |  | ||||||
|     /// Constructs an [Encoding] from this [EncodingParser], filling holes |  | ||||||
|     /// with the tokenstream |  | ||||||
|     /// |  | ||||||
|     /// A field that is already `Some` is used as-is and reads nothing from the stream. |  | ||||||
|     /// A missing width falls back to its default rather than failing. |  | ||||||
|     /// |  | ||||||
|     /// # Errors |  | ||||||
|     /// Passes on the [ParseError] from parsing any missing operand or jump target. |  | ||||||
|     pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError> |  | ||||||
|     where T: crate::TokenStream<'text> { |  | ||||||
|         Ok(match self { |  | ||||||
|             Self::Single { width, dst } => Encoding::Single { |  | ||||||
|                 width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), |  | ||||||
|                 dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? }, |  | ||||||
|             }, |  | ||||||
|             // Only parse when the target is missing: `unwrap_or` would evaluate the parse |  | ||||||
|             // eagerly, consuming tokens (and possibly failing) even for a preset target |  | ||||||
|             Self::Jump { target } => Encoding::Jump { |  | ||||||
|                 target: if let Some(target) = target { target } else { JumpTarget::parse(p, stream)? }, |  | ||||||
|             }, |  | ||||||
|             Self::Double { width, src, dst } => Encoding::Double { |  | ||||||
|                 width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)), |  | ||||||
|                 src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? }, |  | ||||||
|                 dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? }, |  | ||||||
|             }, |  | ||||||
|             Self::Reflexive { width, reg } => { |  | ||||||
|                 let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default()); |  | ||||||
|                 let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? }; |  | ||||||
|                 // The one operand is used as both source and destination |  | ||||||
|                 Encoding::Double { width, src: reg.clone().into(), dst: reg } |  | ||||||
|             } |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,58 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier) |  | ||||||
| //! for a [Jump](Encoding::Jump) [instruction] |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// Contains the [pc-relative offset](Number) or [label](Identifier) |  | ||||||
| /// for a [Jump](Encoding::Jump) [Instruction] |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub enum JumpTarget { |  | ||||||
|     /// A numeric target. When built through [`TryFrom<Number>`] this holds the value |  | ||||||
|     /// returned by [`JumpTarget::squish`], not the offset as written |  | ||||||
|     Number(Number), |  | ||||||
|     /// A label, which has no word of its own (see [`JumpTarget::word`]) |  | ||||||
|     Identifier(Identifier), |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl JumpTarget { |  | ||||||
|     /// Returns the low 10 bits of a numeric target, or `None` for a label |  | ||||||
|     pub fn word(&self) -> Option<u16> { |  | ||||||
|         match self { |  | ||||||
|             Self::Identifier(_) => None, |  | ||||||
|             Self::Number(n) => Some(u16::from(*n) & 0x3ff), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     /// Converts a jump offset into the 10-bit value `(value / 2) - 1`, masked with `0x3ff` |  | ||||||
|     /// |  | ||||||
|     /// # Errors |  | ||||||
|     /// - [`ParseError::JumpedOdd`] when `value` is odd |  | ||||||
|     /// - [`ParseError::JumpedTooFar`] when `value` is outside `-1022..=1024` |  | ||||||
|     pub fn squish(value: isize) -> Result<u16, ParseError> { |  | ||||||
|         if value % 2 != 0 { |  | ||||||
|             return Err(ParseError::JumpedOdd(value)); |  | ||||||
|         } |  | ||||||
|         if !(-1024..=1022).contains(&(value - 2)) { |  | ||||||
|             return Err(ParseError::JumpedTooFar(value)); |  | ||||||
|         } |  | ||||||
|         Ok(((value >> 1) - 1) as u16 & 0x3ff) |  | ||||||
|     } |  | ||||||
|     /// Converts a squished value back into an offset: `(value + 1) * 2` |  | ||||||
|     /// |  | ||||||
|     /// NOTE(review): the 10-bit value is not sign-extended here, so a negative offset |  | ||||||
|     /// does not round-trip through [`squish`](Self::squish). Confirm whether callers rely on it. |  | ||||||
|     pub fn unsquish(value: u16) -> isize { |  | ||||||
|         let widened = value as isize; |  | ||||||
|         (widened + 1) << 1 |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for JumpTarget { |  | ||||||
|     /// Parses a [Number] if the stream offers one, and an [Identifier] otherwise |  | ||||||
|     /// |  | ||||||
|     /// # Errors |  | ||||||
|     /// A number is checked by [`JumpTarget::try_from`], so an odd or out-of-range offset |  | ||||||
|     /// is rejected here. Errors from either parser are passed on. |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: crate::TokenStream<'text> { |  | ||||||
|         match Number::try_parse(p, stream)? { |  | ||||||
|             Some(offset) => Self::try_from(offset), |  | ||||||
|             // Not a number, so it has to be a label |  | ||||||
|             None => Identifier::parse(p, stream).map(Self::Identifier), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl TryFrom<Number> for JumpTarget { |  | ||||||
|     type Error = ParseError; |  | ||||||
|     /// Squishes the number and stores the result in a [`JumpTarget::Number`] |  | ||||||
|     /// |  | ||||||
|     /// # Errors |  | ||||||
|     /// Passes on the error from [`JumpTarget::squish`] |  | ||||||
|     fn try_from(value: Number) -> Result<Self, Self::Error> { |  | ||||||
|         let squished: u16 = Self::squish(value.into())?; |  | ||||||
|         Ok(Self::Number(squished.into())) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for JumpTarget { |  | ||||||
|     /// Writes a numeric target as the lowercase hexadecimal result of |  | ||||||
|     /// [`JumpTarget::unsquish`], and a label as its identifier |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))), |  | ||||||
|             Self::Identifier(id) => write!(f, "{id}"), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,81 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Number`] represents a 16-bit signed or unsigned word |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
/// A numeric literal: a value paired with the radix it is displayed in.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Number(
    /// value
    isize,
    /// radix
    u32,
);
|  |  | ||||||
| impl Parsable for Number { |  | ||||||
|     // A number is: |  | ||||||
|     // [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         use Type as Ty; |  | ||||||
|         // The number is negative when it begins with a Minus, but Plus is also acceptable. |  | ||||||
|         let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus)); |  | ||||||
|         let radix = match stream |  | ||||||
|             .expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin]) |  | ||||||
|             .ok() |  | ||||||
|             .map(|t| t.variant()) |  | ||||||
|         { |  | ||||||
|             Some(Ty::RadixMarkerHex) => 16, |  | ||||||
|             Some(Ty::RadixMarkerDec) => 10, |  | ||||||
|             Some(Ty::RadixMarkerOct) => 8, |  | ||||||
|             Some(Ty::RadixMarkerBin) => 2, |  | ||||||
|             _ => p.radix, |  | ||||||
|         }; |  | ||||||
|         let number = stream.expect(Ty::Number)?; |  | ||||||
|         // TODO: Reintroduce error context |  | ||||||
|         let number = isize::from_str_radix(number.lexeme(), radix) |  | ||||||
|             .map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))? |  | ||||||
|             * if negative { -1 } else { 1 }; |  | ||||||
|         // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit) |  | ||||||
|         Ok(Self( |  | ||||||
|             if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? }, |  | ||||||
|             radix, |  | ||||||
|         )) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<isize> for Number { |  | ||||||
|     fn from(value: isize) -> Self { Self(value, 16) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Number> for isize { |  | ||||||
|     fn from(value: Number) -> Self { value.0 as Self } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<u16> for Number { |  | ||||||
|     fn from(value: u16) -> Self { Self(value as isize, 16) } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Number> for u16 { |  | ||||||
|     fn from(value: Number) -> Self { value.0 as Self } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::ops::Sub<isize> for Number { |  | ||||||
|     type Output = Self; |  | ||||||
|     fn sub(mut self, rhs: isize) -> Self::Output { |  | ||||||
|         self.0 -= rhs; |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::ops::Shr<usize> for Number { |  | ||||||
|     type Output = Self; |  | ||||||
|     fn shr(mut self, rhs: usize) -> Self::Output { |  | ||||||
|         self.0 >>= rhs; |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::fmt::Display for Number { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self.1 { |  | ||||||
|             2 => std::fmt::Binary::fmt(&self.0, f), |  | ||||||
|             8 => std::fmt::Octal::fmt(&self.0, f), |  | ||||||
|             16 => std::fmt::LowerHex::fmt(&self.0, f), |  | ||||||
|             _ => std::fmt::Display::fmt(&self.0, f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,146 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension |  | ||||||
| //! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`] |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// Contains the first [Register], addressing mode, and Extension Word for a |  | ||||||
| /// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction] |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub enum PrimaryOperand { |  | ||||||
|     Direct(Register), |  | ||||||
|     Indirect(Register), |  | ||||||
|     PostInc(Register), |  | ||||||
|     Indexed(Register, Number), |  | ||||||
|     Relative(Identifier), |  | ||||||
|     Absolute(Number), |  | ||||||
|     Immediate(Number), |  | ||||||
|     Four, |  | ||||||
|     Eight, |  | ||||||
|     Zero, |  | ||||||
|     One, |  | ||||||
|     Two, |  | ||||||
|     MinusOne, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl PrimaryOperand { |  | ||||||
|     /// Returns the mode bits |  | ||||||
|     pub fn mode(&self) -> u16 { |  | ||||||
|         use PrimaryOperand::*; |  | ||||||
|         match self { |  | ||||||
|             Direct(_) | Zero => 0, |  | ||||||
|             Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4, |  | ||||||
|             Indirect(_) | Two | Four => 2 << 4, |  | ||||||
|             PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     /// Gets the register |  | ||||||
|     pub fn register(&self) -> Register { |  | ||||||
|         use PrimaryOperand::*; |  | ||||||
|         match self { |  | ||||||
|             Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r, |  | ||||||
|             Immediate(_) | Relative(_) => Register::pc, |  | ||||||
|             Absolute(_) | Four | Eight => Register::sr, |  | ||||||
|             Zero | One | Two | MinusOne => Register::cg, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     /// Gets the extension word, if present |  | ||||||
|     pub fn ext_word(&self) -> Option<u16> { |  | ||||||
|         use PrimaryOperand::*; |  | ||||||
|         match self { |  | ||||||
|             Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()), |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for PrimaryOperand { |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: crate::TokenStream<'text> { |  | ||||||
|         // Try parsing as Register (Direct) |  | ||||||
|         if let Some(r) = Register::try_parse(p, stream)? { |  | ||||||
|             return Ok(Self::Direct(r)); |  | ||||||
|         } |  | ||||||
|         // Try parsing as Number (Indexed) |  | ||||||
|         if let Some(idx) = Number::try_parse(p, stream)? { |  | ||||||
|             stream.expect(Type::LParen)?; |  | ||||||
|             let reg = Register::parse(p, stream)?; |  | ||||||
|             stream.expect(Type::RParen)?; |  | ||||||
|             return Ok(Self::Indexed(reg, idx)); |  | ||||||
|         } |  | ||||||
|         // Try parsing as Identifier (Relative, label mode) |  | ||||||
|         if let Some(id) = Identifier::try_parse(p, stream)? { |  | ||||||
|             return Ok(Self::Relative(id)); |  | ||||||
|         } |  | ||||||
|         // Or directly match any of the valid prefix markers |  | ||||||
|         // Register, Number, and Identifier are included here to make error messages clearer. |  | ||||||
|         // their inclusion will cause a negligible slowdown when the next token is not a prefix marker |  | ||||||
|         // (a failure condition) |  | ||||||
|         let token = stream.expect_any_of([ |  | ||||||
|             Type::Indirect, |  | ||||||
|             Type::Absolute, |  | ||||||
|             Type::Immediate, |  | ||||||
|             Type::Register, |  | ||||||
|             Type::Number, |  | ||||||
|             Type::Identifier, |  | ||||||
|         ])?; |  | ||||||
|         Ok(match token.variant() { |  | ||||||
|             Type::Indirect => { |  | ||||||
|                 let reg = Register::parse(p, stream)?; |  | ||||||
|                 match stream.expect(Type::Plus) { |  | ||||||
|                     Ok(_) => Self::PostInc(reg), |  | ||||||
|                     Err(_) => Self::Indirect(reg), |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             Type::Absolute => Self::Absolute(Number::parse(p, stream)?), |  | ||||||
|             Type::Immediate => { |  | ||||||
|                 let number = Number::parse(p, stream)?; |  | ||||||
|                 match number.into() { |  | ||||||
|                     // There are two representations for the all-ones constant, since Number preserves |  | ||||||
|                     // signedness. |  | ||||||
|                     -1_isize | 0xffff => Self::MinusOne, |  | ||||||
|                     0 => Self::Zero, |  | ||||||
|                     1 => Self::One, |  | ||||||
|                     2 => Self::Two, |  | ||||||
|                     4 => Self::Four, |  | ||||||
|                     8 => Self::Eight, |  | ||||||
|                     _ => Self::Immediate(number), |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             _ => unreachable!("Token {token:?} passed expectation but failed match!"), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<SecondaryOperand> for PrimaryOperand { |  | ||||||
|     fn from(value: SecondaryOperand) -> Self { |  | ||||||
|         match value { |  | ||||||
|             SecondaryOperand::Direct(r) => Self::Direct(r), |  | ||||||
|             SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n), |  | ||||||
|             SecondaryOperand::Absolute(n) => Self::Absolute(n), |  | ||||||
|             SecondaryOperand::Relative(id) => Self::Relative(id), |  | ||||||
|             SecondaryOperand::Zero => Self::Zero, |  | ||||||
|             SecondaryOperand::One => Self::One, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for PrimaryOperand { |  | ||||||
|     // Turn the operand back into a form which parses into the same type |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::Direct(r) => Display::fmt(r, f), |  | ||||||
|             Self::Indirect(r) => write!(f, "@{r}"), |  | ||||||
|             Self::PostInc(r) => write!(f, "@{r}+"), |  | ||||||
|             Self::Indexed(r, idx) => write!(f, "{idx}({r})"), |  | ||||||
|             Self::Relative(id) => Display::fmt(id, f), |  | ||||||
|             Self::Absolute(n) => write!(f, "&{n}"), |  | ||||||
|             Self::Immediate(n) => write!(f, "#{n}"), |  | ||||||
|             Self::Four => Display::fmt("#4", f), |  | ||||||
|             Self::Eight => Display::fmt("#8", f), |  | ||||||
|             Self::Zero => Display::fmt("#0", f), |  | ||||||
|             Self::One => Display::fmt("#1", f), |  | ||||||
|             Self::Two => Display::fmt("#2", f), |  | ||||||
|             Self::MinusOne => Display::fmt("#-1", f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,112 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html) |  | ||||||
| use super::*; |  | ||||||
| use std::str::FromStr; |  | ||||||
|  |  | ||||||
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
///
/// Variants are declared in register-number order, so `pc` is 0 and `r15` is 15.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Register {
    /// Program Counter
    pc,
    /// Stack Pointer
    sp,
    /// Status Register
    sr,
    /// Constant Generator
    cg,
    // Numbered registers
    r4,
    r5,
    r6,
    r7,
    r8,
    r9,
    r10,
    r11,
    r12,
    r13,
    r14,
    r15,
}
|  |  | ||||||
| impl Parsable for Register { |  | ||||||
|     fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: crate::TokenStream<'text> { |  | ||||||
|         stream.expect(Type::Register)?.lexeme().parse() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Register> for u16 { |  | ||||||
|     fn from(value: Register) -> Self { value as u16 } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl TryFrom<u16> for Register { |  | ||||||
|     type Error = ParseError; |  | ||||||
|     fn try_from(value: u16) -> Result<Self, Self::Error> { |  | ||||||
|         use Register::*; |  | ||||||
|         Ok(match value { |  | ||||||
|             0 => pc, |  | ||||||
|             1 => sp, |  | ||||||
|             2 => sr, |  | ||||||
|             3 => cg, |  | ||||||
|             4 => r4, |  | ||||||
|             5 => r5, |  | ||||||
|             6 => r6, |  | ||||||
|             7 => r7, |  | ||||||
|             8 => r8, |  | ||||||
|             9 => r9, |  | ||||||
|             10 => r10, |  | ||||||
|             11 => r11, |  | ||||||
|             12 => r12, |  | ||||||
|             13 => r13, |  | ||||||
|             14 => r14, |  | ||||||
|             15 => r15, |  | ||||||
|             _ => return Err(ParseError::RegisterTooHigh(value)), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl FromStr for Register { |  | ||||||
|     type Err = ParseError; |  | ||||||
|  |  | ||||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { |  | ||||||
|         use Register::*; |  | ||||||
|         match s { |  | ||||||
|             "pc" => Ok(pc), |  | ||||||
|             "sp" => Ok(sp), |  | ||||||
|             "sr" => Ok(sr), |  | ||||||
|             "cg" => Ok(cg), |  | ||||||
|             _ => { |  | ||||||
|                 str::parse::<u16>(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into() |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl From<Register> for &str { |  | ||||||
|     fn from(value: Register) -> Self { |  | ||||||
|         use Register::*; |  | ||||||
|         match value { |  | ||||||
|             pc => "pc", |  | ||||||
|             sp => "sp", |  | ||||||
|             sr => "sr", |  | ||||||
|             cg => "cg", |  | ||||||
|             r4 => "r4", |  | ||||||
|             r5 => "r5", |  | ||||||
|             r6 => "r6", |  | ||||||
|             r7 => "r7", |  | ||||||
|             r8 => "r8", |  | ||||||
|             r9 => "r9", |  | ||||||
|             r10 => "r10", |  | ||||||
|             r11 => "r11", |  | ||||||
|             r12 => "r12", |  | ||||||
|             r13 => "r13", |  | ||||||
|             r14 => "r14", |  | ||||||
|             r15 => "r15", |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::fmt::Display for Register { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) } |  | ||||||
| } |  | ||||||
| @@ -1,105 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension |  | ||||||
| //! Word for a [two-operand](Encoding::Double) [`Instruction`] |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// The destination of a [Double](Encoding::Double) |  | ||||||
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |  | ||||||
| pub enum SecondaryOperand { |  | ||||||
|     Direct(Register), |  | ||||||
|     Indexed(Register, Number), |  | ||||||
|     Relative(Identifier), |  | ||||||
|     Absolute(Number), |  | ||||||
|     // Joke encodings? |  | ||||||
|     Zero, |  | ||||||
|     One, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| use SecondaryOperand as So; |  | ||||||
|  |  | ||||||
| impl SecondaryOperand { |  | ||||||
|     pub fn mode(&self) -> u16 { |  | ||||||
|         match self { |  | ||||||
|             So::Direct(_) | So::Zero => 0, |  | ||||||
|             So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     pub fn register(&self) -> Register { |  | ||||||
|         use SecondaryOperand::*; |  | ||||||
|         match self { |  | ||||||
|             Direct(r) | Indexed(r, _) => *r, |  | ||||||
|             Relative(_) => Register::pc, |  | ||||||
|             Absolute(_) => Register::sr, |  | ||||||
|             Zero | One => Register::cg, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     /// This is the only way to have an extension word |  | ||||||
|     pub fn ext_word(&self) -> Option<u16> { |  | ||||||
|         use SecondaryOperand::*; |  | ||||||
|         match self { |  | ||||||
|             Indexed(_, w) | Absolute(w) => Some((*w).into()), |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for SecondaryOperand { |  | ||||||
|     // Separator |  | ||||||
|     // - Register  => Direct |  | ||||||
|     // - Number    => Indexed |  | ||||||
|     //     - OpenIdx |  | ||||||
|     //         - Register |  | ||||||
|     //             - CloseIdx |  | ||||||
|     // - Absolute |  | ||||||
|     //   - Number |  | ||||||
|     // - Immediate |  | ||||||
|     //   - Number == 0, 1 |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: crate::TokenStream<'text> { |  | ||||||
|         use SecondaryOperand::*; |  | ||||||
|         stream.allow(Type::Separator); |  | ||||||
|         // Try parsing as Register (Direct) |  | ||||||
|         if let Some(r) = Register::try_parse(p, stream)? { |  | ||||||
|             return Ok(Self::Direct(r)); |  | ||||||
|         } |  | ||||||
|         // Try parsing as Number (Indexed) |  | ||||||
|         if let Some(idx) = Number::try_parse(p, stream)? { |  | ||||||
|             stream.expect(Type::LParen)?; |  | ||||||
|             let reg = Register::parse(p, stream)?; |  | ||||||
|             stream.expect(Type::RParen)?; |  | ||||||
|             return Ok(Self::Indexed(reg, idx)); |  | ||||||
|         } |  | ||||||
|         // Try parsing as Identifier (Relative, label mode) |  | ||||||
|         if let Some(id) = Identifier::try_parse(p, stream)? { |  | ||||||
|             return Ok(Self::Relative(id)); |  | ||||||
|         } |  | ||||||
|         // Register, Number, and Identifier are included here to make error messages clearer. |  | ||||||
|         // their inclusion will cause a negligible slowdown when the next token is not a prefix marker |  | ||||||
|         // (a failure condition) but should not match a token |  | ||||||
|         let token = |  | ||||||
|             stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?; |  | ||||||
|         Ok(match token.variant() { |  | ||||||
|             Type::Absolute => Absolute(Number::parse(p, stream)?), |  | ||||||
|             // TODO: Reintroduce error context |  | ||||||
|             Type::Immediate => match Number::parse(p, stream)?.into() { |  | ||||||
|                 0 => Zero, |  | ||||||
|                 1 => One, |  | ||||||
|                 n => Err(ParseError::FatSecondaryImmediate(n))?, |  | ||||||
|             }, |  | ||||||
|             _ => unreachable!("Token {token:?} passed expectation but failed match!"), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for SecondaryOperand { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::Direct(r) => Display::fmt(r, f), |  | ||||||
|             Self::Indexed(r, idx) => write!(f, "{idx}({r})"), |  | ||||||
|             Self::Relative(id) => Display::fmt(id, f), |  | ||||||
|             Self::Absolute(n) => write!(f, "&{n}"), |  | ||||||
|             Self::Zero => Display::fmt("#0", f), |  | ||||||
|             Self::One => Display::fmt("#1", f), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,32 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Width`] represents whether an instruction operates on whole words or bytes |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
/// Represents an instruction's operand width.
///
/// The wrapped flag is `true` when the instruction takes byte-sized operands, and `false`
/// (the default) when it takes word-sized operands.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Width(
    /// `true` for byte width
    bool,
);
|  |  | ||||||
| impl Parsable for Width { |  | ||||||
|     fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else { |  | ||||||
|             return Ok(Self(false)); |  | ||||||
|         }; |  | ||||||
|         Ok(Self(token.is_variant(Type::ByteWidth))) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl From<Width> for u16 { |  | ||||||
|     fn from(value: Width) -> Self { (value.0 as Self) << 6 } |  | ||||||
| } |  | ||||||
| impl From<Width> for bool { |  | ||||||
|     fn from(value: Width) -> Self { value.0 } |  | ||||||
| } |  | ||||||
| impl From<bool> for Width { |  | ||||||
|     fn from(value: bool) -> Self { Width(value) } |  | ||||||
| } |  | ||||||
| impl std::fmt::Display for Width { |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) } |  | ||||||
| } |  | ||||||
| @@ -1,261 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! An [`Opcode`] encodes an msp430 operation |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| use std::str::FromStr; |  | ||||||
|  |  | ||||||
/// Opcode from the [MSPGCC Manual][1]
///
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
/// extract from a [TokenStream] only the required arguments for that call.
///
/// The "emulated" opcodes carry no explicit discriminant; [`resolve()`](Opcode::resolve())
/// replaces each of them with one of the opcodes below that do.
///
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
// The `0 << n` terms keep each group's discriminants visually parallel
#[allow(clippy::identity_op)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
    // "Emulated" opcodes
    Nop,
    Pop,
    Br,
    Ret,
    Clrc,
    Setc,
    Clrz,
    Setz,
    Clrn,
    Setn,
    Dint,
    Eint,
    Rla,
    Rlc,
    Inv,
    Clr,
    Tst,
    Dec,
    Decd,
    Inc,
    Incd,
    Adc,
    Dadc,
    Sbc,
    // Single: 0x1000, with the operation number starting at bit 7
    Rrc  = 0x1000 | (0 << 7),
    Swpb = 0x1000 | (1 << 7),
    Rra  = 0x1000 | (2 << 7),
    Sxt  = 0x1000 | (3 << 7),
    Push = 0x1000 | (4 << 7),
    Call = 0x1000 | (5 << 7),
    Reti = 0x1000 | (6 << 7),
    // Jump: 0x2000, with the condition number starting at bit 10
    Jnz  = 0x2000 | (0 << 10),
    Jz   = 0x2000 | (1 << 10),
    Jnc  = 0x2000 | (2 << 10),
    Jc   = 0x2000 | (3 << 10),
    Jn   = 0x2000 | (4 << 10),
    Jge  = 0x2000 | (5 << 10),
    Jl   = 0x2000 | (6 << 10),
    Jmp  = 0x2000 | (7 << 10),
    // Double: the operation number is the top nibble
    Mov  = 0x4000,
    Add  = 0x5000,
    Addc = 0x6000,
    Subc = 0x7000,
    Sub  = 0x8000,
    Cmp  = 0x9000,
    Dadd = 0xa000,
    Bit  = 0xb000,
    Bic  = 0xc000,
    Bis  = 0xd000,
    Xor  = 0xe000,
    And  = 0xf000,
}
|  |  | ||||||
| impl Opcode { |  | ||||||
|     /// Resolve an Opcode into an [Opcode] and an [EncodingParser] |  | ||||||
|     pub fn resolve(self) -> (Opcode, EncodingParser) { |  | ||||||
|         use super::Encoding as Enc; |  | ||||||
|         use Register as Reg; |  | ||||||
|         use {PrimaryOperand as Src, SecondaryOperand as Dst}; |  | ||||||
|         match self { |  | ||||||
|             Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()), |  | ||||||
|             // these instructions do not take a width specifier (though they may still behave properly) |  | ||||||
|             Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()), |  | ||||||
|             // `reti` does not take any operands. |  | ||||||
|             Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()), |  | ||||||
|             Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => { |  | ||||||
|                 (self, Enc::jump().end()) |  | ||||||
|             } |  | ||||||
|             Self::Mov |  | ||||||
|             | Self::Add |  | ||||||
|             | Self::Addc |  | ||||||
|             | Self::Subc |  | ||||||
|             | Self::Sub |  | ||||||
|             | Self::Cmp |  | ||||||
|             | Self::Dadd |  | ||||||
|             | Self::Bit |  | ||||||
|             | Self::Bic |  | ||||||
|             | Self::Bis |  | ||||||
|             | Self::Xor |  | ||||||
|             | Self::And => (self, Enc::double().end()), |  | ||||||
|             Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()), |  | ||||||
|             Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()), |  | ||||||
|             Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()), |  | ||||||
|             Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()), |  | ||||||
|             Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()), |  | ||||||
|             Self::Rla => (Self::Add, Enc::reflexive().end()), |  | ||||||
|             Self::Rlc => (Self::Addc, Enc::reflexive().end()), |  | ||||||
|             Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()), |  | ||||||
|             Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()), |  | ||||||
|             Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()), |  | ||||||
|             Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()), |  | ||||||
|             Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()), |  | ||||||
|             Self::Inc => (Self::Add, Enc::double().src(Src::One).end()), |  | ||||||
|             Self::Incd => (Self::Add, Enc::double().src(Src::Two).end()), |  | ||||||
|             Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()), |  | ||||||
|             Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()), |  | ||||||
|             Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for Opcode { |  | ||||||
|     fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         // TODO: Reintroduce error context |  | ||||||
|         stream.expect(Type::Insn)?.parse() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl FromStr for Opcode { |  | ||||||
|     type Err = ParseError; |  | ||||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { |  | ||||||
|         //TODO: Reduce allocations here? |  | ||||||
|         let s = s.to_ascii_lowercase(); |  | ||||||
|         Ok(match s.as_str() { |  | ||||||
|             "rrc" => Self::Rrc, |  | ||||||
|             "swpb" => Self::Swpb, |  | ||||||
|             "rra" => Self::Rra, |  | ||||||
|             "sxt" => Self::Sxt, |  | ||||||
|             "push" => Self::Push, |  | ||||||
|             "call" => Self::Call, |  | ||||||
|             "reti" => Self::Reti, |  | ||||||
|  |  | ||||||
|             "jne" | "jnz" => Self::Jnz, |  | ||||||
|             "jeq" | "jz" => Self::Jz, |  | ||||||
|             "jnc" | "jlo" => Self::Jnc, |  | ||||||
|             "jc" | "jhs" => Self::Jc, |  | ||||||
|             "jn" => Self::Jn, |  | ||||||
|             "jge" => Self::Jge, |  | ||||||
|             "jl" => Self::Jl, |  | ||||||
|             "jmp" => Self::Jmp, |  | ||||||
|  |  | ||||||
|             "mov" => Self::Mov, |  | ||||||
|             "add" => Self::Add, |  | ||||||
|             "addc" => Self::Addc, |  | ||||||
|             "subc" => Self::Subc, |  | ||||||
|             "sub" => Self::Sub, |  | ||||||
|             "cmp" => Self::Cmp, |  | ||||||
|             "dadd" => Self::Dadd, |  | ||||||
|             "bit" => Self::Bit, |  | ||||||
|             "bic" => Self::Bic, |  | ||||||
|             "bis" => Self::Bis, |  | ||||||
|             "xor" => Self::Xor, |  | ||||||
|             "and" => Self::And, |  | ||||||
|  |  | ||||||
|             "nop" => Self::Nop, |  | ||||||
|             "pop" => Self::Pop, |  | ||||||
|             "br" => Self::Br, |  | ||||||
|             "ret" => Self::Ret, |  | ||||||
|             "clrc" => Self::Clrc, |  | ||||||
|             "setc" => Self::Setc, |  | ||||||
|             "clrz" => Self::Clrz, |  | ||||||
|             "setz" => Self::Setz, |  | ||||||
|             "clrn" => Self::Clrn, |  | ||||||
|             "setn" => Self::Setn, |  | ||||||
|             "dint" => Self::Dint, |  | ||||||
|             "eint" => Self::Eint, |  | ||||||
|             "rla" => Self::Rla, |  | ||||||
|             "rlc" => Self::Rlc, |  | ||||||
|             "inv" => Self::Inv, |  | ||||||
|             "clr" => Self::Clr, |  | ||||||
|             "tst" => Self::Tst, |  | ||||||
|             "dec" => Self::Dec, |  | ||||||
|             "decd" => Self::Decd, |  | ||||||
|             "inc" => Self::Inc, |  | ||||||
|             "incd" => Self::Incd, |  | ||||||
|             "adc" => Self::Adc, |  | ||||||
|             "dadc" => Self::Dadc, |  | ||||||
|             "sbc" => Self::Sbc, |  | ||||||
|             _ => Err(ParseError::UnrecognizedOpcode(s))?, |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Opcode { |  | ||||||
|     /// Writes the lowercase assembly mnemonic of this opcode |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let mnemonic = match self { |  | ||||||
|             Self::Nop => "nop", |  | ||||||
|             Self::Pop => "pop", |  | ||||||
|             Self::Br => "br", |  | ||||||
|             Self::Ret => "ret", |  | ||||||
|             Self::Clrc => "clrc", |  | ||||||
|             Self::Setc => "setc", |  | ||||||
|             Self::Clrz => "clrz", |  | ||||||
|             Self::Setz => "setz", |  | ||||||
|             Self::Clrn => "clrn", |  | ||||||
|             Self::Setn => "setn", |  | ||||||
|             Self::Dint => "dint", |  | ||||||
|             Self::Eint => "eint", |  | ||||||
|             Self::Rla => "rla", |  | ||||||
|             Self::Rlc => "rlc", |  | ||||||
|             Self::Inv => "inv", |  | ||||||
|             Self::Clr => "clr", |  | ||||||
|             Self::Tst => "tst", |  | ||||||
|             Self::Dec => "dec", |  | ||||||
|             Self::Decd => "decd", |  | ||||||
|             Self::Inc => "inc", |  | ||||||
|             Self::Incd => "incd", |  | ||||||
|             Self::Adc => "adc", |  | ||||||
|             Self::Dadc => "dadc", |  | ||||||
|             Self::Sbc => "sbc", |  | ||||||
|             Self::Rrc => "rrc", |  | ||||||
|             Self::Swpb => "swpb", |  | ||||||
|             Self::Rra => "rra", |  | ||||||
|             Self::Sxt => "sxt", |  | ||||||
|             Self::Push => "push", |  | ||||||
|             Self::Call => "call", |  | ||||||
|             Self::Reti => "reti", |  | ||||||
|             Self::Jnz => "jnz", |  | ||||||
|             Self::Jz => "jz", |  | ||||||
|             Self::Jnc => "jnc", |  | ||||||
|             Self::Jc => "jc", |  | ||||||
|             Self::Jn => "jn", |  | ||||||
|             Self::Jge => "jge", |  | ||||||
|             Self::Jl => "jl", |  | ||||||
|             Self::Jmp => "jmp", |  | ||||||
|             Self::Mov => "mov", |  | ||||||
|             Self::Add => "add", |  | ||||||
|             Self::Addc => "addc", |  | ||||||
|             Self::Subc => "subc", |  | ||||||
|             Self::Sub => "sub", |  | ||||||
|             Self::Cmp => "cmp", |  | ||||||
|             Self::Dadd => "dadd", |  | ||||||
|             Self::Bit => "bit", |  | ||||||
|             Self::Bic => "bic", |  | ||||||
|             Self::Bis => "bis", |  | ||||||
|             Self::Xor => "xor", |  | ||||||
|             Self::And => "and", |  | ||||||
|         }; |  | ||||||
|         f.write_str(mnemonic) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,21 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! The definition of a label |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// A label definition, wrapping the [`Identifier`] which names it |  | ||||||
| #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] |  | ||||||
| pub struct Label(pub Identifier); |  | ||||||
|  |  | ||||||
| impl Parsable for Label { |  | ||||||
|     /// Parses an [`Identifier`], then requires a [`Type::Label`] token to follow it |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let name = Identifier::parse(p, stream)?; |  | ||||||
|         stream.require(Type::Label)?; |  | ||||||
|         Ok(Self(name)) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Label { |  | ||||||
|     /// Writes the label's identifier, followed by a colon |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         let Self(name) = self; |  | ||||||
|         write!(f, "{name}:") |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,72 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same |  | ||||||
| //! document line will be treated as if they took up multiple [`Line`s](Line). |  | ||||||
| //! |  | ||||||
| //! A line contains one of: |  | ||||||
| //! - [`Label`] |  | ||||||
| //! - [`Instruction`] |  | ||||||
| //! - [`Directive`] |  | ||||||
| //! - [`Comment`] |  | ||||||
| //! - [Nothing](Line::Empty) |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// A single subcomponent of the document |  | ||||||
| #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] |  | ||||||
| pub enum Line { |  | ||||||
|     /// Nothing at all |  | ||||||
|     Empty, |  | ||||||
|     /// An [`Instruction`] |  | ||||||
|     Insn(Instruction), |  | ||||||
|     /// A [`Comment`] |  | ||||||
|     Comment(Comment), |  | ||||||
|     /// A [`Directive`] |  | ||||||
|     Directive(Directive), |  | ||||||
|     /// A [`Label`] definition |  | ||||||
|     Label(Label), |  | ||||||
|     /// The expected end of the file |  | ||||||
|     EndOfFile, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for Line { |  | ||||||
|     /// Peeks at the next token to decide which kind of line follows, then parses that kind |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let upcoming = stream |  | ||||||
|             .peek_expect_any_of([ |  | ||||||
|                 Type::Endl, |  | ||||||
|                 Type::Insn, |  | ||||||
|                 Type::Comment, |  | ||||||
|                 Type::Directive, |  | ||||||
|                 Type::Identifier, |  | ||||||
|                 Type::EndOfFile, |  | ||||||
|             ])? |  | ||||||
|             .variant(); |  | ||||||
|         let line = match upcoming { |  | ||||||
|             // These two advance the stream here; every other kind defers to its own parser |  | ||||||
|             Type::Endl => { |  | ||||||
|                 stream.next(); |  | ||||||
|                 Self::Empty |  | ||||||
|             } |  | ||||||
|             Type::EndOfFile => { |  | ||||||
|                 stream.next(); |  | ||||||
|                 Self::EndOfFile |  | ||||||
|             } |  | ||||||
|             Type::Insn => Self::Insn(Instruction::parse(p, stream)?), |  | ||||||
|             Type::Comment => Self::Comment(Comment::parse(p, stream)?), |  | ||||||
|             Type::Directive => Self::Directive(Directive::parse(p, stream)?), |  | ||||||
|             // A line which begins with an identifier is parsed as a label definition |  | ||||||
|             Type::Identifier => Self::Label(Label::parse(p, stream)?), |  | ||||||
|             _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"), |  | ||||||
|         }; |  | ||||||
|         Ok(line) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| impl Display for Line { |  | ||||||
|     /// Delegates to the contained node; the two payload-free variants write fixed text |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         match self { |  | ||||||
|             Self::Label(label) => Display::fmt(label, f), |  | ||||||
|             Self::Insn(insn) => Display::fmt(insn, f), |  | ||||||
|             Self::Directive(directive) => Display::fmt(directive, f), |  | ||||||
|             Self::Comment(comment) => Display::fmt(comment, f), |  | ||||||
|             Self::Empty => writeln!(f, "\n"), |  | ||||||
|             Self::EndOfFile => f.write_str("; End of file."), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,85 +0,0 @@ | |||||||
| // © 2023 John Breaux |  | ||||||
| //! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html) |  | ||||||
| use super::*; |  | ||||||
| /// An AST node which can be built from the tokens of a [stream](TokenStream) |  | ||||||
| pub trait Parsable { |  | ||||||
|     /// Parses tokens from the [`TokenStream`] into a `Self` node |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where |  | ||||||
|         Self: Sized, |  | ||||||
|         T: TokenStream<'text>; |  | ||||||
|  |  | ||||||
|     /// Attempts to [parse](Parsable::parse) a `Self` node from the [stream](TokenStream). |  | ||||||
|     /// |  | ||||||
|     /// A [`ParseError::LexError`] is masked as `Ok(None)`; any other error is returned as-is. |  | ||||||
|     fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError> |  | ||||||
|     where |  | ||||||
|         Self: Sized, |  | ||||||
|         T: TokenStream<'text>, |  | ||||||
|     { |  | ||||||
|         Self::parse(p, stream).map(Some).or_else(|error| match error { |  | ||||||
|             ParseError::LexError(_) => Ok(None), |  | ||||||
|             other => Err(other), |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// [Parses](Parsable::parse) a `Self` node, then calls `f` on the same parser and stream. |  | ||||||
|     /// |  | ||||||
|     /// Returns the node paired with the result of `f`. `f` is not called if parsing fails. |  | ||||||
|     fn parse_and<'text, T, R>( |  | ||||||
|         p: &Parser, |  | ||||||
|         stream: &mut T, |  | ||||||
|         f: fn(p: &Parser, &mut T) -> R, |  | ||||||
|     ) -> Result<(Self, R), ParseError> |  | ||||||
|     where |  | ||||||
|         Self: Sized, |  | ||||||
|         T: TokenStream<'text>, |  | ||||||
|     { |  | ||||||
|         let node = Self::parse(p, stream)?; |  | ||||||
|         let rest = f(p, stream); |  | ||||||
|         Ok((node, rest)) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Attempts to [parse](Parsable::parse) a `Self` node from the [stream](TokenStream). |  | ||||||
|     /// |  | ||||||
|     /// Returns [`Self::default()`](Default::default()) on any error |  | ||||||
|     fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self |  | ||||||
|     where |  | ||||||
|         Self: Sized + Default, |  | ||||||
|         T: TokenStream<'text>, |  | ||||||
|     { |  | ||||||
|         match Self::parse(p, stream) { |  | ||||||
|             Ok(node) => node, |  | ||||||
|             Err(_) => Self::default(), |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Implements [`Parsable`] for each listed type by expecting a [`Type::String`] token and |  | ||||||
| /// converting its lexeme, with any leading and trailing `"` trimmed, [into](Into) that type |  | ||||||
| macro_rules! parsable_str_types { |  | ||||||
|     ($($target:ty),*$(,)?) => {$( |  | ||||||
|         impl Parsable for $target { |  | ||||||
|             fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|             where T: TokenStream<'text> { |  | ||||||
|                 let token = stream.expect(Type::String)?; |  | ||||||
|                 Ok(token.lexeme().trim_matches('"').into()) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     )*}; |  | ||||||
| } |  | ||||||
| use std::{path::PathBuf, rc::Rc}; |  | ||||||
| parsable_str_types![String, Rc<str>, Box<str>, PathBuf]; |  | ||||||
|  |  | ||||||
| /// A bracketed list of any [`Parsable`] is itself [`Parsable`] |  | ||||||
| impl<P: Parsable> Parsable for Vec<P> { |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         // Example inputs: |  | ||||||
|         //   [dead beef] |  | ||||||
|         //   [A, B,] |  | ||||||
|         //   [c d e f] |  | ||||||
|         //   [ something |  | ||||||
|         //     else      ] |  | ||||||
|  |  | ||||||
|         stream.require(Type::LBracket)?; |  | ||||||
|         stream.allow(Type::Endl); |  | ||||||
|         let mut items = Vec::new(); |  | ||||||
|         loop { |  | ||||||
|             // The list ends at the first item whose failed parse is masked by `try_parse` |  | ||||||
|             let Some(item) = P::try_parse(p, stream)? else { break }; |  | ||||||
|             items.push(item); |  | ||||||
|             stream.allow(Type::Separator); |  | ||||||
|             stream.allow(Type::Endl); |  | ||||||
|         } |  | ||||||
|         stream.require(Type::RBracket)?; |  | ||||||
|         Ok(items) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,51 +0,0 @@ | |||||||
| use std::path::{Path, PathBuf}; |  | ||||||
|  |  | ||||||
| // © 2023 John Breaux |  | ||||||
| use super::*; |  | ||||||
|  |  | ||||||
| /// Contains the entire AST: an optional source file path, and every parsed [`Line`] paired |  | ||||||
| /// with the line number the stream reported just before that line was parsed |  | ||||||
| #[derive(Clone, Default, Hash, PartialEq, Eq, PartialOrd, Ord)] |  | ||||||
| pub struct Root(Option<PathBuf>, Vec<(usize, Line)>); |  | ||||||
|  |  | ||||||
| impl Root { |  | ||||||
|     /// Returns the path of the source file, if one has been set |  | ||||||
|     pub fn file(&self) -> Option<&Path> { |  | ||||||
|         let Self(path, _) = self; |  | ||||||
|         path.as_deref() |  | ||||||
|     } |  | ||||||
|     /// Consumes the [`Root`], returning it with its file path replaced by `path` |  | ||||||
|     pub(crate) fn set_file(self, path: PathBuf) -> Self { |  | ||||||
|         let Self(_, lines) = self; |  | ||||||
|         Self(Some(path), lines) |  | ||||||
|     } |  | ||||||
|     /// Returns every `(line number, Line)` pair, in the order they were parsed |  | ||||||
|     pub fn lines(&self) -> &[(usize, Line)] { |  | ||||||
|         self.1.as_slice() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Parsable for Root { |  | ||||||
|     /// Parses [`Line`]s until [`Line::EndOfFile`], which is not itself stored. |  | ||||||
|     /// |  | ||||||
|     /// The returned [`Root`] has no file path set. |  | ||||||
|     fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError> |  | ||||||
|     where T: TokenStream<'text> { |  | ||||||
|         let mut lines = Vec::new(); |  | ||||||
|         loop { |  | ||||||
|             // Sample the line number before parsing advances the stream |  | ||||||
|             let number = stream.context().line(); |  | ||||||
|             let line = Line::parse(p, stream)?; |  | ||||||
|             if matches!(line, Line::EndOfFile) { |  | ||||||
|                 return Ok(Root(None, lines)); |  | ||||||
|             } |  | ||||||
|             lines.push((number, line)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Display for Root { |  | ||||||
|     /// Writes each line as `{number:3}: {line} `, padded according to the formatter's options |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         self.1.iter().try_for_each(|(num, line)| { |  | ||||||
|             let text = format!("{num:3}: {line} "); |  | ||||||
|             f.pad(&text) |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl Debug for Root { |  | ||||||
|     /// Debug-formats every `(line number, Line)` entry of the AST, in order. |  | ||||||
|     /// |  | ||||||
|     /// The entries live in field `.1`. Field `.0` is the optional file path, so iterating |  | ||||||
|     /// it would print at most the path, and never a parsed line. |  | ||||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |  | ||||||
|         for line in self.1.iter() { |  | ||||||
|             Debug::fmt(line, f)?; |  | ||||||
|         } |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
							
								
								
									
										256
									
								
								src/parser/tests.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										256
									
								
								src/parser/tests.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,256 @@ | |||||||
|  | #![allow(non_upper_case_globals)] | ||||||
|  | use super::*; | ||||||
|  | use crate::lexer::token; | ||||||
|  |  | ||||||
|  | /// Panics unless the expression matches one of the patterns (each with an optional `if` | ||||||
|  | /// guard). Defined here because [assert_matches](core::assert_matches::assert_matches) is | ||||||
|  | /// unstable. | ||||||
|  | macro_rules! assert_matches { | ||||||
|  |     ($value: expr, $($pattern: pat $(if $guard:expr)?)* ) => { | ||||||
|  |         match $value { | ||||||
|  |             $($pattern $(if $guard)? => (),)* | ||||||
|  |             _ => panic!("{}", stringify!($value did not match $($pattern),*)), | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Simplified grammar for constructing an expected expression in tests | ||||||
|  | macro_rules! expr { | ||||||
|  |     // name | ||||||
|  |     ($name:ident) => { | ||||||
|  |         Expr::Ident(stringify!($name)).into() | ||||||
|  |     }; | ||||||
|  |     // 42 | ||||||
|  |     ($number:literal) => { | ||||||
|  |         Expr::Number($number).into() | ||||||
|  |     }; | ||||||
|  |     // & "name" | ||||||
|  |     (& $target:literal) => { | ||||||
|  |         Expr::AddrOf($target).into() | ||||||
|  |     }; | ||||||
|  |     // ( inner ) | ||||||
|  |     (($($inner:tt)*)) => { | ||||||
|  |         Expr::Group(expr!($($inner)*)) | ||||||
|  |     }; | ||||||
|  |     // [UnOp UnOp ...] operand | ||||||
|  |     ([$($unary:tt)*] $($operand:tt)*) => { | ||||||
|  |         Expr::Unary(vec![$(UnOp::$unary),*], expr!($($operand)*)) | ||||||
|  |     }; | ||||||
|  |     // (first) BinOp(next) BinOp(next) ... | ||||||
|  |     (($($first:tt)*) $($binary:tt ($($next:tt)*))+) => { | ||||||
|  |         Expr::Binary(expr!($($first)*), vec![$((BinOp::$binary, expr!($($next)*))),+]) | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Asserts that parsing `$source` succeeds, and that the result is equal to `$want` | ||||||
|  | macro_rules! passert { | ||||||
|  |     ($want:expr, $source:literal) => { | ||||||
|  |         assert_eq!($want, Parsable::parse($source).unwrap()) | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // #[test] | ||||||
|  | // fn statements() { | ||||||
|  | // passert!(, ""); | ||||||
|  | // } | ||||||
|  | // #[test] | ||||||
|  | // fn statement() { | ||||||
|  | // passert!(, ""); | ||||||
|  | // } | ||||||
|  | #[test] | ||||||
|  | fn directive() { | ||||||
|  |     // Directives which take a single expression | ||||||
|  |     passert!(Directive::Org(expr!(0x8000)), ".org 0x8000"); | ||||||
|  |     assert_eq!(Directive::parse(".word 0x40").unwrap(), Directive::Word(expr!(0x40))); | ||||||
|  |     // A bracketed list of expressions | ||||||
|  |     passert!( | ||||||
|  |         Directive::Words(vec![expr!(0x40), expr!(0x41), expr!(0x42), expr!(0x43)]), | ||||||
|  |         ".words [ 0x40 0x41 0x42 0x43 ]" | ||||||
|  |     ); | ||||||
|  |     // A quoted string | ||||||
|  |     passert!(Directive::String("Hello, world!"), ".string \"Hello, world!\""); | ||||||
|  | } | ||||||
|  | // #[test] | ||||||
|  | // fn instruction() { | ||||||
|  | // passert!(, ""); | ||||||
|  | // } | ||||||
|  | #[test] | ||||||
|  | fn instruction_kind() { | ||||||
|  |     // An instruction with one operand | ||||||
|  |     assert_matches!(Parsable::parse("pop sp").unwrap(), InstructionKind::OneEm(OneEm { .. })); | ||||||
|  |  | ||||||
|  |     // An instruction with no operands | ||||||
|  |     assert_matches!(Parsable::parse("nop").unwrap(), InstructionKind::NoEm(NoEm { .. })); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn no_em() { | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Nop }, "nop"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Ret }, "ret"); | ||||||
|  |     // Each `clr*` opcode, followed by its `set*` counterpart | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Clrc }, "clrc"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Setc }, "setc"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Clrz }, "clrz"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Setz }, "setz"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Clrn }, "clrn"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Setn }, "setn"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Dint }, "dint"); | ||||||
|  |     passert!(NoEm { opcode: token::NoEm::Eint }, "eint"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn one_em() { | ||||||
|  |     const dst: Dst = Dst::Direct(Reg::R15); | ||||||
|  |     let (word, byte) = (Width::Word, Width::Byte); | ||||||
|  |     // Each opcode is checked without a suffix (word), then with the `.b` suffix (byte) | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Pop, width: word, dst }, "pop r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Pop, width: byte, dst }, "pop.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Rla, width: word, dst }, "rla r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Rla, width: byte, dst }, "rla.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Rlc, width: word, dst }, "rlc r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Rlc, width: byte, dst }, "rlc.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Inv, width: word, dst }, "inv r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Inv, width: byte, dst }, "inv.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Clr, width: word, dst }, "clr r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Clr, width: byte, dst }, "clr.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Tst, width: word, dst }, "tst r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Tst, width: byte, dst }, "tst.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Dec, width: word, dst }, "dec r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Dec, width: byte, dst }, "dec.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Decd, width: word, dst }, "decd r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Decd, width: byte, dst }, "decd.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Inc, width: word, dst }, "inc r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Inc, width: byte, dst }, "inc.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Incd, width: word, dst }, "incd r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Incd, width: byte, dst }, "incd.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Adc, width: word, dst }, "adc r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Adc, width: byte, dst }, "adc.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Dadc, width: word, dst }, "dadc r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Dadc, width: byte, dst }, "dadc.b r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Sbc, width: word, dst }, "sbc r15"); | ||||||
|  |     passert!(OneEm { opcode: token::OneEm::Sbc, width: byte, dst }, "sbc.b r15"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn one_arg() { | ||||||
|  |     const src: Src = Src::Direct(Reg::PC); | ||||||
|  |     let (word, byte) = (Width::Word, Width::Byte); | ||||||
|  |     // Each opcode is checked without a suffix (word), then with the `.b` suffix (byte) | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Rrc, width: word, src }, "rrc pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Rrc, width: byte, src }, "rrc.b pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Swpb, width: word, src }, "swpb pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Swpb, width: byte, src }, "swpb.b pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Rra, width: word, src }, "rra pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Rra, width: byte, src }, "rra.b pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Sxt, width: word, src }, "sxt pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Sxt, width: byte, src }, "sxt.b pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Push, width: word, src }, "push pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Push, width: byte, src }, "push.b pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Call, width: word, src }, "call pc"); | ||||||
|  |     passert!(OneArg { opcode: token::OneArg::Call, width: byte, src }, "call.b pc"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn two_arg() { | ||||||
|  |     const src: Src = Src::Direct(Reg::R14); | ||||||
|  |     const dst: Dst = Dst::Direct(Reg::R15); | ||||||
|  |     let (word, byte) = (Width::Word, Width::Byte); | ||||||
|  |     // Each opcode is checked without a suffix (word), then with the `.b` suffix (byte) | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Mov, width: word, src, dst }, "mov r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Mov, width: byte, src, dst }, "mov.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Add, width: word, src, dst }, "add r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Add, width: byte, src, dst }, "add.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Addc, width: word, src, dst }, "addc r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Addc, width: byte, src, dst }, "addc.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Subc, width: word, src, dst }, "subc r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Subc, width: byte, src, dst }, "subc.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Sub, width: word, src, dst }, "sub r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Sub, width: byte, src, dst }, "sub.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Cmp, width: word, src, dst }, "cmp r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Cmp, width: byte, src, dst }, "cmp.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Dadd, width: word, src, dst }, "dadd r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Dadd, width: byte, src, dst }, "dadd.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bit, width: word, src, dst }, "bit r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bit, width: byte, src, dst }, "bit.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bic, width: word, src, dst }, "bic r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bic, width: byte, src, dst }, "bic.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bis, width: word, src, dst }, "bis r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Bis, width: byte, src, dst }, "bis.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Xor, width: word, src, dst }, "xor r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::Xor, width: byte, src, dst }, "xor.b r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::And, width: word, src, dst }, "and r14, r15"); | ||||||
|  |     passert!(TwoArg { opcode: token::TwoArg::And, width: byte, src, dst }, "and.b r14, r15"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn jump() { | ||||||
|  |     // Every jump mnemonic is checked against the same relative destination | ||||||
|  |     const target: JumpDst = JumpDst::Rel(100); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jne, dst: target }, "jne 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jnz, dst: target }, "jnz 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jeq, dst: target }, "jeq 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jz, dst: target }, "jz 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jnc, dst: target }, "jnc 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jlo, dst: target }, "jlo 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jc, dst: target }, "jc 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jhs, dst: target }, "jhs 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jn, dst: target }, "jn 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jge, dst: target }, "jge 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jl, dst: target }, "jl 100"); | ||||||
|  |     passert!(Jump { opcode: token::Jump::Jmp, dst: target }, "jmp 100"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn reti() { | ||||||
|  |     // `reti` takes no operands | ||||||
|  |     assert_eq!(Reti, Parsable::parse("reti").unwrap()); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn br() { | ||||||
|  |     // `br` takes a single source operand | ||||||
|  |     let src = Src::Direct(Reg::R15); | ||||||
|  |     passert!(Br { src }, "br r15"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn width() { | ||||||
|  |     let (byte, word) = (Width::Byte, Width::Word); | ||||||
|  |     // A missing suffix is expected to parse the same as an explicit `.w` | ||||||
|  |     passert!(word, ""); | ||||||
|  |     passert!(word, ".w"); | ||||||
|  |     passert!(byte, ".b"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn src() { | ||||||
|  |     // Operands which name a register | ||||||
|  |     passert!(Src::Direct(Reg::R15), "r15"); | ||||||
|  |     passert!(Src::Indirect(Reg::R15), "@r15"); | ||||||
|  |     passert!(Src::PostInc(Reg::R15), "@r15+"); | ||||||
|  |     passert!(Src::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)"); | ||||||
|  |     // Operands which consist only of an expression | ||||||
|  |     passert!(Src::Absolute(expr!(0x1000)), "&0x1000"); | ||||||
|  |     passert!(Src::Immediate(expr!(0x1000)), "#0x1000"); | ||||||
|  |     passert!(Src::BareExpr(expr!(foo)), "foo"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn dst() { | ||||||
|  |     // The special destinations `#0` and `#1` | ||||||
|  |     passert!(Dst::Special(DstSpecial::Zero), "#0"); | ||||||
|  |     passert!(Dst::Special(DstSpecial::One), "#1"); | ||||||
|  |     // Register, indexed, and absolute destinations | ||||||
|  |     passert!(Dst::Direct(Reg::R15), "r15"); | ||||||
|  |     passert!(Dst::Indexed(expr!(0x1000), Reg::R15), "0x1000(r15)"); | ||||||
|  |     passert!(Dst::Absolute(expr!(0x1000)), "&0x1000"); | ||||||
|  | } | ||||||
|  | #[test] | ||||||
|  | fn jump_dst() { | ||||||
|  |     // A destination is either a label, or a signed relative offset | ||||||
|  |     passert!(JumpDst::Label("foo"), "foo"); | ||||||
|  |     passert!(JumpDst::Rel(-100), "-100"); | ||||||
|  |     passert!(JumpDst::Rel(100), "100"); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn expr() { | ||||||
|  |     // Primary expressions: number, identifier, address-of, group | ||||||
|  |     passert!(Expr::Number(42), "42"); | ||||||
|  |     passert!(Expr::Ident("foo"), "foo"); | ||||||
|  |     passert!(Expr::AddrOf("bar"), "&bar"); | ||||||
|  |     passert!(expr!((42)), "(42)"); | ||||||
|  |     // Unary operators | ||||||
|  |     passert!(expr!([Deref Neg Not] 1), "*-!1"); | ||||||
|  |     // Binary operators: multiply/remainder/divide, then add/subtract | ||||||
|  |     passert!(expr!((1) Mul(2) Rem(3) Div(4)), "1 * 2 % 3 / 4"); | ||||||
|  |     passert!(expr!((1) Add(2) Sub(3)), "1 + 2 - 3"); | ||||||
|  |     // Binary operators: shifts, then bitwise logic | ||||||
|  |     passert!(expr!((1) Lsh(2) Rsh(3)), "1 << 2 >> 3"); | ||||||
|  |     passert!(expr!((1) And(2) Or(3) Xor(4)), "1 & 2 | 3 ^ 4"); | ||||||
|  |     // Several operator kinds combined in one expression | ||||||
|  |     passert!( | ||||||
|  |         expr!( | ||||||
|  |             (4) Mul( | ||||||
|  |                 (3) Add( | ||||||
|  |                     (2) Lsh( | ||||||
|  |                         (1) And([Neg] 1) | ||||||
|  |                     ) Rsh([Deref] 2) | ||||||
|  |                 ) Add([Not] 3) | ||||||
|  |             ) Mul(4) | ||||||
|  |         ), | ||||||
|  |         "4 * 3 + 2 << 1 & -1 >> *2 + !3 * 4" | ||||||
|  |     ); | ||||||
|  | } | ||||||
							
								
								
									
										89
									
								
								src/preprocessor.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								src/preprocessor.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | |||||||
|  | // © 2023-2024 John Breaux | ||||||
|  | //See LICENSE.md for license | ||||||
|  | //! Preprocesses a token stream by removing and replacing tokens according to `.define` directives | ||||||
|  |  | ||||||
|  | use crate::{ | ||||||
|  |     lexer::{ | ||||||
|  |         token::{Token, TokenKind as Kind}, | ||||||
|  |         Lexer, | ||||||
|  |     }, | ||||||
|  |     span::Span, | ||||||
|  | }; | ||||||
|  | use std::collections::{HashMap, VecDeque}; | ||||||
|  |  | ||||||
|  | /// Wraps a [`Lexer`], replacing tokens according to the `.define` directives it encounters | ||||||
|  | #[derive(Debug, Clone)] | ||||||
|  | pub struct Preprocessor<'t> { | ||||||
|  |     /// Source of not-yet-processed tokens | ||||||
|  |     lexer: Lexer<'t>, | ||||||
|  |     /// Tokens waiting to be yielded, ahead of anything still in the lexer | ||||||
|  |     buf: VecDeque<Token<'t>>, | ||||||
|  |     /// Replacement tokens for each defined lexeme | ||||||
|  |     defn: HashMap<&'t str, Vec<Token<'t>>>, | ||||||
|  |     /// Location for injected tokens | ||||||
|  |     pos: Span<usize>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Preprocessor<'t> { | ||||||
|  |     /// Creates a preprocessor over a new [`Lexer`] for `text` | ||||||
|  |     pub fn new(text: &'t str) -> Self { | ||||||
|  |         Self::with_lexer(Lexer::new(text)) | ||||||
|  |     } | ||||||
|  |     /// Creates a preprocessor over an existing [`Lexer`], with nothing buffered or defined | ||||||
|  |     pub fn with_lexer(lexer: Lexer<'t>) -> Self { | ||||||
|  |         Self { | ||||||
|  |             lexer, | ||||||
|  |             buf: VecDeque::new(), | ||||||
|  |             defn: HashMap::new(), | ||||||
|  |             pos: Span::default(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Yields the next token: buffered tokens first, then tokens taken from the lexer. | ||||||
|  |     /// | ||||||
|  |     /// The position of the yielded token is remembered, for use by later injected tokens. | ||||||
|  |     pub fn scan(&mut self) -> Option<Token<'t>> { | ||||||
|  |         let token = match self.buf.pop_front() { | ||||||
|  |             Some(buffered) => buffered, | ||||||
|  |             None => self.next()?, | ||||||
|  |         }; | ||||||
|  |         self.pos = token.pos; | ||||||
|  |         Some(token) | ||||||
|  |     } | ||||||
|  |     /// Returns the current location of the underlying lexer | ||||||
|  |     pub fn start(&self) -> usize { | ||||||
|  |         self.lexer.location() | ||||||
|  |     } | ||||||
|  |     /// Grabs a token from the lexer, and attempts to match its lexeme against the definitions | ||||||
|  |     fn next(&mut self) -> Option<Token<'t>> { | ||||||
|  |         let token = self.lexer.scan()?; | ||||||
|  |         if let Some(replacement) = self.defn.get(token.lexeme) { | ||||||
|  |             // Injected tokens take the position of the most recently yielded token | ||||||
|  |             let pos = self.pos; | ||||||
|  |             self.buf.extend(replacement.iter().copied().map(|mut injected| { | ||||||
|  |                 injected.pos = pos; | ||||||
|  |                 injected | ||||||
|  |             })); | ||||||
|  |             return self.scan(); | ||||||
|  |         } | ||||||
|  |         match token.kind { | ||||||
|  |             // A newline taken directly from the lexer is skipped | ||||||
|  |             Kind::Newline => self.scan(), | ||||||
|  |             // A directive is executed, then passed through | ||||||
|  |             Kind::Directive => { | ||||||
|  |                 self.directive(token); | ||||||
|  |                 Some(token) | ||||||
|  |             } | ||||||
|  |             _ => Some(token), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Passes a token through while parsing a directive: the token is both buffered and returned | ||||||
|  |     fn tee(&mut self) -> Option<Token<'t>> { | ||||||
|  |         self.lexer.scan().map(|token| { | ||||||
|  |             self.buf.push_back(token); | ||||||
|  |             token | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |     /// Parses and executes a directive. Only `.define` is acted upon | ||||||
|  |     pub fn directive(&mut self, token: Token<'t>) { | ||||||
|  |         if token.lexeme == ".define" { | ||||||
|  |             self.define() | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /// Records a definition: the next token is the key, and the tokens after it, up to the | ||||||
|  |     /// next comment, newline, or end of input, are its replacement. | ||||||
|  |     /// | ||||||
|  |     /// Every token read here is also buffered, so it is still yielded by [`Self::scan`]. | ||||||
|  |     pub fn define(&mut self) { | ||||||
|  |         let Some(key) = self.tee() else { return }; | ||||||
|  |         let mut replacement = Vec::new(); | ||||||
|  |         loop { | ||||||
|  |             let Some(token) = self.tee() else { break }; | ||||||
|  |             match token.kind { | ||||||
|  |                 Kind::Newline => break, | ||||||
|  |                 Kind::Comment => { | ||||||
|  |                     // NOTE(review): `tee` has already buffered this token, so the comment | ||||||
|  |                     // ends up in the buffer twice -- confirm that this is intended | ||||||
|  |                     self.buf.push_back(token); | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |                 _ => replacement.push(token), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         self.defn.insert(key.lexeme, replacement); | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										45
									
								
								src/span.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								src/span.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | |||||||
|  | // © 2023-2024 John Breaux | ||||||
|  | // See LICENSE.md for license | ||||||
|  | //! A [Span] is a [Range] that does not implement [Iterator]. It is a [Copy] type. | ||||||
|  | use std::{ | ||||||
|  |     fmt::{Debug, Display}, | ||||||
|  |     ops::{Index, Range}, | ||||||
|  | }; | ||||||
|  | /// A `start`/`end` pair shaped like a [Range], but [Copy] and not an [Iterator]. | ||||||
|  | /// | ||||||
|  | /// Converts to and from [Range] through [From]. | ||||||
|  | #[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub struct Span<Idx> { | ||||||
|  |     /// Where the span begins; maps to the `start` of a [Range] | ||||||
|  |     pub start: Idx, | ||||||
|  |     /// Where the span ends; maps to the `end` of a [Range] | ||||||
|  |     pub end: Idx, | ||||||
|  | } | ||||||
|  | /// Unpacks a [Span] into the [Range] with the same bounds | ||||||
|  | impl<Idx> From<Span<Idx>> for Range<Idx> { | ||||||
|  |     fn from(value: Span<Idx>) -> Self { | ||||||
|  |         let Span { start, end } = value; | ||||||
|  |         start..end | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Repacks a [Range] as the [Span] with the same bounds | ||||||
|  | impl<Idx> From<Range<Idx>> for Span<Idx> { | ||||||
|  |     fn from(value: Range<Idx>) -> Self { | ||||||
|  |         let Range { start, end } = value; | ||||||
|  |         Span { start, end } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Lets a slice be indexed by a `Span<usize>`, just as by the [Range] with the same bounds | ||||||
|  | impl<T> Index<Span<usize>> for [T] { | ||||||
|  |     type Output = [T]; | ||||||
|  |     fn index(&self, index: Span<usize>) -> &Self::Output { | ||||||
|  |         &self[index.start..index.end] | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Lets a `str` be indexed by a `Span<usize>`, just as by the [Range] with the same bounds | ||||||
|  | impl Index<Span<usize>> for str { | ||||||
|  |     type Output = str; | ||||||
|  |     fn index(&self, index: Span<usize>) -> &Self::Output { | ||||||
|  |         &self[index.start..index.end] | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Formats as `start..end`, using the [Debug] form of both bounds | ||||||
|  | impl<Idx> Debug for Span<Idx> | ||||||
|  | where | ||||||
|  |     Idx: Debug, | ||||||
|  | { | ||||||
|  |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |         let Self { start, end } = self; | ||||||
|  |         write!(f, "{start:?}..{end:?}") | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | /// Formats as `start..end`, using the [Display] form of both bounds | ||||||
|  | impl<Idx> Display for Span<Idx> | ||||||
|  | where | ||||||
|  |     Idx: Display, | ||||||
|  | { | ||||||
|  |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |         let Self { start, end } = self; | ||||||
|  |         write!(f, "{start}..{end}") | ||||||
|  |     } | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user