v0.3.0 #1

Merged
j merged 12 commits from v0.3.0 into main 2024-02-01 20:11:02 +00:00
48 changed files with 3764 additions and 3133 deletions

View File

@@ -1,12 +1,12 @@
unstable_features = true unstable_features = true
max_width = 120 max_width = 100
wrap_comments = true wrap_comments = true
comment_width = 100 comment_width = 100
# Allow structs to fill an entire line # Allow structs to fill an entire line
use_small_heuristics = "Max" use_small_heuristics = "Max"
# Allow small functions on single line # Allow small functions on single line
fn_single_line = true # fn_single_line = true
# Alignment # Alignment
enum_discrim_align_threshold = 12 enum_discrim_align_threshold = 12

View File

@@ -1,23 +1,24 @@
[package] [workspace]
name = "msp430-asm" members = ["msp430-asm"]
version = "0.2.0" # default-members = ["msp430-asm"]
edition = "2021"
rust-version = "1.70" [workspace.package]
authors = ["John Breaux <j@soft.fish>"] authors = ["John Breaux <j@soft.fish>"]
version = "0.3.0"
license = "MIT"
edition = "2021"
publish = false publish = false
[features] [package]
default = [] name = "libmsp430"
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
[[example]]
name = "msp430-asm"
path = "examples/msp430-asm/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
regex = "1.9.3" # Provides very quick boolean tests for XID_START and XID_CONTINUE
# TODO: Remove dependency on regex unicode-ident = "1.0.12"
[dev-dependencies]
anes = { version = "0.1.6" }
argp = { version = "0.3.0" }

9
LICENSE.md Normal file
View File

@@ -0,0 +1,9 @@
The MIT License (MIT)
Copyright © 2023-2024 John Breaux
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -1,222 +1,224 @@
//! TODO: rewrite for v0.3.0
use super::*; use super::*;
/// Creates a [Parsable] implementation for an enum whose variants
/// are named after other [Parsable] items
macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
$( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
impl ::msp430_asm::parser::parsable::Parsable for $id {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
$(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
{ Err(ParseError::UnrecognizedDirective("".into())) }
}
}
impl TryFrom<&str> for $id {
type Error = ParseError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
}
}
}
make_parsable! { // /// Creates a [Parsable] implementation for an enum whose variants
#[derive(Debug)] // /// are named after other [Parsable] items
pub enum SyntaxFragment { // macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
Opcode, // $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
PrimaryOperand, // impl ::msp430_asm::parser::parsable::Parsable for $id {
Number, // fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
} // where T: TokenStream<'text> {
} // $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
// { Err(ParseError::UnrecognizedDirective("".into())) }
// }
// }
// impl TryFrom<&str> for $id {
// type Error = ParseError;
// fn try_from(value: &str) -> Result<Self, Self::Error> {
// Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
// }
// }
// }
impl SyntaxFragment { // make_parsable! {
pub fn info(&self) { // #[derive(Debug)]
match self { // pub enum SyntaxFragment {
SyntaxFragment::Opcode(o) => Self::opcode_info(o), // Opcode,
SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o), // PrimaryOperand,
SyntaxFragment::Number(n) => println!("The number {n}"), // Number,
} // }
} // }
/// Prints usage information for an opcode: its mnemonic followed by the parameter
/// summary from `params`, then the description and Rust-like pseudocode from `usage`,
/// and finally a footer linking to the mspgcc instruction reference.
fn opcode_info(o: &Opcode) {
// `usage` returns a (description, Rust-like pseudocode) pair of static strings
let (desc, as_rust) = usage(o);
println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
footer!("https://mspgcc.sourceforge.net/manual/x223.html");
}
// TODO: re-enable full instruction decoding
// fn encoding_info(e: &Encoding) {
// match e {
// Encoding::Single { dst, .. } => Self::operand_info(dst),
// Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
// Encoding::Double { src, dst, .. } => {
// Self::operand_info(src);
// Self::operand_info(&dst.clone().into())
// }
// }
// }
/// Prints a description of an operand's addressing mode.
///
/// Register-based modes first describe the register itself via `register_info`, then the
/// mode. Every mode except `Relative` is followed by a footer linking to the mspgcc
/// addressing-mode reference.
fn operand_info(o: &PrimaryOperand) {
match o {
PrimaryOperand::Direct(r) => Self::register_info(r),
PrimaryOperand::Indirect(r) => {
Self::register_info(r);
println!("Indirect addressing mode: use data pointed to by {r}");
}
PrimaryOperand::PostInc(r) => {
Self::register_info(r);
println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
}
PrimaryOperand::Indexed(r, n) => {
Self::register_info(r);
println!("Indexed mode: use the data at {r}[{n}]");
}
// Early return: nothing is printed for relative operands, not even the footer
PrimaryOperand::Relative(_) => return,
PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
// The remaining variants are the special immediates, described by how each is encoded
PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
}
footer!("https://mspgcc.sourceforge.net/manual/x82.html");
}
/// Prints a one-line (or, for `sr`, multi-line) description of a register.
///
/// The special registers `pc`, `sp`, `sr`, and `cg` each get their own description;
/// `r4`-`r11` are reported as callee-saved and `r12`-`r15` as caller-saved.
fn register_info(r: &Register) {
    use Register as Re;
    match r {
        Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
        Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
        // The table lists status-register bits 8 down to 0. Bit 7 is SCG1 and bit 6 is
        // SCG0; the two system clock generator bits are distinct.
        Re::sr => println!(
            "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG0\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
        ),
        Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
        Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
            println!("{r} is a callee-saved general purpose register.")
        }
        Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
            println!("{r} is a caller-saved general purpose register, allowed for return values.")
        }
    }
}
}
// Gets parameter usage information from the opcode's EncodingParser // impl SyntaxFragment {
pub fn params(opcode: &Opcode) -> &'static str { // pub fn info(&self) {
match opcode.resolve().1 { // match self {
EncodingParser::Jump { target: None } => " target (relative address or label)", // SyntaxFragment::Opcode(o) => Self::opcode_info(o),
EncodingParser::Single { width: None, dst: None } => "[.b] dst", // SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o),
EncodingParser::Single { dst: None, .. } => " dst", // SyntaxFragment::Number(n) => println!("The number {n}"),
EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst", // }
EncodingParser::Double { src: None, .. } => "[.b] src", // }
EncodingParser::Double { dst: None, .. } => "[.b] dst", // fn opcode_info(o: &Opcode) {
EncodingParser::Double { .. } => "[.b]", // let (desc, as_rust) = usage(o);
EncodingParser::Reflexive { reg: None, .. } => "[.b] dst", // println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
_ => "", // footer!("https://mspgcc.sourceforge.net/manual/x223.html");
} // }
} // // TODO: re-enable full instruction decoding
// // fn encoding_info(e: &Encoding) {
// // match e {
// // Encoding::Single { dst, .. } => Self::operand_info(dst),
// // Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
// // Encoding::Double { src, dst, .. } => {
// // Self::operand_info(src);
// // Self::operand_info(&dst.clone().into())
// // }
// // }
// // }
// fn operand_info(o: &PrimaryOperand) {
// match o {
// PrimaryOperand::Direct(r) => Self::register_info(r),
// PrimaryOperand::Indirect(r) => {
// Self::register_info(r);
// println!("Indirect addressing mode: use data pointed to by {r}");
// }
// PrimaryOperand::PostInc(r) => {
// Self::register_info(r);
// println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
// }
// PrimaryOperand::Indexed(r, n) => {
// Self::register_info(r);
// println!("Indexed mode: use the data at {r}[{n}]");
// }
// PrimaryOperand::Relative(_) => return,
// PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
// PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
// PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
// PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
// PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
// PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
// PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
// PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
// }
// footer!("https://mspgcc.sourceforge.net/manual/x82.html");
// }
// fn register_info(r: &Register) {
// use Register as Re;
// match r {
// Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
// Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
// Re::sr => println!(
// "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG0\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
// ),
// Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
// Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
// println!("{r} is a callee-saved general purpose register.")
// }
// Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
// println!("{r} is a caller-saved general purpose register, allowed for return values.")
// }
// }
// }
// }
pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) { // // Gets parameter usage information from the opcode's EncodingParser
match opcode { // pub fn params(opcode: &Opcode) -> &'static str {
// Single // match opcode.resolve().1 {
Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"), // EncodingParser::Jump { target: None } => " target (relative address or label)",
Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"), // EncodingParser::Single { width: None, dst: None } => "[.b] dst",
Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"), // EncodingParser::Single { dst: None, .. } => " dst",
Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"), // EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst",
Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"), // EncodingParser::Double { src: None, .. } => "[.b] src",
Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"), // EncodingParser::Double { dst: None, .. } => "[.b] dst",
Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"), // EncodingParser::Double { .. } => "[.b]",
// Jump // EncodingParser::Reflexive { reg: None, .. } => "[.b] dst",
Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"), // _ => "",
Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"), // }
Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"), // }
Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"),
Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"),
Opcode::Jmp => ("Jump unconditionally", "pc += target"),
// Double
Opcode::Mov => ("Copy src into dst", "dst = src"),
Opcode::Add => ("Add src to dst", "dst += src"),
Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"),
Opcode::Sub => ("Subtract src from dst", "dst -= src"),
Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
Opcode::And => ("Bitwise And src into dst", "dst &= src"),
// Emulated
Opcode::Nop => ("Does nothing", "{}"),
Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
Opcode::Clr => ("Sets dst to 0", "dst = 0"),
Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
Opcode::Dec => ("Decrements dst", "dst -= 1"),
Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
Opcode::Inc => ("Increments dst", "dst += 1"),
Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"),
Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"),
}
}
const SINGLE: [Opcode; 7] = // pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) {
[Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti]; // match opcode {
// // Single
// Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"),
// Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"),
// Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"),
// Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"),
// Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"),
// Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"),
// Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"),
// // Jump
// Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"),
// Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"),
// Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"),
// Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
// Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
// Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"),
// Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"),
// Opcode::Jmp => ("Jump unconditionally", "pc += target"),
// // Double
// Opcode::Mov => ("Copy src into dst", "dst = src"),
// Opcode::Add => ("Add src to dst", "dst += src"),
// Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
// Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"),
// Opcode::Sub => ("Subtract src from dst", "dst -= src"),
// Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
// Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
// Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
// Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
// Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
// Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
// Opcode::And => ("Bitwise And src into dst", "dst &= src"),
// // Emulated
// Opcode::Nop => ("Does nothing", "{}"),
// Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
// Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
// Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
// Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
// Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
// Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
// Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
// Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
// Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
// Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
// Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
// Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
// Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
// Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
// Opcode::Clr => ("Sets dst to 0", "dst = 0"),
// Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
// Opcode::Dec => ("Decrements dst", "dst -= 1"),
// Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
// Opcode::Inc => ("Increments dst", "dst += 1"),
// Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
// Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
// Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"),
// Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"),
// }
// }
const JUMP: [Opcode; 8] = // const SINGLE: [Opcode; 7] =
[Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp]; // [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti];
#[rustfmt::skip] // const JUMP: [Opcode; 8] =
const DOUBLE: [Opcode; 12] = [ // [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp];
Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
];
#[rustfmt::skip]
const SIMULATED: [Opcode; 24] = [
Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
];
pub fn list_opcodes() { // #[rustfmt::skip]
let mut stdout = std::io::stdout().lock(); // const DOUBLE: [Opcode; 12] = [
header!(stdout, "Single-operand instructions:"); // Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
let _ = write_opcode_list(&mut stdout, &SINGLE); // Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
header!(stdout, "Relative Jump instructions:"); // ];
let _ = write_opcode_list(&mut stdout, &JUMP); // #[rustfmt::skip]
header!(stdout, "Double-operand instructions:"); // const SIMULATED: [Opcode; 24] = [
let _ = write_opcode_list(&mut stdout, &DOUBLE); // Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
header!(stdout, "Simulated instructions:"); // Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
let _ = write_opcode_list(&mut stdout, &SIMULATED); // Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
} // Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
// ];
fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> { // pub fn list_opcodes() {
for (idx, opcode) in list.iter().enumerate() { // let mut stdout = std::io::stdout().lock();
write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?; // header!(stdout, "Single-operand instructions:");
} // let _ = write_opcode_list(&mut stdout, &SINGLE);
if list.len() % 6 != 0 { // header!(stdout, "Relative Jump instructions:");
writeln!(f)?; // let _ = write_opcode_list(&mut stdout, &JUMP);
} // header!(stdout, "Double-operand instructions:");
Ok(()) // let _ = write_opcode_list(&mut stdout, &DOUBLE);
} // header!(stdout, "Simulated instructions:");
// let _ = write_opcode_list(&mut stdout, &SIMULATED);
// }
macro header ($f:ident, $($x: expr),+) { // fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> {
{write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();} // for (idx, opcode) in list.iter().enumerate() {
} // write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?;
macro footer ($($x: expr),+) { // }
{print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);} // if list.len() % 6 != 0 {
} // writeln!(f)?;
// }
// Ok(())
// }
// macro header ($f:ident, $($x: expr),+) {
// {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();}
// }
// macro footer ($($x: expr),+) {
// {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);}
// }

View File

@@ -3,59 +3,63 @@
// https://mspgcc.sourceforge.net/manual/ln16.html // https://mspgcc.sourceforge.net/manual/ln16.html
#![feature(decl_macro)] #![feature(decl_macro)]
use anes::{Color, ResetAttributes, SetForegroundColor}; fn main() {
use msp430_asm::parser::preamble::*; println!("Hello, world!")
use msp430_asm::preamble::*;
use std::{
error::Error,
io::{stdin, IsTerminal, Write},
};
type AsmResult<T> = Result<T, Box<dyn Error>>;
mod data;
fn main() -> AsmResult<()> {
if stdin().is_terminal() {
hello();
}
repl()
} }
fn hello() { // use anes::{Color, ResetAttributes, SetForegroundColor};
println!( // use msp430_asm::parser::preamble::*;
"{}{} v{} // use msp430_asm::preamble::*;
This software contains instruction and register descriptions adapted from // use std::{
the mspgcc project's fantastic documentation, which is licensed under the GPL. // error::Error,
https://mspgcc.sourceforge.net/manual/book1.html{}\n", // io::{stdin, IsTerminal, Write},
SetForegroundColor(Color::DarkGray), // };
env!("CARGO_BIN_NAME"),
env!("CARGO_PKG_VERSION"),
ResetAttributes
);
}
fn repl() -> AsmResult<()> { // type AsmResult<T> = Result<T, Box<dyn Error>>;
printflush!("> ");
let mut line = String::new();
while let Ok(len) = stdin().read_line(&mut line) {
match len {
0 => break, // No newline (reached EOF)
1 => (), // Line is empty
_ => {
if line.starts_with('?') || line.starts_with("help") {
data::list_opcodes()
} else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) {
sf.info();
}
}
}
printflush!("> ");
line.clear();
}
Ok(())
}
macro printflush ($($x: expr),+) { // mod data;
{print!($($x),+); let _ = ::std::io::stdout().flush();}
} // fn main() -> AsmResult<()> {
// if stdin().is_terminal() {
// hello();
// }
// repl()
// }
// fn hello() {
// println!(
// "{}{} v{}
// This software contains instruction and register descriptions adapted from
// the mspgcc project's fantastic documentation, which is licensed under the GPL.
// https://mspgcc.sourceforge.net/manual/book1.html{}\n",
// SetForegroundColor(Color::DarkGray),
// env!("CARGO_BIN_NAME"),
// env!("CARGO_PKG_VERSION"),
// ResetAttributes
// );
// }
// fn repl() -> AsmResult<()> {
// printflush!("> ");
// let mut line = String::new();
// while let Ok(len) = stdin().read_line(&mut line) {
// match len {
// 0 => break, // No newline (reached EOF)
// 1 => (), // Line is empty
// _ => {
// if line.starts_with('?') || line.starts_with("help") {
// data::list_opcodes()
// } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) {
// sf.info();
// }
// }
// }
// printflush!("> ");
// line.clear();
// }
// Ok(())
// }
// macro printflush ($($x: expr),+) {
// {print!($($x),+); let _ = ::std::io::stdout().flush();}
// }

47
grammar.ebnf Normal file
View File

@@ -0,0 +1,47 @@
(* Partial grammar for msp430-asm *)
Line = Label | Directive | Insn ;
Insn = NoEm | OneEm | Special | OneArg | TwoArg | Jump ;
(* Instruction formats *)
NoEm = OpNoEm ;
OneEm = OpOneEm Dst ;
Special = "reti" | "br" Src ;
OneArg = OpOneArg Src ;
TwoArg = OpTwoArg Src ','? Dst ;
Jump = OpJump Expr ;
(* Addressing modes *)
Src = '#' (SrcSpecial | '-'? Expr)
| Absolute
| PostInc
| Indexed
| Direct ;
Dst = '#' (SrcSpecial | Expr)
| Absolute
| Indirect
| Indexed
| Direct ;
Direct = Reg ;
Indirect = '@' Reg ;
PostInc = Indirect '+'? ;
Indexed = Number '(' Reg ')' ;
Immediate = '#' Expr ;
Absolute = '&' Expr ;
SrcSpecial = 0 | 1 | '-' 1 | 0xffff | 2 | 4 | 8 ;
DstSpecial = 0 | 1 ;
Expr = '-'? Number ;
(* Pseudo-terminals *)
Reg = "pc" | "sp" | "sr" | "cg"
| "r0" | "r1" | "r2" | "r3"
| "r4" | "r5" | "r6" | "r7"
| "r8" | "r9" | "r10" | "r11"
| "r12" | "r13" | "r14" | "r15" ;
Identifier = ID_START ID_CONTINUE* ;
Number = '-'? DIGIT ;

13
msp430-asm/Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "msp430-asm"
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
libmsp430 = { path = ".." }
argp = { version = "0.3.0" }

135
msp430-asm/src/lib.rs Normal file
View File

@@ -0,0 +1,135 @@
//! Helper library for msp430-asm
#![feature(decl_macro)]
pub mod split_twice {
    //! Splitting a slice or string into three parts at two indices

    /// Slices a collection into a beginning, middle, and end, based on two unordered indices
    pub trait SplitTwice<'t> {
        /// The borrowed piece type returned for each of the three parts
        type Slice;
        /// The type used to index into the collection
        type Idx;
        /// Splits a collection into a beginning, middle, and end slice,
        /// based on two unordered indices
        ///
        /// The indices may be given in either order. An index past the end of the
        /// collection is treated as the end of the collection, so the three parts
        /// never overlap and always concatenate back to the whole collection.
        ///
        /// # Examples
        /// ```rust
        /// # use msp430_asm::split_twice::SplitTwice;
        /// let string = "foo,bar,baz";
        /// let (foo, bar, baz) = string.split_twice(4, 8);
        /// assert_eq!(foo, "foo,");
        /// assert_eq!(bar, "bar,");
        /// assert_eq!(baz, "baz");
        /// ```
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice);
    }

    /// Orders two indices and clamps both to `len`, giving `(low, high)` where
    /// `low <= high <= len`
    fn ordered_bounds(a: usize, b: usize, len: usize) -> (usize, usize) {
        let (low, high) = if a < b { (a, b) } else { (b, a) };
        let high = high.min(len);
        (low.min(high), high)
    }

    impl<'t, T: 't> SplitTwice<'t> for [T] {
        type Slice = &'t [T];
        type Idx = usize;
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (low, high) = ordered_bounds(a, b, self.len());
            // Split off the tail first, then split the head out of what remains, so
            // `start` can never overlap `end` (even when both indices are equal).
            let (rest, end) = self.split_at(high);
            let (start, mid) = rest.split_at(low);
            (start, mid, end)
        }
    }

    impl<'t> SplitTwice<'t> for str {
        type Slice = &'t str;
        type Idx = usize;
        /// # Panics
        /// Panics if an in-range index does not lie on a UTF-8 character boundary,
        /// as [str::split_at] does.
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (low, high) = ordered_bounds(a, b, self.len());
            // Same order as the slice impl: tail first, then head.
            let (rest, end) = self.split_at(high);
            let (start, mid) = rest.split_at(low);
            (start, mid, end)
        }
    }
}
pub mod cursor {
    //! Terminal escape-sequence helpers: cursor movement and colored formatting
    use std::fmt::{Arguments, Display};

    /// Moves to the {line}th previous line
    pub macro previous($line:literal) {
        csi!("{}F", $line)
    }

    /// Injects a Command Sequence Introducer
    ///
    /// The format arguments are rendered immediately after the `ESC [` prefix.
    pub macro csi($($t:tt)*) {
        format_args!("\x1b[{}", format_args!($($t)*))
    }

    /// Formats the args with a foreground [Color]
    pub macro fg($fg:expr, $($t:tt)*) {
        Colorized::new(Some($fg), None, format_args!($($t)*))
    }

    /// Formats the args with a background [Color]
    pub macro bg($bg:expr, $($t:tt)*) {
        Colorized::new(None, Some($bg), format_args!($($t)*))
    }

    /// Formats the args with both a foreground and background [Color]
    pub macro color($fg:expr, $bg:expr, $($t:tt)*) {
        Colorized::new(Some($fg), Some($bg), format_args!($($t)*))
    }

    /// A terminal color
    ///
    /// Each discriminant is the number [Colorized] emits to select that color as a
    /// foreground; the background selector is the same number plus 10.
    #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub enum Color {
        #[default]
        Black = 30,
        Red,
        Green,
        Yellow,
        Blue,
        Magenta,
        Cyan,
        Gray,
        DarkGray = 90,
        Pink,
        Lime,
        Sunflower,
        SkyBlue,
        HotPink,
        Turquoise,
        White,
    }

    /// Format arguments paired with an optional foreground and background [Color]
    ///
    /// Usually built through the [fg], [bg], or [color] macros.
    #[derive(Clone, Copy, Debug)]
    pub struct Colorized<'args> {
        fg: Option<Color>,
        bg: Option<Color>,
        args: Arguments<'args>,
    }

    impl<'t> Colorized<'t> {
        /// Wraps `args` so that it is displayed with the given colors
        pub fn new(fg: Option<Color>, bg: Option<Color>, args: Arguments<'t>) -> Self {
            Self { fg, bg, args }
        }
    }

    impl<'t> Display for Colorized<'t> {
        /// Writes the color selectors (if any), then the wrapped args, then a `0m` reset
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let &Self { fg, bg, args } = self;
            if let Some(fg) = fg {
                write!(f, "{}", csi!("{}m", fg as u8))?;
            }
            if let Some(bg) = bg {
                // Background selectors sit 10 above the matching foreground selector
                write!(f, "{}", csi!("{}m", bg as u8 + 10))?;
            }
            write!(f, "{args}{}", csi!("0m"))
        }
    }
}

View File

@@ -1,7 +1,17 @@
// © 2023-2024 John Breaux
//See LICENSE.md for license
//! Simple frontend for the assembler //! Simple frontend for the assembler
#![feature(decl_macro)] #![feature(decl_macro)]
use argp::parse_args_or_exit; use argp::parse_args_or_exit;
use msp430_asm::preamble::*; use libmsp430::{
assembler::Assemble,
parser::ast::{canonical::Canonicalize, *},
parser::{error::Error as PError, Parser},
};
use msp430_asm::{
cursor::{fg, Color::*},
split_twice::SplitTwice,
};
use std::{ use std::{
error::Error, error::Error,
io::{stdin, IsTerminal, Read}, io::{stdin, IsTerminal, Read},
@@ -11,16 +21,30 @@ fn main() -> Result<(), Box<dyn Error>> {
let mut buf = String::new(); let mut buf = String::new();
if let Some(file) = parse_args_or_exit::<args::Args>(argp::DEFAULT).file { if let Some(file) = parse_args_or_exit::<args::Args>(argp::DEFAULT).file {
buf = std::fs::read_to_string(file)?; buf = std::fs::read_to_string(file)?;
} else if !stdin().is_terminal() { } else if stdin().is_terminal() {
// if stdin is not a terminal, don't parsecheck each line.
stdin().lock().read_to_string(&mut buf)?;
} else {
// if stdin is a terminal, enter parse-checked REPL mode. // if stdin is a terminal, enter parse-checked REPL mode.
repl::repl(&mut buf)?; repl::repl(&mut buf)?;
} else {
// if stdin is not a terminal, don't parsecheck each line.
stdin().lock().read_to_string(&mut buf)?;
} }
asm(&buf) asm(&buf)
} }
/// Parses and assembles a buffer, then prints it in hex to stdout.
///
/// A parse error is returned to the caller. An assembly error is printed to
/// stdout instead, and the function still returns `Ok(())`.
fn asm(buf: &str) -> Result<(), Box<dyn Error>> {
    let assembled = Parser::new(buf).parse::<Statements>()?.to_canonical().assemble();
    let words = match assembled {
        Ok(words) => words,
        Err(error) => {
            println!("{error}");
            return Ok(());
        }
    };
    for word in words {
        // Bytes are swapped so the hex dump reads in memory (little-endian) order
        print!("{:04x} ", word.swap_bytes())
    }
    println!();
    Ok(())
}
mod args { mod args {
use argp::FromArgs; use argp::FromArgs;
use std::path::PathBuf; use std::path::PathBuf;
@@ -34,41 +58,29 @@ mod args {
pub file: Option<PathBuf>, pub file: Option<PathBuf>,
} }
} }
mod repl { mod repl {
//! The REPL reads, evaluates, and prints in a loop
use super::*; use super::*;
use anes::{Color, MoveCursorToPreviousLine, ResetAttributes, SetForegroundColor}; use msp430_asm::cursor::*;
use msp430_asm::{
assembler::error::AssemblyError, error::Error as MspError, lexer::error::LexError, parser::error::ParseError,
};
use std::io::{stderr, Write}; use std::io::{stderr, Write};
macro color ($color: expr, $fmt: literal, $($str: expr),*) { /// Formats the line number
format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* ::anes::ResetAttributes)
}
macro linenr($n: expr) { macro linenr($n: expr) {
format_args!("{:4}: ", $n) format_args!("{:4}: ", $n)
} }
macro printflush ($($x: expr),+) { /// [println], but without the newline
macro printfl ($($x: expr),+) {
{print!($($x),+); let _ = ::std::io::stdout().flush();} {print!($($x),+); let _ = ::std::io::stdout().flush();}
} }
macro move_cursor($x:expr, $y:expr) { /// Runs the read-evaluate-print loop
format_args!("{}{}", ::anes::MoveCursorToPreviousLine($x), "")
}
pub fn repl(buf: &mut String) -> Result<(), Box<dyn Error>> { pub fn repl(buf: &mut String) -> Result<(), Box<dyn Error>> {
let mut line = String::new(); let mut line = String::new();
let mut linenr = 1; let mut linenr = 1;
println!( println!("{}", fg!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION")));
"{}{} v{}{}", printfl!("{}", linenr!(linenr));
SetForegroundColor(Color::DarkGray),
env!("CARGO_BIN_NAME"),
env!("CARGO_PKG_VERSION"),
ResetAttributes
);
printflush!("{}", linenr!(linenr));
while let Ok(len) = stdin().read_line(&mut line) { while let Ok(len) = stdin().read_line(&mut line) {
match len { match len {
0 => break, // No newline (reached EOF) 0 => break, // No newline (reached EOF)
@@ -76,61 +88,36 @@ mod repl {
_ => (), _ => (),
} }
// Try to parse this line in isolation (this restricts preprocessing) // Try to parse this line in isolation (this restricts preprocessing)
match Parser::default().parse(&line) { match Parser::new(&line).parse::<Statements>() {
Err(error) => errpp(&line, linenr, &error.into()), Err(error) => format_err(&line, linenr, &error),
Ok(_) => { Ok(_) => {
okpp(&line, linenr); format_ok(&line, linenr);
*buf += &line; *buf += &line;
linenr += 1; linenr += 1;
} }
} }
line.clear(); line.clear();
printflush!("{}", linenr!(linenr)); printfl!("{}", linenr!(linenr));
} }
println!(); println!("{}", fg!(DarkGray, "[EOF]"));
Ok(()) Ok(())
} }
fn okpp(line: &str, linenr: i32) { /// Rewrites the line in OK format, with a green linenr
println!("{}{}{}", move_cursor!(1, 5), color!(Color::Green, "{:4}", linenr!(linenr)), line.trim_end(),); fn format_ok(line: &str, linenr: i32) {
println!("{}{}{}", previous!(1), fg!(Lime, "{:4}", linenr!(linenr)), line.trim_end(),);
} }
/// Pretty-prints a line error /// Pretty-prints a line error
fn errpp(line: &str, linenr: i32, err: &msp430_asm::error::Error) { fn format_err(line: &str, linenr: i32, err: &PError) {
let loc = err.loc;
if stderr().is_terminal() { if stderr().is_terminal() {
let line = line.trim_end(); let line = line.trim_end();
eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Color::Red, "{}", linenr!(linenr))); eprint!("{}{}", previous!(1), fg!(Red, "{}", linenr!(linenr)));
match err { let (start, mid, end) = line.split_twice(loc.start, loc.end);
// TODO: use a recursive enum to store all valid states eprintln!("{start}{}{end} {}", fg!(Red, "{}", mid), fg!(DarkGray, "; {}", err));
MspError::LexError(LexError::Contextual(c, e))
| MspError::ParseError(ParseError::LexError(LexError::Contextual(c, e)))
| MspError::AssemblyError(AssemblyError::ParseError(ParseError::LexError(LexError::Contextual(
c,
e,
)))) => {
let (start, end) = line.split_at(c.position() - 1);
eprintln!("{start}{} ({e})", color!(Color::Red, "{}", end));
}
_ => {
eprintln!("{} ({err})", color!(Color::Red, "{}", line));
}
}
} else { } else {
eprintln!("{} ({err})", line.trim()) eprintln!("{} ({err})", line.trim())
} }
} }
} }
// Parses and assembles a buffer, then prints it in hex to stdout
fn asm(buf: &str) -> Result<(), Box<dyn Error>> {
match Assembler::assemble(&Parser::default().parse(&buf)?) {
Err(error) => println!("{error}"),
Ok(out) => {
for word in out {
print!("{:04x} ", word.swap_bytes())
}
println!();
}
}
Ok(())
}

99
sample-asm/shellcode.asm Normal file
View File

@@ -0,0 +1,99 @@
; © 2023-2024 John Breaux
; Contains spoilers for Microcorruption Halifax! Be warned!
; just hash the first 0x140 B and stick them in memory
const:
.define msize 0x1 ; length of each hash in bytes
.define hsize 0x3 ; bytes kept per hash (only needs to be 3 to determine 1 byte of sram)
.define sr_len 0x140 ; number of bytes in sram to dump
.define ha_len 0x3c0 ; number of bytes in hash array (hsize * sr_len)
.define haddr 0x7000 ; address of the big hash array
.define iaddr 0x8000 ; address of the sram input buffer
.define kaddr 0x9000 ; address of the key buffer
external_data:
.define HEX_LUT 0x4710; "0123456789ABCDEF"
external_func:
; INT(int interrupt, ...)
.define INT #0x4550
; getsn(void *dest, size_t len)
.define getsn #0x4568
; putchar(char character)
.define putchar #0x4578
; puts(char *str)
.define puts #0x4586
; memcpy(void *dest, void *src, size_t len)
.define memcpy #0x45a4
; sha256_internal(void * sram_addr, size_t sr_len, void * sha_buf)
.define sha256_internal #0x45b6
; memset(void* buf, char value, size_t length)
.define memset #0x45c8
; Hashes sr_len single-byte inputs with sha256_internal, writing each hash
; to the array at haddr and advancing the destination by hsize bytes per hash
get_sram_hashes:
clr r11 ; loop variable in r11
mov #msize, r14 ; r14 = msize (0x1): length passed to sha256_internal
mov #haddr, r13 ; set destination to haddr (0x7000)
sr_loop:
mov r11, r15 ; r15 = address of the byte to hash
call sha256_internal ; <sha256_internal>
add #hsize, r13 ; keep 3 bytes of the output
inc r11 ; inc r11
cmp #sr_len, r11 ; do that sr_len (0x140) times
jnc sr_loop
print_hex:
clr r11;
ph_loop:
mov.b haddr(r11), r14
mov.b r14, r15
rra r15 ; using rra here instead of rra.b means the value won't roll into the highest bit
rra r15 ; which negates the need to and 0xf, r15
rra r15
rra r15
clrc
and #0xf, r14
mov.b HEX_LUT(r15), r15
call putchar ; <putchar>
mov.b HEX_LUT(r14), r15
call putchar ; <putchar>
inc r11 ; inc r11
cmp #ha_len, r11 ; do that sram_length*3 times
jnc ph_loop
mov.b #0xa, r15 ; '\n'
call #0x4578 ; putchar ('\n')
take_input:
mov #sr_len, r14
mov #iaddr, r15
call getsn ; <getsn>
check_all_passwords:
;for i in 0..sr_len:
clr r9
pw_loop:
; memcpy(kaddr, iaddr + i, len)
mov #0x10, r13
mov #iaddr, r14
add r9, r14
mov #kaddr, r15
call memcpy
; INT (0x42, key)
push #kaddr
push #0x42
call INT
add #4, sp
; INT(7f)
unlock7f:
push #0
push #0
push #0x7f
call INT
add #6, sp
inc r9
cmp #sr_len, r9
jl pw_loop
end:
ret

View File

@@ -9,11 +9,11 @@ jmp main
.string "ABA" .string "ABA"
.string "ABAB" .string "ABAB"
.word 0b0101101001011010 .word 0b0101101001011010
.words [dead beef] .words [0xdead 0xbeef 0x0000]
main: main:
; testing defines ; testing defines
.define asdfgh #1000 .define asdfgh #0x1000
.define qwerty @sp+ .define qwerty @sp+
br asdfgh br asdfgh
mov qwerty, r15 mov qwerty, r15
@@ -88,73 +88,74 @@ mov @r13+, r14
mov @r14+, r15 mov @r14+, r15
.define special r6 .define special r6
;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid ; mov , r14
;mov @sp+, r15 ; pop r15 ; mov @pc+, r15 ; This is a mov-immediate, and may corrupt your output
;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] mov @sp+, r15 ; pop r15
;mov @cg+, r15 mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
mov @cg+, r15
indexed_mode: indexed_mode:
.define numbered r7 .define numbered r7
mov.b 10(r0), r1 mov.b 0x10(r0), r1
mov 10(r1), r2 mov 0x10(r1), r2
;mov 10(r2), r3 ; Invalid: cannot index relative to sr ;mov 10(r2), r3 ; Invalid: cannot index relative to sr
;mov 10(r3), r4 ; Invalid: cannot index relative to cg ;mov 10(r3), r4 ; Invalid: cannot index relative to cg
mov 10(r4), r5 mov 0x10(r4), r5
mov 10(r5), r6 mov 0x10(r5), r6
mov 10(r6), r7 mov 0x10(r6), r7
mov 10(r7), r8 mov 0x10(r7), r8
mov 10(r8), r9 mov 0x10(r8), r9
mov 10(r9), r10 mov 0x10(r9), r10
mov 10(r10), r11 mov 0x10(r10), r11
mov 10(r11), r12 mov 0x10(r11), r12
mov 10(r12), r13 mov 0x10(r12), r13
mov 10(r13), r14 mov 0x10(r13), r14
mov 10(r14), r15 mov 0x10(r14), r15
.define special r8 .define special r8
mov 10(pc), r15 mov 0x10(pc), r15
mov 10(sp), r15 mov 0x10(sp), r15
;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8] ;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
;mov 10(cg), r15 ;mov 10(cg), r15
_immediate_mode: _immediate_mode:
.define numbered r9 .define numbered r9
mov #beef, r0 mov #0xbeef, r0
mov #beef, r1 mov #0xbeef, r1
mov #beef, r2 mov #0xbeef, r2
mov #beef, r3 mov #0xbeef, r3
mov #beef, r4 mov #0xbeef, r4
mov #beef, r5 mov #0xbeef, r5
mov #beef, r6 mov #0xbeef, r6
mov #beef, r7 mov #0xbeef, r7
mov #beef, r8 mov #0xbeef, r8
mov #beef, r9 mov #0xbeef, r9
mov #beef, r10 mov #0xbeef, r10
mov #beef, r11 mov #0xbeef, r11
mov #beef, r12 mov #0xbeef, r12
mov #beef, r13 mov #0xbeef, r13
mov #beef, r14 mov #0xbeef, r14
mov #beef, r15 mov #0xbeef, r15
.define special r10 .define special r10
mov #beef, pc mov #0xbeef, pc
mov #beef, sp mov #0xbeef, sp
mov #beef, sr mov #0xbeef, sr
mov #beef, cg mov #0xbeef, cg
jmp _register_mode jmp _register_mode
jmp 3fe jmp 0x3fe
jmp -3fc jmp -0x3fc
ret ret
; Funky encodings ; Funky encodings
mov r6, r4 mov r6, r4
mov @r6, r4 mov @r6, r4
mov @r6+, r4 mov @r6+, r4
mov 0(r6), r4 mov 0x0(r6), r4
mov 4141(r6), r4 mov 0x4141(r6), r4
mov #-1, r4 mov #-1, r4
mov #ffff, r4 mov #0xffff, r4
mov #0, r4 mov #0, r4
mov #1, r4 mov #1, r4
mov #2, r4 mov #2, r4
@@ -164,33 +165,33 @@ mov r6, 0(r4)
mov @r6, 0(r4) mov @r6, 0(r4)
mov @r6+, 0(r4) mov @r6+, 0(r4)
mov 0(r6), 0(r4) mov 0(r6), 0(r4)
mov 4141(r6), 0(r4) mov 0x4141(r6), 0(r4)
mov #-1, 0(r4) mov #-1, 0(r4)
mov #ffff, 0(r4) mov #0xffff, 0(r4)
mov #0, 0(r4) mov #0, 0(r4)
mov #1, 0(r4) mov #1, 0(r4)
mov #2, 0(r4) mov #2, 0(r4)
mov #4, 0(r4) mov #4, 0(r4)
mov #8, 0(r4) mov #8, 0(r4)
mov r6, 4141(r4) mov r6, 0x4141(r4)
mov @r6, 4141(r4) mov @r6, 0x4141(r4)
mov @r6+, 4141(r4) mov @r6+, 0x4141(r4)
mov 0(r6), 4141(r4) mov 0(r6), 0x4141(r4)
mov 4141(r6), 4141(r4) mov 0x4141(r6), 0x4141(r4)
mov #-1, 4141(r4) mov #-1, 0x4141(r4)
mov #ffff, 4141(r4) mov #0xffff, 0x4141(r4)
mov #0, 4141(r4) mov #0, 0x4141(r4)
mov #1, 4141(r4) mov #1, 0x4141(r4)
mov #2, 4141(r4) mov #2, 0x4141(r4)
mov #4, 4141(r4) mov #4, 0x4141(r4)
mov #8, 4141(r4) mov #8, 0x4141(r4)
mov r6, #0 mov r6, #0
mov @r6, #0 mov @r6, #0
mov @r6+, #0 mov @r6+, #0
mov 0(r6), #0 mov 0(r6), #0
mov 4141(r6), #0 mov 0x4141(r6), #0
mov #-1, #0 mov #-1, #0
mov #ffff, #0 mov #0xffff, #0
mov #0, #0 mov #0, #0
mov #1, #0 mov #1, #0
mov #2, #0 mov #2, #0
@@ -200,9 +201,9 @@ mov r6, #1
mov @r6, #1 mov @r6, #1
mov @r6+, #1 mov @r6+, #1
mov 0(r6), #1 mov 0(r6), #1
mov 4141(r6), #1 mov 0x4141(r6), #1
mov #-1, #1 mov #-1, #1
mov #ffff, #1 mov #0xffff, #1
mov #0, #1 mov #0, #1
mov #1, #1 mov #1, #1
mov #2, #1 mov #2, #1
@@ -211,14 +212,14 @@ mov #8, #1
; Instruction exercise ; Instruction exercise
; Jumps ; Jumps
jne 10 jne 0x10
jeq 10 jeq 0x10
jlo 10 jlo 0x10
jhs 10 jhs 0x10
jn 10 jn 0x10
jge 10 jge 0x10
jl 10 jl 0x10
jmp 10 jmp 0x10
; Two-ops ; Two-ops
mov r14, r15 mov r14, r15
@@ -232,7 +233,7 @@ bit r14, r15
bic r14, r15 bic r14, r15
bis r14, r15 bis r14, r15
xor r14, r15 xor r14, r15
and r14, 10(r15) and r14, 0x10(r15)
; One-ops ; One-ops
rrc r15 rrc r15
@@ -241,13 +242,14 @@ rra r15
sxt r15 sxt r15
push r15 push r15
call r15 call r15
reti r15 ; reti is special
reti
; Jump aliases ; Jump aliases
jnc 10 jnc 0x10
jnz 10 jnz 0x10
jc 10 jc 0x10
jz 10 jz 0x10
; "emulated" no-op instructions ; "emulated" no-op instructions
ret ret

View File

@@ -1,197 +1,425 @@
// © 2023 John Breaux // © 2023-2024 John Breaux
//! Traverses an AST, assembling instructions. //See LICENSE.md for license
//! //! Assembles a binary using the given [AST](crate::parser::ast)
//! [Assembler] carries *some* state
use crate::parser::preamble::*; use error::{AResult, ErrorKind::*};
use error::AssemblyError;
use std::collections::HashMap; use std::collections::HashMap;
use std::path::Path;
pub mod error; use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, span::Span};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] use self::error::{Error, ErrorKind};
pub enum IdentType {
Word,
Jump,
}
/// Takes in an AST's [Root], and outputs a sequence of bytes /// Assembles a binary using the given [Assemble]-able item
#[derive(Clone, Debug, Default, PartialEq, Eq)] #[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Assembler { pub struct Assembler<'t> {
out: Vec<u16>, /// The assembled output
/// A map from Labels' [Identifier]s to their location in the binary output: Vec<u16>,
labels: HashMap<Identifier, usize>, /// Table of labels, for backpatching
/// A list of all referenced [Identifier]s in the binary, and their locations labels: HashMap<&'t str, usize>,
identifiers: Vec<(usize, Identifier, IdentType)>, /// Backpatch table for jump instructions
jump_queue: Vec<(usize, &'t str)>,
/// Backpatch table for immediate values
expr_queue: Vec<(usize, Expr<'t>)>,
/// Base address from .org directives
org_base: usize,
/// Last seen index in input
loc: Span<usize>,
} }
impl Assembler { impl<'t> Assembler<'t> {
pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> { pub fn new() -> Self {
let mut out = Self::default(); Default::default()
out.visit_root(r)?; }
Ok(out.out) pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> {
t.assemble_in(self)
}
/// Gets the address of a label
pub fn addrof(&self, name: &str) -> Option<u16> {
self.labels.get(name).map(|v| *v as u16)
}
/// Gets the value at a label
pub fn valueof(&self, name: &str) -> Option<u16> {
self.output.get(self.addrof(name)? as usize).copied()
}
fn push(&mut self, word: u16) {
self.output.push(word)
}
fn error(&self, kind: ErrorKind) -> Error {
Error { span: self.loc, kind }
}
/// Backpatches everything, and yoinks the output buffer.
pub fn out(&mut self) -> AResult<Vec<u16>> {
// Resolve jumps
for (idx, key) in &self.jump_queue {
// eprintln!("Patching jump at {idx} with key {key}");
match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) {
None => Err(self.error(UndefinedLabel(key.to_string())))?,
Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff,
Some(value) => Err(self.error(LongJump(value)))?,
}
}
// Resolve immediates through late expression evaluation.
for (idx, expr) in &self.expr_queue {
// eprintln!("Patching immediate at {idx} with expression {expr:?}");
self.output[*idx] = self.eval(expr)?;
}
let out = std::mem::take(&mut self.output);
*self = Default::default();
Ok(out)
} }
pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) }
pub fn out(self) -> Vec<u16> { self.out }
fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) } pub fn add_label(&mut self, label: &'t str) -> AResult<()> {
fn push_default(&mut self) -> usize { if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() {
self.out.push(Default::default()); Err(self.error(RedefinedLabel(label.into())))?
self.out.len() - 1 }
Ok(())
}
/// Appends an expr as an extword, deferring its calculation for later
pub fn defer_expr(&mut self, e: Expr<'t>) {
self.expr_queue.push((self.output.len(), e));
self.push(0);
}
/// Defers resolution of a jump label until output time
/// The jump label will be later resolved to the NEXT word.
pub fn defer_jump(&mut self, label: &'t str) {
self.jump_queue.push((self.output.len(), label))
} }
} }
impl Assembler { pub trait Assemble<'t> {
/// Visits the [Root] node of a parse tree fn assemble(&self) -> AResult<Vec<u16>> {
fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> { self.assemble_in(&mut Default::default())?.out()
// Visit the entire tree
for (num, line) in r.lines() {
self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?;
} }
// Link identifiers fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>;
for (idx, id, id_type) in self.identifiers.iter() { }
let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) };
let offset = (num as isize - *idx as isize) * 2; impl<'t> Assemble<'t> for Statements<'t> {
*self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type { fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
IdentType::Word => offset as u16, for stmt in &self.stmts {
IdentType::Jump => JumpTarget::squish(offset)?, stmt.assemble_in(a)?;
}
Ok(a)
}
}
impl<'t> Assemble<'t> for Statement<'t> {
    /// Assembles one statement: labels are registered, instructions and
    /// directives are assembled, and comments produce no output.
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        match self {
            Statement::Comment(_) => Ok(a),
            Statement::Directive(directive) => directive.assemble_in(a),
            Statement::Insn(insn) => insn.assemble_in(a),
            Statement::Label(label) => {
                a.add_label(label)?;
                Ok(a)
            }
        }
    }
}
impl<'t> Assemble<'t> for Directive<'t> {
    /// Assembles a directive.
    ///
    /// - `Define` emits nothing here.
    /// - `Org` evaluates its expression immediately and stores it as the base address.
    /// - `Word` and `Words` reserve one output word per expression, evaluated later.
    /// - `String` is emitted through the `&str` implementation of [Assemble].
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        match self {
            Directive::Define(_) => {}
            Directive::Org(base) => {
                let base = a.eval(base)?;
                a.org_base = base as usize;
            }
            Directive::Word(expr) => a.defer_expr((**expr).clone()),
            Directive::Words(exprs) => exprs.iter().cloned().for_each(|expr| a.defer_expr(expr)),
            Directive::String(text) => {
                text.assemble_in(a)?;
            }
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for &'t str {
    /// Emits the string's bytes packed two per word, low byte first, followed
    /// by a zero terminator.
    ///
    /// An odd-length string shares its final word with the terminator (the
    /// high byte is zero); an even-length string gets a whole zero word.
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let pairs = self.as_bytes().chunks_exact(2);
        // At most one byte is left over once all full pairs are taken
        let leftover = pairs.remainder();
        for pair in pairs {
            a.push(u16::from_le_bytes([pair[0], pair[1]]));
        }
        a.push(leftover.first().map_or(0, |&byte| byte as u16));
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Instruction<'t> {
    /// Records this instruction's span in the assembler, then assembles its kind.
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        // Errors built by the assembler take their span from `loc`
        a.loc = self.span;
        self.kind.assemble_in(a)
    }
}
impl<'t> Assemble<'t> for InstructionKind<'t> {
/// Forwards to the [Assemble] implementation of whichever instruction form
/// this is.
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
match self {
InstructionKind::NoEm(v) => v.assemble_in(a),
InstructionKind::OneEm(v) => v.assemble_in(a),
InstructionKind::OneArg(v) => v.assemble_in(a),
InstructionKind::TwoArg(v) => v.assemble_in(a),
InstructionKind::Jump(v) => v.assemble_in(a),
InstructionKind::Reti(v) => v.assemble_in(a),
InstructionKind::Br(v) => v.assemble_in(a),
}
}
}
impl<'t> Assemble<'t> for NoEm {
/// Assembles a noncanonical instruction by converting a clone of it to its
/// canonical form and assembling that. Prints a warning to stderr on every
/// call.
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
impl<'t> Assemble<'t> for OneEm<'t> {
/// Assembles a noncanonical instruction by converting a clone of it to its
/// canonical form and assembling that. Prints a warning to stderr on every
/// call.
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
impl<'t> Assemble<'t> for OneArg<'t> {
    /// Encodes a single-operand instruction, plus an extension word when the
    /// operand needs one.
    ///
    /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
    /// `[ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let (reg, mode, extword) = source(&self.src);
        let word = 0b000100 << 10
            | one_arg(self.opcode) << 7
            | (self.width as u16) << 6
            | mode << 4
            | reg;
        a.push(word);
        // The operand's expression, if any, is evaluated at output time
        if let Some(expr) = extword {
            a.defer_expr(expr);
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for TwoArg<'t> {
    /// Encodes a two-operand instruction, followed by the source extension
    /// word and then the destination extension word when either is needed.
    ///
    /// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
    /// `[ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let (src_reg, src_mode, src_ext) = source(&self.src);
        let (dst_reg, dst_mode, dst_ext) = destination(&self.dst);
        let opcode_bits = two_arg(self.opcode) << 12;
        let source_bits = src_reg << 8 | src_mode << 4;
        let dest_bits = dst_mode << 7 | dst_reg;
        let width_bit = (self.width as u16) << 6;
        a.push(opcode_bits | source_bits | dest_bits | width_bit);
        // Source extension word first, then destination
        for expr in [src_ext, dst_ext].into_iter().flatten() {
            a.defer_expr(expr);
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Jump<'t> {
/// Encodes a jump as a single word.
///
/// `[ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]`
/// `[ 0 0 1 [cond:3] +- [word_offset:10 ] ]`
///
/// Returns `OddJump` for an odd relative target and `LongJump` for one
/// outside `-0x3fe..=0x400`. A label target is queued for backpatching and
/// its offset bits are left as zero.
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, dst } = self;
let word = 1 << 13
| jump(*opcode) << 10
| match *dst {
// Relative targets must be even
JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))),
JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => {
return Err(a.error(LongJump(value)))
}
// `value` is even here, so `(value - 1) >> 1` equals `(value - 2) / 2`
JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff,
JumpDst::Label(label) => {
// Queued at the index this word is about to occupy; the offset is
// OR-ed in when the output is finalized
a.defer_jump(label);
0
}
} & 0x3ff;
a.push(word);
Ok(a)
}
}
impl<'t> Assemble<'t> for Reti {
    /// Emits the fixed single-word encoding of `reti`.
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        a.push(0x1300);
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Br<'t> {
/// Assembles a noncanonical instruction by converting a clone of it to its
/// canonical form and assembling that. Prints a warning to stderr on every
/// call.
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
/// Returns the opcode field of a single-operand instruction, which is the
/// discriminant of the [token::OneArg] itself.
pub fn one_arg(opcode: token::OneArg) -> u16 {
opcode as u16
}
/// Returns the opcode field of a two-operand instruction: the discriminant of
/// the [token::TwoArg], offset by 4.
// NOTE(review): presumably `token::TwoArg` starts at 0, making the first
// two-operand opcode 4 — confirm against the enum definition.
pub fn two_arg(opcode: token::TwoArg) -> u16 {
opcode as u16 + 4
}
/// Returns the 3-bit condition code of a jump instruction.
///
/// Aliased mnemonics share a code: `jne`/`jnz`, `jeq`/`jz`, `jnc`/`jlo`, and
/// `jc`/`jhs`.
pub fn jump(opcode: token::Jump) -> u16 {
use token::Jump;
match opcode {
Jump::Jne | Jump::Jnz => 0,
Jump::Jeq | Jump::Jz => 1,
Jump::Jnc | Jump::Jlo => 2,
Jump::Jc | Jump::Jhs => 3,
Jump::Jn => 4,
Jump::Jge => 5,
Jump::Jl => 6,
Jump::Jmp => 7,
}
}
/// Returns a tuple of (Reg, AddrMode, extword)
///
/// The extension word is `Some` only for operands that carry an expression
/// (immediate, absolute, indexed, and bare expressions); the expression is
/// cloned out of the operand.
pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) {
use SrcSpecial::*;
match src {
// Special constants are encoded with registers 2 and 3 and need no
// extension word
Src::Special(Four) => (2, 2, None),
Src::Special(Eight) => (2, 3, None),
Src::Special(Zero) => (3, 0, None),
Src::Special(One) => (3, 1, None),
Src::Special(Two) => (3, 2, None),
Src::Special(NegOne) => (3, 3, None),
// Immediate: register 0 in mode 3, value in the extension word
Src::Immediate(e) => (0, 3, Some(*e.clone())),
// Absolute: register 2 in mode 1, address in the extension word
Src::Absolute(e) => (2, 1, Some(*e.clone())),
Src::Direct(r) => (*r as u16, 0, None),
Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Src::Indirect(r) => (*r as u16, 2, None),
Src::PostInc(r) => (*r as u16, 3, None),
// A bare expression is encoded exactly like an immediate
Src::BareExpr(e) => (0, 3, Some(*e.clone())),
}
}
/// Returns a tuple of (Reg, AddrMode, Extword)
///
/// The extension word is `Some` only for absolute and indexed destinations;
/// the expression is cloned out of the operand.
pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) {
use DstSpecial::*;
match dst {
// Special constants are encoded with register 3 and need no extension word
Dst::Special(Zero) => (3, 0, None),
Dst::Special(One) => (3, 1, None),
// Absolute: register 2 in mode 1, address in the extension word
Dst::Absolute(e) => (2, 1, Some(*e.clone())),
Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Dst::Direct(r) => (*r as u16, 0, None),
}
}
impl<'t> Assembler<'t> {
/// Evaluates an [Expr] using labels and constants defined in the current program
fn eval(&self, expr: &Expr) -> AResult<u16> {
match expr {
Expr::Binary(head, tails) => {
let mut head = self.eval(head)?;
for (op, tail) in tails {
let tail = self.eval(tail)?;
head = match op {
BinOp::Mul => head.wrapping_mul(tail),
BinOp::Div => head.wrapping_div(tail),
BinOp::Rem => head.wrapping_rem(tail),
BinOp::Add => head.wrapping_add(tail),
BinOp::Sub => head.wrapping_sub(tail),
BinOp::Lsh => head.wrapping_shl(tail as u32),
BinOp::Rsh => head.wrapping_shr(tail as u32),
BinOp::And => head & tail,
BinOp::Xor => head ^ tail,
BinOp::Or => head | tail,
}; };
} }
Ok(()) Ok(head)
} }
Expr::Unary(ops, tail) => {
/// visit a [Line] let mut tail = self.eval(tail)?;
fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> { for op in ops {
match line { tail = match op {
Line::Insn(insn) => self.visit_instruction(insn), UnOp::Not => !tail,
Line::Label(label) => self.visit_label(label), UnOp::Neg => 0u16.wrapping_sub(tail),
Line::Directive(d) => self.visit_directive(d), UnOp::Deref => *self
_ => Ok(()), .output
.get(tail.wrapping_sub(self.org_base as u16) as usize >> 1)
.ok_or_else(|| self.error(OobRead(tail)))?,
} }
} }
Ok(tail)
/// Visits a [Directive] }
fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> { Expr::Group(e) => self.eval(e),
match node { Expr::Number(n) => Ok(*n),
Directive::Org(_) => todo!("{node}"), Expr::Ident(name) => {
Directive::Define(..) => (), self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string())))
Directive::Include(r) => self.visit_root(r)?, }
Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()), Expr::AddrOf(name) => self
Directive::Bytes(words) | Directive::Words(words) => { .addrof(name)
for word in words { .map(|p| (p << 1).wrapping_add(self.org_base as u16))
self.out.push((*word).into()); .ok_or_else(|| self.error(UndefinedLabel(name.to_string()))),
}
}
}
pub mod error {
use std::fmt::Display;
use crate::span::Span;
pub type AResult<T> = Result<T, Error>;
/// An assembly error: what went wrong, and the span the assembler last
/// recorded when it happened.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Error {
/// Span reported alongside the error
pub span: Span<usize>,
/// The reason assembly failed
pub kind: ErrorKind,
}
impl std::error::Error for Error {}
/// The reasons assembly can fail
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ErrorKind {
/// Placeholder for functionality that is not yet implemented
#[default]
Todo,
/// A label was used, but not defined
UndefinedLabel(String),
/// A label was defined more than once, at different locations
RedefinedLabel(String),
/// A constant expression read from an address outside the assembled output
OobRead(u16),
/// A relative jump target was odd
OddJump(i16),
/// A jump target was too far away to encode
LongJump(i16),
/// A plethora of [Error]s
Errors(Vec<Error>),
}
impl Display for Error {
    /// Formats the error as `[span]: Error: kind`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{}]: Error: {}", self.span, self.kind)
    }
}
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::Todo => write!(f, "Not yet implemented"),
ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"),
ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"),
ErrorKind::OobRead(addr) => {
write!(f, "Out of bounds read in constant expression: {addr}")
}
ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"),
ErrorKind::LongJump(to) => {
write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe")
}
ErrorKind::Errors(errors) => {
writeln!(f, "Could not complete assembly:")?;
for error in errors {
writeln!(f, "{error}")?;
}
Ok(())
} }
} }
Directive::String(s) => self.visit_string(s)?,
Directive::Strings(strs) => {
for s in strs {
self.visit_string(s)?;
} }
} }
};
Ok(())
}
/// Visits a [Label]
fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> {
// Register the label
match self.labels.insert(node.0.to_owned(), self.out.len()) {
Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())),
_ => Ok(()),
}
}
/// Visits an [Instruction]
fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> {
self.push_default();
self.visit_opcode(insn.opcode())?;
self.visit_encoding(insn.encoding())?;
Ok(())
}
/// Visits an [Opcode]
fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> {
*self.last_mut()? |= *node as u16;
Ok(())
}
/// Visits an [Encoding]
fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> {
*self.last_mut()? |= node.word();
match node {
Encoding::Single { dst, .. } => {
self.visit_primary_operand(dst)?;
}
Encoding::Jump { target } => {
self.visit_jump_target(target)?;
}
Encoding::Double { src, dst, .. } => {
self.visit_primary_operand(src)?;
self.visit_secondary_operand(dst)?;
}
}
Ok(())
}
/// Visits a [JumpTarget]
fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> {
match node {
JumpTarget::Number(num) => self.visit_number(num),
JumpTarget::Identifier(id) => {
self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?;
Ok(())
}
}
}
/// Visits a [SecondaryOperand]
fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> {
use SecondaryOperand as O;
if let O::Indexed(_, num) | O::Absolute(num) = node {
self.push_default();
self.visit_number(num)?;
}
Ok(())
}
/// Visits a [PrimaryOperand]
fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> {
use PrimaryOperand as O;
match node {
O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => {
self.push_default();
self.visit_number(num)?;
}
O::Relative(id) => {
let addr = self.push_default();
self.visit_identifier(id, addr, IdentType::Word)?;
}
_ => (),
}
Ok(())
}
/// Visits a number and writes it into the last index
fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> {
*self.last_mut()? |= u16::from(*node);
Ok(())
}
/// Visits a number and appends it to the output buffer
fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> {
for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() {
if idx % 2 == 0 {
self.push_default();
}
*self.last_mut()? |= (byte as u16) << (8 * (idx % 2));
}
Ok(())
}
/// Visits an [Identifier], and registers it to the identifier list
fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> {
self.identifiers.push((addr, node.clone(), ty));
Ok(())
}
} }

View File

@@ -1,56 +0,0 @@
// © 2023 John Breauxs
use crate::parser::{error::ParseError, preamble::*};
use std::{
fmt::Display,
path::{Path, PathBuf},
};
#[derive(Debug)]
pub enum AssemblyError {
UnresolvedIdentifier(Identifier),
RedefinedLabel(Identifier),
JumpedTooFar(Identifier, isize),
ParseError(ParseError),
// TODO: This, better'
Context(Box<AssemblyError>, PathBuf, usize),
EmptyBuffer,
}
impl AssemblyError {
pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self {
Self::Context(self.into(), file.as_ref().into(), line)
}
}
impl From<ParseError> for AssemblyError {
fn from(value: ParseError) -> Self { Self::ParseError(value) }
}
impl Display for AssemblyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnresolvedIdentifier(id) => {
write!(f, "Identifier {id} is undefined, but referenced anyway.")
}
Self::RedefinedLabel(id) => {
write!(f, "Redefined label '{id}'.")
}
Self::JumpedTooFar(id, num) => {
write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)")
}
Self::ParseError(e) => Display::fmt(e, f),
Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()),
Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f),
}
}
}
impl std::error::Error for AssemblyError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::ParseError(e) => Some(e),
Self::Context(e, ..) => Some(e),
_ => None,
}
}
}

View File

@@ -1,49 +0,0 @@
// © 2023 John Breauxs
//! Common error type for [msp430-asm](crate) errors
use super::*;
use std::fmt::Display;
#[derive(Debug)]
pub enum Error {
/// Produced by [lexer]
LexError(lexer::error::LexError),
/// Produced by [parser]
ParseError(parser::error::ParseError),
/// Produced by [assembler]
AssemblyError(assembler::error::AssemblyError),
}
impl Error {}
impl From<lexer::error::LexError> for Error {
fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) }
}
impl From<parser::error::ParseError> for Error {
fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) }
}
impl From<assembler::error::AssemblyError> for Error {
fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) }
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Error::LexError(e) => Display::fmt(e, f),
Error::ParseError(e) => Display::fmt(e, f),
Error::AssemblyError(e) => Display::fmt(e, f),
}
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::LexError(e) => Some(e),
Error::ParseError(e) => Some(e),
Error::AssemblyError(e) => Some(e),
}
}
}

View File

@@ -1,22 +0,0 @@
// © 2023 John Breaux
//! Convenience functions and traits for dealing with hashable data
/// The integer type produced by [`hash()`]
pub type Hash = u64;

/// Feeds `hashable` into a freshly-created
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher) and returns the finished value.
pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let mut state = DefaultHasher::new();
    std::hash::Hash::hash(&hashable, &mut state);
    state.finish()
}
/// Builds values of any type that is convertible from a [`Hash`] out of hashable data.
pub trait FromHash: From<Hash> {
    /// Hashes anything that implements [`std::hash::Hash`] using the
    /// [DefaultHasher](std::collections::hash_map::DefaultHasher)
    fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
        hash(hashable)
    }
    /// Hashes `hashable`, then converts the resulting [`Hash`] into `Self`.
    fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
    where
        Self: Sized,
    {
        let digest = <Self as FromHash>::hash(hashable);
        digest.into()
    }
}

/// Every type that is `From<Hash>` implements [`FromHash`].
impl<T> FromHash for T where T: From<Hash> {}

View File

@@ -1,69 +1,269 @@
// © 2023 John Breaux // © 2023-2024 John Breaux
//! Iterates over [`&str`](str), producing [`Token`s](Token) //See LICENSE.md for license
//! The [Lexer] turns a [sequence of characters](str) into a stream of
//! [lexically-tagged tokens](token)
pub mod context;
pub mod error;
pub mod ignore;
pub mod preprocessed;
pub mod token; pub mod token;
pub mod token_stream;
use context::Context; use self::token::{Special, TokenKind, *};
use error::LexError; use crate::span::Span;
use token::{Token, Type}; use std::{
use token_stream::TokenStream; iter::Peekable,
str::{CharIndices, FromStr},
};
use unicode_ident::*;
/// Iterates over &[str], producing [Token]s const DEFAULT_BASE: u32 = 10;
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] /// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token).
pub struct Tokenizer<'t> { ///
/// # Examples
/// ```rust
/// # use libmsp430::lexer::{Lexer, token::*};
/// let text = "mov r14, r15";
/// let mut lexer = Lexer::new(text);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof);
/// ```
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
/// Keeps track of the byte offset into the string
iter: Peekable<CharIndices<'t>>,
text: &'t str, text: &'t str,
idx: usize, start: usize,
context: Context, index: usize,
} }
impl<'t> Tokenizer<'t> { impl<'t> Lexer<'t> {
/// Produces a new [Tokenizer] from a [str]ing slice /// Creates a new [Lexer] over some [text](str)
pub fn new<T>(text: &'t T) -> Self pub fn new(text: &'t str) -> Self {
where T: AsRef<str> + ?Sized { Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 }
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() } }
/// Gets the current byte-position
pub fn location(&self) -> usize {
self.start
}
/// Internal: Builds a [Token] of the given [TokenKind] from the pending lexeme and its
/// [Span], then starts the next lexeme at the current index.
fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> {
    let (start, end) = (self.start, self.index);
    let token = Token::new(self.next_lexeme(), kind, Span { start, end });
    self.start = end;
    Some(token)
}
/// Internal: Borrows the slice of source text between `start` and `index`
fn next_lexeme(&self) -> &'t str {
    let pending = self.start..self.index;
    &self.text[pending]
}
/// Internal: Consumes characters for as long as `f` accepts the next one
fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self {
    while matches!(self.peek(), Some(&c) if f(c)) {
        self.next();
    }
    self
}
/// Internal: Skips whitespace other than `'\n'`, then restarts the pending lexeme at the
/// current index.
///
/// Returns [None] when the end of input is reached. The pending lexeme is restarted in that
/// case as well, so skipped trailing whitespace is not included in the lexeme or span of
/// whatever token is emitted next.
fn space(&mut self) -> Option<&mut Self> {
    self.repeat(|c| c.is_whitespace() && c != '\n');
    // Discard the skipped whitespace unconditionally, even when nothing follows it
    self.start = self.index;
    if self.peek().is_none() {
        return None;
    }
    Some(self)
}
/// Advances past one [char] unconditionally, returning `self` so calls can be chained
fn then(&mut self) -> &mut Self {
    let _ = self.next();
    self
}
/// Internal: Looks at the next [char] without consuming it
fn peek(&mut self) -> Option<&char> {
    let (_, c) = self.iter.peek()?;
    Some(c)
}
fn next(&mut self) -> Option<char> {
let (index, c) = self.iter.next()?;
self.index = index + c.len_utf8();
Some(c)
} }
fn count(&mut self, token: &Token) { /// Scans for the next [Token] in the stream
// update the context pub fn scan(&mut self) -> Option<Token<'t>> {
self.context.count(token); if self.space().is_none() {
// advance the index return self.emit(TokenKind::Eof);
self.idx += token.len(); }
let Some(c) = self.peek() else {
return self.emit(TokenKind::Eof);
};
match c {
'\n' => self.then().emit(TokenKind::Newline),
'!' => self.then().emit(TokenKind::Bang),
'#' => self.then().emit(TokenKind::Hash),
'$' => self.then().emit(TokenKind::Dollar),
'%' => self.then().emit(TokenKind::Percent),
'&' => self.then().emit(TokenKind::Amp),
'\'' => self.then().char(),
'"' => self.then().string(),
'(' => self.then().emit(TokenKind::OpenParen),
')' => self.then().emit(TokenKind::CloseParen),
'*' => self.then().emit(TokenKind::Star),
'+' => self.then().emit(TokenKind::Plus),
',' => self.then().emit(TokenKind::Comma),
'-' => self.then().emit(TokenKind::Minus),
'.' => self.then().directive_or_bw(),
'/' => self.then().comment_or_slash(),
'0' => self.then().number_with_base(),
':' => self.then().emit(TokenKind::Colon),
';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
'<' => self.then().less(),
'>' => self.then().greater(),
'@' => self.then().emit(TokenKind::At),
'[' => self.then().emit(TokenKind::OpenBrace),
']' => self.then().emit(TokenKind::CloseBrace),
'^' => self.then().emit(TokenKind::Caret),
'_' => self.then().identifier(),
'{' => self.then().emit(TokenKind::OpenCurly),
'|' => self.then().emit(TokenKind::Bar),
'}' => self.then().emit(TokenKind::CloseCurly),
c if c.is_numeric() => self.number::<DEFAULT_BASE>(),
&c if is_xid_start(c) => self.identifier(),
c => todo!("Unrecognized character: {c}"),
}
}
/// Internal: Lexes the remainder of a number that started with `0`.
///
/// `x`, `d`, `o`, and `b` select base 16, 10, 8, and 2 respectively; an ASCII digit continues
/// a number in the default base; anything else emits a lone base-10 zero.
fn number_with_base(&mut self) -> Option<Token<'t>> {
    let Some(&prefix) = self.peek() else {
        return self.emit(TokenKind::Number(0, 10));
    };
    match prefix {
        'x' => self.then().number::<16>(),
        'd' => self.then().number::<10>(),
        'o' => self.then().number::<8>(),
        'b' => self.then().number::<2>(),
        c if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(),
        _ => self.emit(TokenKind::Number(0, 10)),
    }
}
/// Internal: Lexes one or more base-`B` digits into a [TokenKind::Number].
///
/// Returns [None] if there is no leading digit, or if the value does not fit in a [u16].
/// In the out-of-range case every digit of the literal is still consumed.
fn number<const B: u32>(&mut self) -> Option<Token<'t>> {
    const LIMIT: u32 = u16::MAX as u32;
    let mut num = self.digit::<B>()?;
    while let Some(digit) = self.digit::<B>() {
        // Stop accumulating once the value is already out of range. Without this guard,
        // `num * B + digit` overflows u32 on long literals (a panic in debug builds, and a
        // silently wrong value in release builds). While `num <= LIMIT` the sum always fits.
        if num <= LIMIT {
            num = num * B + digit;
        }
    }
    if num > LIMIT {
        return None;
    }
    self.emit(TokenKind::Number(num as u16, B as u8))
}
/// Internal: Consumes the next [char] if it is a base-`B` digit, returning its value
fn digit<const B: u32>(&mut self) -> Option<u32> {
    let value = self.peek().and_then(|c| c.to_digit(B))?;
    self.next();
    Some(value)
}
/// Internal: Lexes what follows a `/`: a second `/` starts a comment that runs up to (but not
/// including) the next `'\n'`; anything else leaves a lone slash.
fn comment_or_slash(&mut self) -> Option<Token<'t>> {
    let is_comment = matches!(self.peek(), Some('/'));
    if is_comment {
        self.repeat(|c| c != '\n');
    }
    self.emit(if is_comment { TokenKind::Comment } else { TokenKind::Slash })
}
/// Internal: Lexes what follows a `<`; a second `<` produces [TokenKind::Lsh].
///
/// # Panics
/// Panics (via [todo!]) when the next character is anything other than `<`.
fn less(&mut self) -> Option<Token<'t>> {
    if matches!(self.peek(), Some('<')) {
        return self.then().emit(TokenKind::Lsh);
    }
    todo!("less")
}
/// Internal: Lexes what follows a `>`; a second `>` produces [TokenKind::Rsh].
///
/// # Panics
/// Panics (via [todo!]) when the next character is anything other than `>`.
fn greater(&mut self) -> Option<Token<'t>> {
    if matches!(self.peek(), Some('>')) {
        return self.then().emit(TokenKind::Rsh);
    }
    todo!("greater")
}
fn identifier(&mut self) -> Option<Token<'t>> {
while let Some(c) = self.then().peek() {
if !is_xid_continue(*c) {
break;
}
}
let lexeme = self.next_lexeme();
if let Ok(op) = Reg::from_str(lexeme) {
self.emit(TokenKind::Reg(op))
} else if let Ok(op) = NoEm::from_str(lexeme) {
self.emit(TokenKind::NoEm(op))
} else if let Ok(op) = OneEm::from_str(lexeme) {
self.emit(TokenKind::OneEm(op))
} else if let Ok(op) = Special::from_str(lexeme) {
self.emit(TokenKind::Special(op))
} else if let Ok(op) = OneArg::from_str(lexeme) {
self.emit(TokenKind::OneArg(op))
} else if let Ok(op) = TwoArg::from_str(lexeme) {
self.emit(TokenKind::TwoArg(op))
} else if let Ok(op) = Jump::from_str(lexeme) {
self.emit(TokenKind::Jump(op))
} else {
self.emit(TokenKind::Identifier)
}
}
fn directive_or_bw(&mut self) -> Option<Token<'t>> {
while let Some(c) = self.then().peek() {
if !is_xid_continue(*c) {
break;
}
}
match self.next_lexeme() {
".b" => self.emit(TokenKind::Byte),
".w" => self.emit(TokenKind::Word),
_ => self.emit(TokenKind::Directive),
}
}
/// Internal: Consumes characters up to and including the next `"`, then emits a
/// [TokenKind::String]. Returns [None] if the input ends first.
///
/// Todo: Character unescaping in Lexer::string
fn string(&mut self) -> Option<Token<'t>> {
    loop {
        if self.next()? == '"' {
            break;
        }
    }
    self.emit(TokenKind::String)
}
/// Internal: Lexes one (possibly escaped) character followed by a closing `'`.
/// Returns [None] if either part is missing.
fn char(&mut self) -> Option<Token<'t>> {
    let value = self.unescape()?;
    match self.next() {
        Some('\'') => self.emit(TokenKind::Char(value)),
        _ => None,
    }
}
/// Consumes a single character, resolving it if it is a backslash escape.
///
/// A character other than `\` is returned as-is. After a `\`, the letters `a b f n r t` and
/// `0` map to their control characters, `x` and `u` defer to the hex and unicode escape
/// helpers, and any other character stands for itself.
fn unescape(&mut self) -> Option<char> {
    let first = self.next()?;
    if first != '\\' {
        return Some(first);
    }
    let resolved = match self.next()? {
        'a' => '\x07',
        'b' => '\x08',
        'f' => '\x0c',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        'x' => self.hex_escape()?,
        'u' => self.unicode_escape()?,
        '0' => '\0',
        literal => literal,
    };
    Some(resolved)
}
/// Reads exactly two hex digits and converts the resulting value into a [char].
/// Returns [None] if either digit is missing.
fn hex_escape(&mut self) -> Option<char> {
    let high = self.digit::<16>()?;
    let low = self.digit::<16>()?;
    // TODO: report a bad-unicode error here instead of a bare None
    char::from_u32((high << 4) + low)
}
/// unescape a single \u{} unicode escape
fn unicode_escape(&mut self) -> Option<char> {
let mut out = 0;
let Some('{') = self.peek() else {
return None; //Err(Error::invalid_escape('u', self.line(), self.col()));
};
self.then();
while let Some(c) = self.peek() {
match c {
'}' => {
self.then();
return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col()));
}
_ => out = (out << 4) + self.digit::<16>()?,
}
}
None //Err(Error::invalid_escape('u', self.line(), self.col()))
} }
} }
impl<'text> Iterator for Tokenizer<'text> { #[cfg(test)]
type Item = Token<'text>; mod tests;
/// Lexes the next [Token] from the unread part of the text, or returns [None] once the index
/// has reached the end of the text.
fn next(&mut self) -> Option<Self::Item> {
    let exhausted = self.idx >= self.text.len();
    if exhausted {
        return None;
    }
    let unread = &self.text[self.idx..];
    let token = Token::from(unread);
    // Update the context and advance the index past the token
    self.count(&token);
    Some(token)
}
}
impl<'text> TokenStream<'text> for Tokenizer<'text> {
    fn context(&self) -> Context {
        self.context
    }

    // Tokenizer has access to the source buffer, and can implement expect and peek without
    // cloning itself. This can go wrong, of course, if an [Identifier] is expected, since all
    // instructions and registers are valid identifiers.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let unread = &self.text[self.idx..];
        let token = match Token::expect(unread, expected) {
            Ok(token) => token,
            Err(e) => return Err(e.context(self.context())),
        };
        self.count(&token);
        Ok(token)
    }

    fn peek(&mut self) -> Self::Item {
        let unread = &self.text[self.idx..];
        Token::from(unread)
    }

    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let unread = &self.text[self.idx..];
        match Token::expect(unread, expected) {
            Ok(token) => Ok(token),
            Err(e) => Err(e.context(self.context())),
        }
    }
}

View File

@@ -1,38 +0,0 @@
// © 2023 John Breaux
//! A [Context] stores contextual information about the current tokenizer state
//!
//! This data is trivially copyable and can be provided in error messages using the
//! [Error::Contextual] specialization)
use super::*;
/// Stores contextual information about the current tokenizer state, useful for printing errors
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Context {
// Starts at 1; incremented each time an Endl token is counted
line: usize,
// Starts at 1; reset to 1 by an Endl token, otherwise advanced by the token's length
position: usize,
// Number of tokens counted so far (EndOfFile tokens are not counted)
tokens: usize,
}
impl Context {
    /// Creates a [Context] in its [Default] state
    pub fn new() -> Self {
        Self::default()
    }
    /// Gets the current line number
    pub fn line(&self) -> usize {
        self.line
    }
    /// Gets the number of tokens counted so far
    pub fn tokens(&self) -> usize {
        self.tokens
    }
    /// Gets the current position within the line
    pub fn position(&self) -> usize {
        self.position
    }
    /// Accounts for one token: an end-of-file token changes nothing, a line ending starts a
    /// new line, and anything else advances the position by the token's length.
    pub(super) fn count(&mut self, t: &Token) {
        let kind = t.variant();
        if matches!(kind, Type::EndOfFile) {
            return;
        }
        if matches!(kind, Type::Endl) {
            self.line += 1;
            self.position = 1;
        } else {
            self.position += t.len();
        }
        self.tokens += 1;
    }
}
impl Default for Context {
    /// Starts at line 1, position 1, with no tokens counted
    fn default() -> Self {
        let (line, position, tokens) = (1, 1, 0);
        Self { line, position, tokens }
    }
}
impl std::fmt::Display for Context {
    /// Formats the context as `line:position`
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, position, .. } = self;
        write!(f, "{}:{}", line, position)
    }
}

View File

@@ -1,68 +0,0 @@
// © 2023 John Breaux
use super::{
context::Context,
token::{OwnedToken, *},
};
use std::fmt::Display;
/// Errors produced while lexing
#[derive(Debug)]
pub enum LexError {
/// Any other error, tagged with [Context]. Created by [`LexError::context()`]
Contextual(Context, Box<Self>),
/// Produced by [Token] when the input is entirely unexpected.
UnexpectedSymbol(String),
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
UnexpectedToken { expected: Type, got: OwnedToken },
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the
/// expected [Types](Type)
AllExpectationsFailed { expected: Types, got: OwnedToken },
}
impl LexError {
pub fn context(self, c: Context) -> Self {
match self {
Self::Contextual(..) => self,
_ => Self::Contextual(c, Box::new(self)),
}
}
// Extracts the root of the error tree
pub fn bare(self) -> Self {
match self {
Self::Contextual(_, bare) => bare.bare(),
_ => self,
}
}
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
match expected.as_ref().len() {
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
}
}
pub fn mask_expectation(mut self, expected: Type) -> Self {
match self {
LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::Contextual(context, err) => {
self = LexError::Contextual(context, Box::new(err.mask_expectation(expected)))
}
_ => (),
}
self
}
}
impl Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""),
LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."),
}
}
}
// Uses the default trait methods, so `source()` reports no underlying cause
impl std::error::Error for LexError {}

View File

@@ -1,55 +0,0 @@
// © 2023 John Breaux
//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`]
use super::*;
/// Wraps a mutable reference to a [`TokenStream`], together with a [Type] to ignore
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ignore<'t, T>
where T: TokenStream<'t>
{
// The [Type] passed to `allow` on the inner stream before each expect/peek/peek_expect
ignore: Type,
// The wrapped stream
inner: &'t mut T,
}
impl<'t, T> Ignore<'t, T>
where
    T: TokenStream<'t>,
{
    /// Creates a new [Ignore] around `t`, which ignores the [ignore Type](Type)
    pub fn new(ignore: Type, t: &'t mut T) -> Self {
        Self { inner: t, ignore }
    }
    /// Gets a mutable reference to the wrapped stream
    pub fn inner_mut(&mut self) -> &mut T {
        &mut *self.inner
    }
}
impl<'t, T> Iterator for Ignore<'t, T>
where
    T: TokenStream<'t>,
{
    type Item = Token<'t>;
    /// Yields the next token of the inner stream that is not of the ignored [Type].
    ///
    /// The filter uses the `ignore` type this [Ignore] was created with (rather than a
    /// hard-coded `Type::Space`), which matches what `expect`, `peek`, and `peek_expect` skip.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let token = self.inner.next()?;
            if !token.is_variant(self.ignore) {
                return Some(token);
            }
        }
    }
}
impl<'t, T> TokenStream<'t> for Ignore<'t, T>
where
    T: TokenStream<'t>,
{
    /// Reports the context of the inner stream
    fn context(&self) -> Context {
        self.inner.context()
    }
    /// Allows the ignored [Type] on the inner stream, then expects `expected` from it
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let skipped = self.ignore;
        self.inner.allow(skipped);
        self.inner.expect(expected)
    }
    /// Allows the ignored [Type] on the inner stream, then peeks at it
    fn peek(&mut self) -> Self::Item {
        let skipped = self.ignore;
        self.inner.allow(skipped);
        self.inner.peek()
    }
    /// Allows the ignored [Type] on the inner stream, then peeks for `expected`
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let skipped = self.ignore;
        self.inner.allow(skipped);
        self.inner.peek_expect(expected)
    }
}

View File

@@ -1,174 +0,0 @@
// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
//! ".define" rules
use super::*;
use std::collections::{HashMap, VecDeque};
// TODO: Clean this spaghetti mess up
/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
/// rules
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T>
where T: TokenStream<'t>
{
// Maps each defined key token to the tokens that replace it
sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
// The [Type] of every key token defined so far, without duplicates
sub_types: Vec<Type>,
// Tokens waiting to be yielded before anything more is read from `inner`
queue: VecDeque<Token<'t>>,
// The wrapped stream
inner: &'t mut T,
}
impl<'t, T> Iterator for Preprocessed<'t, T>
where
    T: TokenStream<'t>,
{
    type Item = Token<'t>;
    /// Yields queued tokens first. Otherwise reads from the inner stream: a token with an
    /// entry in the substitution table is replaced by its queued substitutes, and any other
    /// token is passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(queued) = self.queue.pop_front() {
                return Some(queued);
            }
            let fresh = self.inner.next()?;
            match self.sub_table.get(&fresh) {
                // Queue the substitutes, then go around again to yield the first of them
                Some(subs) => self.queue.extend(subs),
                None => return Some(fresh),
            }
        }
    }
}
impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
/// Creates a new [Preprocessed] [TokenStream] with no substitutions and an empty queue
pub fn new(inner: &'t mut T) -> Self {
Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
}
/// Gets a mutable reference to the inner [TokenStream]
pub fn inner_mut(&mut self) -> &mut T { self.inner }
/// Pushes `token` onto the back of the queue, and hands the same token back
fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
self.queue.push_back(token);
token
}
/// Processes a `.define` directive: records the tokens on the rest of the line as the
/// replacement for the key token that follows the directive.
///
/// Does nothing unless `token` is a Directive whose lexeme starts with ".define".
fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
return Ok(());
}
// Tokenize the subdocument
// NOTE(review): `allow` is presumably "consume a token of this type if present" -- confirm
// against TokenStream::allow
self.allow(Type::Directive);
self.allow(Type::Space);
// The next token is the key; a missing key is silently accepted
let Some(k) = self.inner.next() else { return Ok(()) };
// Remember the key's type, so expect/peek_expect know which types may be substituted
if !self.sub_types.contains(&k.variant()) {
self.sub_types.push(k.variant());
};
self.allow(Type::Space);
let mut replacement = vec![];
// Collect replacement tokens until the end of the line or file
loop {
match self.inner.peek().variant() {
Type::Endl | Type::EndOfFile => break,
Type::Comment | Type::Space => {
// ignore comments
self.inner.next();
}
_ => {
let next = self.inner.next().unwrap();
// NOTE(review): enqueue also puts each replacement token on the output queue, so
// the definition's body looks like it is yielded once as well -- confirm intended
replacement.push(self.enqueue(next));
}
}
}
// A later definition of the same key replaces the earlier one
self.sub_table.insert(k, replacement);
Ok(())
}
/// Does the preprocessing step: if `token` has a substitution, queues its substitutes and
/// advances the inner stream by one token
fn preprocess(&mut self, token: Token<'t>) {
if let Some(subs) = self.sub_table.get(&token) {
self.queue.extend(subs);
// NOTE(review): presumably discards the peeked token being substituted -- confirm
self.inner.next();
}
}
}
impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where
    T: TokenStream<'t>,
{
    /// Reports the context of the inner stream
    fn context(&self) -> Context {
        self.inner.context()
    }

    /// Consumes the next token if it is of the `expected` [Type].
    ///
    /// Queued tokens are checked first. With an empty queue, the inner stream is tried
    /// directly (processing any `.define` it yields); failing that, a substitutable token is
    /// expanded into the queue and the expectation is retried.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => {
                Ok(self.queue.pop_front().unwrap_or_default())
            }
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                // Only resolve defines when expecting, otherwise you'll run into issues.
                if let Ok(next) = self.inner.expect(expected) {
                    self.define(next)?;
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    if let Some(subs) = self.sub_table.get(&next) {
                        self.inner.allow_any_of(&self.sub_types);
                        self.queue.extend(subs);
                    }
                    return if self.queue.is_empty() {
                        self.inner.expect(expected)
                    } else {
                        self.expect(expected)
                    };
                }
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }

    /// Returns the next token without consuming it, expanding a substitution if the inner
    /// stream's next token has one.
    fn peek(&mut self) -> Self::Item {
        match self.queue.front() {
            Some(token) => *token,
            None => {
                // Only allow substitution when the next token is unexpected
                let old = self.inner.peek();
                self.preprocess(old);
                match self.queue.front() {
                    Some(&new) => new,
                    None => old,
                }
            }
        }
    }

    /// Checks that the next token is of the `expected` [Type], without consuming it.
    ///
    /// Follows the same order as `expect`: queue first, then the inner stream, then a
    /// substitution followed by a retry.
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        match self.queue.front() {
            Some(&token) if token.is_variant(expected) => Ok(token),
            Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
            None => {
                if let Ok(next) = self.inner.peek_expect(expected) {
                    return Ok(next);
                }
                if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
                    self.preprocess(next);
                    return if self.queue.is_empty() {
                        self.inner.peek_expect(expected)
                    } else {
                        self.peek_expect(expected)
                    };
                }
                // Attach the context here too, as every other error path in this impl does
                Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
            }
        }
    }
}
impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
where
    T: TokenStream<'t>,
{
    /// Prints the substitution state and the current context. The inner stream is left out,
    /// so the output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Preprocessed");
        out.field("sub_table", &self.sub_table);
        out.field("sub_types", &self.sub_types);
        out.field("queue", &self.queue);
        out.field("context", &self.context());
        out.finish_non_exhaustive()
    }
}

175
src/lexer/tests.rs Normal file
View File

@@ -0,0 +1,175 @@
use super::*;
/// Test helper with three forms:
/// - `lex!(type (tokens...), expected)` lexes the stringified tokens and asserts the kind of
///   the first token;
/// - `lex!(str "text", expected)` does the same for a string literal, which allows input that
///   cannot be written as Rust tokens;
/// - `lex!({ tokens... })` just builds a `Lexer` over the stringified tokens.
macro_rules! lex {
    (type ($($t:tt)*), $expected:expr) => {
        // A fragment specifier (`:tt`) is only meaningful in the matcher. Written in the
        // transcriber it is emitted literally, so it must not appear in the message.
        let token = Lexer::new(stringify!($($t)*))
            .scan()
            .expect(concat!(stringify!($($t)*), " should yield a valid token"));
        assert_eq!(token.kind, $expected);
    };
    (str $t:literal, $expected:expr) => {
        let token = Lexer::new($t)
            .scan()
            .expect(concat!(stringify!($t), " should yield a valid token"));
        assert_eq!(token.kind, $expected);
    };
    ({ $($t:tt)* }) => {
        Lexer::new(stringify!($($t)*))
    };
}
/// Character literals, both plain and written with a `\x` escape, lex to a Char token
#[test]
fn ascii_char() {
lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char
lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char
}
/// A `\u{...}` escape inside a character literal lexes to the escaped character
#[test]
fn unicode_escape_char() {
lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈
}
/// Numbers lex to a Number token carrying the value and the base selected by the prefix
#[test]
fn number_with_base() {
lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number
lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number
lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number
lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number
lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number
lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number
}
/// Every no-operand emulated mnemonic lexes to its NoEm token
#[test]
fn no_operand_emulated() {
lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm
lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm
lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm
lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm
lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm
lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm
lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm
lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm
lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm
lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
}
/// `br` lexes to the Special token, not to one of the regular instruction kinds
#[test]
fn br() {
lex!(type (br), TokenKind::Special(Special::Br));
}
/// Every one-operand emulated mnemonic lexes to its OneEm token
#[test]
fn one_operand_emulated() {
lex!(type (pop), TokenKind::OneEm(OneEm::Pop));
lex!(type (rla), TokenKind::OneEm(OneEm::Rla));
lex!(type (rlc), TokenKind::OneEm(OneEm::Rlc));
lex!(type (inv), TokenKind::OneEm(OneEm::Inv));
lex!(type (clr), TokenKind::OneEm(OneEm::Clr));
lex!(type (tst), TokenKind::OneEm(OneEm::Tst));
lex!(type (dec), TokenKind::OneEm(OneEm::Dec));
lex!(type (decd), TokenKind::OneEm(OneEm::Decd));
lex!(type (inc), TokenKind::OneEm(OneEm::Inc));
lex!(type (incd), TokenKind::OneEm(OneEm::Incd));
lex!(type (adc), TokenKind::OneEm(OneEm::Adc));
lex!(type (dadc), TokenKind::OneEm(OneEm::Dadc));
lex!(type (sbc), TokenKind::OneEm(OneEm::Sbc));
}
/// Every one-operand mnemonic lexes to its OneArg token
#[test]
fn one_operand() {
lex!(type (rrc), TokenKind::OneArg(OneArg::Rrc));
lex!(type (swpb), TokenKind::OneArg(OneArg::Swpb));
lex!(type (rra), TokenKind::OneArg(OneArg::Rra));
lex!(type (sxt), TokenKind::OneArg(OneArg::Sxt));
lex!(type (push), TokenKind::OneArg(OneArg::Push));
lex!(type (call), TokenKind::OneArg(OneArg::Call));
lex!(type (reti), TokenKind::OneArg(OneArg::Reti));
}
/// Every two-operand mnemonic lexes to its TwoArg token
#[test]
fn two_operand() {
lex!(type (mov), TokenKind::TwoArg(TwoArg::Mov));
lex!(type (add), TokenKind::TwoArg(TwoArg::Add));
lex!(type (addc), TokenKind::TwoArg(TwoArg::Addc));
lex!(type (subc), TokenKind::TwoArg(TwoArg::Subc));
lex!(type (sub), TokenKind::TwoArg(TwoArg::Sub));
lex!(type (cmp), TokenKind::TwoArg(TwoArg::Cmp));
lex!(type (dadd), TokenKind::TwoArg(TwoArg::Dadd));
lex!(type (bit), TokenKind::TwoArg(TwoArg::Bit));
lex!(type (bic), TokenKind::TwoArg(TwoArg::Bic));
lex!(type (bis), TokenKind::TwoArg(TwoArg::Bis));
lex!(type (xor), TokenKind::TwoArg(TwoArg::Xor));
lex!(type (and), TokenKind::TwoArg(TwoArg::And));
}
/// Every jump mnemonic lexes to its Jump token
#[test]
fn jump() {
lex!(type (jne), TokenKind::Jump(Jump::Jne));
lex!(type (jnz), TokenKind::Jump(Jump::Jnz));
lex!(type (jeq), TokenKind::Jump(Jump::Jeq));
lex!(type (jz), TokenKind::Jump(Jump::Jz));
lex!(type (jnc), TokenKind::Jump(Jump::Jnc));
lex!(type (jlo), TokenKind::Jump(Jump::Jlo));
lex!(type (jc), TokenKind::Jump(Jump::Jc));
lex!(type (jhs), TokenKind::Jump(Jump::Jhs));
lex!(type (jn), TokenKind::Jump(Jump::Jn));
lex!(type (jge), TokenKind::Jump(Jump::Jge));
lex!(type (jl), TokenKind::Jump(Jump::Jl));
lex!(type (jmp), TokenKind::Jump(Jump::Jmp));
}
/// Register names lex to Reg tokens; `r0`..`r3` are aliases of `pc`, `sp`, `sr`, and `cg`
#[test]
fn registers() {
lex!(type (pc), TokenKind::Reg(Reg::PC));
lex!(type (sp), TokenKind::Reg(Reg::SP));
lex!(type (sr), TokenKind::Reg(Reg::SR));
lex!(type (cg), TokenKind::Reg(Reg::CG));
lex!(type (r0), TokenKind::Reg(Reg::PC));
lex!(type (r1), TokenKind::Reg(Reg::SP));
lex!(type (r2), TokenKind::Reg(Reg::SR));
lex!(type (r3), TokenKind::Reg(Reg::CG));
lex!(type (r4), TokenKind::Reg(Reg::R4));
lex!(type (r5), TokenKind::Reg(Reg::R5));
lex!(type (r6), TokenKind::Reg(Reg::R6));
lex!(type (r7), TokenKind::Reg(Reg::R7));
lex!(type (r8), TokenKind::Reg(Reg::R8));
lex!(type (r9), TokenKind::Reg(Reg::R9));
lex!(type (r10), TokenKind::Reg(Reg::R10));
lex!(type (r11), TokenKind::Reg(Reg::R11));
lex!(type (r12), TokenKind::Reg(Reg::R12));
lex!(type (r13), TokenKind::Reg(Reg::R13));
lex!(type (r14), TokenKind::Reg(Reg::R14));
lex!(type (r15), TokenKind::Reg(Reg::R15));
}
/// Empty input, a newline, and each bracket lex to their own token kinds.
/// These use the `str` form, since unbalanced brackets cannot be passed as Rust tokens.
#[test]
fn delimiters() {
lex!(str "", TokenKind::Eof);
lex!(str "\n", TokenKind::Newline);
lex!(str "(", TokenKind::OpenParen);
lex!(str ")", TokenKind::CloseParen);
lex!(str "{", TokenKind::OpenCurly);
lex!(str "}", TokenKind::CloseCurly);
lex!(str "[", TokenKind::OpenBrace);
lex!(str "]", TokenKind::CloseBrace);
}
/// Input starting with `;` lexes to a Comment token
#[test]
fn comment() {
lex!(str "; this is a comment!\n\n", TokenKind::Comment);
}
/// Punctuation, shifts, directives, identifiers, and the `.b`/`.w` width suffixes each lex to
/// their own token kind
#[test]
fn other() {
// lex!(type (), TokenKind::)
lex!(type (,), TokenKind::Comma);
lex!(type (:), TokenKind::Colon);
lex!(type (!), TokenKind::Bang);
lex!(type (@), TokenKind::At);
lex!(type (&), TokenKind::Amp);
lex!(type (|), TokenKind::Bar);
lex!(type (^), TokenKind::Caret);
lex!(type (*), TokenKind::Star);
lex!(type (#), TokenKind::Hash);
lex!(type ($), TokenKind::Dollar);
lex!(type (%), TokenKind::Percent);
lex!(type (+), TokenKind::Plus);
lex!(type (-), TokenKind::Minus);
lex!(type (/), TokenKind::Slash);
lex!(type (<<), TokenKind::Lsh);
lex!(type (>>), TokenKind::Rsh);
lex!(type (.directive), TokenKind::Directive);
lex!(type (identifier), TokenKind::Identifier);
lex!(type (.b), TokenKind::Byte);
lex!(type (.w), TokenKind::Word);
}
/// Leading spaces, a non-breaking space (U+00A0), and tabs are skipped before the first token
#[test]
fn ignores_leading_whitespace() {
lex!(str " \u{a0}\t\t\t\t\t\t\t-", TokenKind::Minus);
}

View File

@@ -1,335 +1,480 @@
// © 2023 John Breaux // © 2023-2024 John Breaux
//! A [Token] is a [semantically tagged](Type) sequence of characters. //See LICENSE.md for license
//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span]
//! //!
//! Token, and the tokenizer, intend to copy as little as possible. //! [Tokens](Token) are a borrowed, and cannot outlive their source slice (lifetime `'t`)
use crate::span::Span;
use super::error::LexError; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
use regex::Regex; pub struct Token<'t> {
use std::{ pub lexeme: &'t str,
fmt::{Debug, Display}, pub kind: TokenKind,
sync::OnceLock, pub pos: Span<usize>,
}; }
impl<'t> Token<'t> {
/// Implements regex matching functions on [`Token`] for each [`Type`], pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self {
/// and implements [`From<&str>`] for [`Token`] Self { lexeme, kind, pos }
macro_rules! regex_impl {
(<$t:lifetime> $type:ty {$(
$(#[$meta:meta])*
pub fn $func:ident (text: &str) -> Option<Self> {
regex!($out:path = $re:literal)
} }
)*}) => { pub fn kind(&self) -> TokenKind {
impl<$t> $type { self.kind
/// Lexes a token only for the expected `variant` }
/// }
/// Warning: This bypasses precedence rules. Only use for specific patterns.
pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
match expected {$( pub enum TokenKind {
$out => Self::$func(text), Eof,
)*}.ok_or(LexError::UnexpectedToken { Newline, // \n
expected, OpenParen, // (
got: Self::from(text).into(), CloseParen, // )
OpenCurly, // {
CloseCurly, // }
OpenBrace, // [
CloseBrace, // ]
Comma, // ,
Colon, // :
Bang, // !
At, // @
Amp, // &
Bar, // |
Caret, // ^
Star, // *
Hash, // #
Dollar, // $
Percent, // %
Plus, // +
Minus, // -
Slash, // /
Lsh, // <<
Rsh, // >>
Comment, // (';' | '//') .* '\n' |
Directive, // '.' XID_CONTINUE*
Identifier, // XID_START XID_CONTINUE*
Number(u16, u8), // varies depending on base
Char(char), // '\'' ('\' Escape | .) '\''
String, // '"' .* '"'
Reg(Reg),
NoEm(NoEm),
OneEm(OneEm),
Special(Special),
OneArg(OneArg),
TwoArg(TwoArg),
Jump(Jump),
Byte, // .b
Word, // .w
}
/// Registers. The `FromStr` impl in `convert` accepts `pc`, `sp`, `sr`, and `cg` as well as
/// `r0`..`r15`, where `r0`..`r3` parse to the same variants as the four named registers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Reg {
PC,
SP,
SR,
CG,
R4,
R5,
R6,
R7,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
}
/// Fake instructions of the form `opcode`
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NoEm {
Nop,
Ret,
Clrc,
Clrz,
Clrn,
Setc,
Setz,
Setn,
Dint,
Eint,
}
/// Fake instructions of the form `opcode dst`
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneEm {
Pop,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
}
/// These opcodes have bespoke grammatical rules
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Special {
/// Br = "br" Src
Br,
}
/// Real instructions of the form `opcode src`
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneArg {
Rrc,
Swpb,
Rra,
Sxt,
Push,
Call,
Reti,
}
/// Real instructions of the form `opcode src, dst`
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TwoArg {
Mov,
Add,
Addc,
Subc,
Sub,
Cmp,
Dadd,
Bit,
Bic,
Bis,
Xor,
And,
}
/// Jump mnemonics
///
/// Each variant is parsed from its lowercase mnemonic by the `FromStr` impl in `convert`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Jump {
Jne,
Jnz,
Jeq,
Jz,
Jnc,
Jlo,
Jc,
Jhs,
Jn,
Jge,
Jl,
Jmp,
}
mod convert {
//! Implementations of [FromStr] for [token](super) types.
use super::*;
use std::str::FromStr;
impl FromStr for Reg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pc" => Reg::PC,
"sp" => Reg::SP,
"sr" => Reg::SR,
"cg" => Reg::CG,
"r0" => Reg::PC,
"r1" => Reg::SP,
"r2" => Reg::SR,
"r3" => Reg::CG,
"r4" => Reg::R4,
"r5" => Reg::R5,
"r6" => Reg::R6,
"r7" => Reg::R7,
"r8" => Reg::R8,
"r9" => Reg::R9,
"r10" => Reg::R10,
"r11" => Reg::R11,
"r12" => Reg::R12,
"r13" => Reg::R13,
"r14" => Reg::R14,
"r15" => Reg::R15,
_ => Err(())?,
})
}
}
impl FromStr for NoEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"nop" => NoEm::Nop,
"ret" => NoEm::Ret,
"clrc" => NoEm::Clrc,
"clrz" => NoEm::Clrz,
"clrn" => NoEm::Clrn,
"setc" => NoEm::Setc,
"setz" => NoEm::Setz,
"setn" => NoEm::Setn,
"dint" => NoEm::Dint,
"eint" => NoEm::Eint,
_ => Err(())?,
})
}
}
impl FromStr for OneEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pop" => OneEm::Pop,
"rla" => OneEm::Rla,
"rlc" => OneEm::Rlc,
"inv" => OneEm::Inv,
"clr" => OneEm::Clr,
"tst" => OneEm::Tst,
"dec" => OneEm::Dec,
"decd" => OneEm::Decd,
"inc" => OneEm::Inc,
"incd" => OneEm::Incd,
"adc" => OneEm::Adc,
"dadc" => OneEm::Dadc,
"sbc" => OneEm::Sbc,
_ => Err(())?,
})
}
}
impl FromStr for Special {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"br" => Special::Br,
_ => Err(())?,
})
}
}
impl FromStr for OneArg {
    type Err = ();

    /// Converts a lowercase mnemonic into a [OneArg].
    ///
    /// Matching is exact and case-sensitive; anything else yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "rrc" => Ok(OneArg::Rrc),
            "swpb" => Ok(OneArg::Swpb),
            "rra" => Ok(OneArg::Rra),
            "sxt" => Ok(OneArg::Sxt),
            "push" => Ok(OneArg::Push),
            "call" => Ok(OneArg::Call),
            "reti" => Ok(OneArg::Reti),
            _ => Err(()),
        }
    }
}
impl FromStr for TwoArg {
    type Err = ();

    /// Converts a lowercase mnemonic into a [TwoArg].
    ///
    /// Matching is exact and case-sensitive; anything else yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "mov" => Ok(TwoArg::Mov),
            "add" => Ok(TwoArg::Add),
            "addc" => Ok(TwoArg::Addc),
            "subc" => Ok(TwoArg::Subc),
            "sub" => Ok(TwoArg::Sub),
            "cmp" => Ok(TwoArg::Cmp),
            "dadd" => Ok(TwoArg::Dadd),
            "bit" => Ok(TwoArg::Bit),
            "bic" => Ok(TwoArg::Bic),
            "bis" => Ok(TwoArg::Bis),
            "xor" => Ok(TwoArg::Xor),
            "and" => Ok(TwoArg::And),
            _ => Err(()),
        }
    }
}
impl FromStr for Jump {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"jne" => Jump::Jne,
"jnz" => Jump::Jnz,
"jeq" => Jump::Jeq,
"jz" => Jump::Jz,
"jnc" => Jump::Jnc,
"jlo" => Jump::Jlo,
"jc" => Jump::Jc,
"jhs" => Jump::Jhs,
"jn" => Jump::Jn,
"jge" => Jump::Jge,
"jl" => Jump::Jl,
"jmp" => Jump::Jmp,
_ => Err(())?,
}) })
} }
$(
$(#[$meta])*
/// Tries to read [`
#[doc = stringify!($out)]
/// `] from `text`
pub fn $func(text: &$t str) -> Option<Self> {
static RE: OnceLock<Regex> = OnceLock::new();
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
.find(text)?.into();
Some(Self { variant: $out, lexeme })
})*
}
impl<$t> From<&$t str> for $type {
fn from (value: &$t str) -> Self {
$(
if let Some(token) = Self::$func(value) {
token
} else
)*
{todo!("Unexpected input: {value:#?} (Tokenization failure)")}
} }
} }
}; mod display {
} //! Implementations of [Display] for [token](super) types.
use super::*;
/// A [Token] is a [semantically tagged](Type) sequence of characters use std::fmt::Display;
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] impl<'t> Display for Token<'t> {
pub struct Token<'text> {
/// The type of this token
variant: Type,
/// The sub[str]ing corresponding to this token
lexeme: &'text str,
}
impl<'text> Token<'text> {
/// Returns the [Type] of this [Token]
pub fn variant(&self) -> Type { self.variant }
/// Returns the lexeme (originating string slice) of this token
pub fn lexeme(&self) -> &'text str { self.lexeme }
/// Parses this [Token] into another type
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
where F: std::str::FromStr {
self.lexeme.parse()
}
/// Returns whether the Lexeme is the expected [Type]
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
/// Returns the length of [Self::lexeme] in bytes.
pub fn len(&self) -> usize { self.lexeme.len() }
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
}
impl<'text> Debug for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish() let Self { lexeme, kind, pos: _ } = self;
match kind {
TokenKind::Comment
| TokenKind::Directive
| TokenKind::Identifier
| TokenKind::String => {
write!(f, "{}", lexeme)
} }
} ty => ty.fmt(f),
impl<'text> Display for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.variant {
Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
v => write!(f, "{v} \"{}\"", self.lexeme),
} }
} }
}
/// A [token Type](Type) is a semantic tag for a sequence of characters
///
/// [Type::EndOfFile] is the [Default] value.
// `PartialOrd`/`Ord` are derived, so values compare in the declaration order of the variants.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
/// contiguous whitespace, excluding newline
Space,
/// newline and contiguous whitespace
Endl,
/// A line-comment
Comment,
/// Jump label *definition*
Label,
/// Instructions
Insn,
/// Operand width is byte
ByteWidth,
/// Operand width is word
WordWidth,
/// Register mnemonic (i.e. `pc`, `r14`)
Register,
/// Marker for base-10
RadixMarkerDec,
/// Marker for base-16
RadixMarkerHex,
/// Marker for base-8
RadixMarkerOct,
/// Marker for base-2
RadixMarkerBin,
/// 1-4 hexadigit numbers only
// NOTE(review): the `expect_number` pattern accepts one or more hex digits, so the 1-4 digit
// limit is not enforced by the lexer pattern itself — confirm where it is checked.
Number,
/// Negative number marker
Minus,
/// post-increment mode marker
Plus,
/// Open-Indexed-Mode marker
LParen,
/// Close-Indexed-Mode marker
RParen,
/// Open Square Bracket
LBracket,
/// Closed Square Bracket
RBracket,
/// Indirect mode marker
Indirect,
/// absolute address marker
Absolute,
/// immediate value marker
Immediate,
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underline
Identifier,
/// A string, encased in "quotes"
String,
/// Assembler directive
Directive,
/// Separator (comma)
Separator,
/// End of File marker
#[default]
EndOfFile,
/// Invalid token
Invalid,
}
regex_impl! {<'text> Token<'text> {
pub fn expect_space(text: &str) -> Option<Self> {
regex!(Type::Space = r"^[\s--\n]+")
} }
pub fn expect_endl(text: &str) -> Option<Self> { impl Display for TokenKind {
regex!(Type::Endl = r"^\n[\s--\n]*")
}
pub fn expect_comment(text: &str) -> Option<Self> {
regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
}
pub fn expect_label(text: &str) -> Option<Self> {
regex!(Type::Label = r"^:")
}
pub fn expect_insn(text: &str) -> Option<Self> {
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
}
pub fn expect_byte_width(text: &str) -> Option<Self> {
regex!(Type::ByteWidth = r"(?i)^\.b")
}
pub fn expect_word_width(text: &str) -> Option<Self> {
regex!(Type::WordWidth = r"(?i)^\.w")
}
pub fn expect_register(text: &str) -> Option<Self> {
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
}
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerDec = r"(?i)^0d")
}
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
}
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerOct = r"(?i)^0o")
}
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerBin = r"(?i)^0b")
}
pub fn expect_number(text: &str) -> Option<Self> {
regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)")
}
pub fn expect_minus(text: &str) -> Option<Self> {
regex!(Type::Minus = r"^-")
}
pub fn expect_plus(text: &str) -> Option<Self> {
regex!(Type::Plus = r"^\+")
}
pub fn expect_l_paren(text: &str) -> Option<Self> {
regex!(Type::LParen = r"^\(")
}
pub fn expect_r_paren(text: &str) -> Option<Self> {
regex!(Type::RParen = r"^\)")
}
pub fn expect_l_bracket(text: &str) -> Option<Self> {
regex!(Type::LBracket = r"^\[")
}
pub fn expect_r_bracket(text: &str) -> Option<Self> {
regex!(Type::RBracket = r"^]")
}
pub fn expect_indrect(text: &str) -> Option<Self> {
regex!(Type::Indirect = r"^@")
}
pub fn expect_absolute(text: &str) -> Option<Self> {
regex!(Type::Absolute = r"^&")
}
pub fn expect_immediate(text: &str) -> Option<Self> {
regex!(Type::Immediate = r"^#")
}
pub fn expect_string(text: &str) -> Option<Self> {
regex!(Type::String = r#"^"[^"]*""#)
}
pub fn expect_directive(text: &str) -> Option<Self> {
regex!(Type::Directive = r"^\.\w+")
}
pub fn expect_identifier(text: &str) -> Option<Self> {
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
}
pub fn expect_separator(text: &str) -> Option<Self> {
regex!(Type::Separator = r"^,")
}
pub fn expect_end_of_file(text: &str) -> Option<Self> {
regex!(Type::EndOfFile = r"^$")
}
pub fn expect_anything(text: &str) -> Option<Self> {
regex!(Type::Invalid = r"^.*")
}
}}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::Space => Display::fmt("space", f), TokenKind::Eof => write!(f, "[EOF]"),
Self::Endl => Display::fmt("newline", f), TokenKind::Newline => writeln!(f),
Self::Comment => Display::fmt("comment", f), TokenKind::OpenParen => write!(f, "("),
Self::Label => Display::fmt("label definition", f), TokenKind::CloseParen => write!(f, ")"),
Self::Insn => Display::fmt("opcode", f), TokenKind::OpenCurly => write!(f, "{{"),
Self::ByteWidth => Display::fmt("byte-width", f), TokenKind::CloseCurly => write!(f, "}}"),
Self::WordWidth => Display::fmt("word-width", f), TokenKind::OpenBrace => write!(f, "["),
Self::Register => Display::fmt("register", f), TokenKind::CloseBrace => write!(f, "]"),
Self::RadixMarkerDec => Display::fmt("decimal marker", f), TokenKind::Comma => write!(f, ","),
Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f), TokenKind::Colon => write!(f, ":"),
Self::RadixMarkerOct => Display::fmt("octal marker", f), TokenKind::Bang => write!(f, "!"),
Self::RadixMarkerBin => Display::fmt("binary marker", f), TokenKind::At => write!(f, "@"),
Self::Number => Display::fmt("number", f), TokenKind::Amp => write!(f, "&"),
Self::Minus => Display::fmt("minus sign", f), TokenKind::Bar => write!(f, "|"),
Self::Plus => Display::fmt("plus sign", f), TokenKind::Caret => write!(f, "^"),
Self::LParen => Display::fmt("left parenthesis", f), TokenKind::Star => write!(f, "*"),
Self::RParen => Display::fmt("right parenthesis", f), TokenKind::Hash => write!(f, "#"),
Self::LBracket => Display::fmt("left bracket", f), TokenKind::Dollar => write!(f, "$"),
Self::RBracket => Display::fmt("right bracket", f), TokenKind::Percent => write!(f, "%"),
Self::Indirect => Display::fmt("indirect", f), TokenKind::Plus => write!(f, "+"),
Self::Absolute => Display::fmt("absolute", f), TokenKind::Minus => write!(f, "-"),
Self::Immediate => Display::fmt("immediate", f), TokenKind::Slash => write!(f, "/"),
Self::Identifier => Display::fmt("identifier", f), TokenKind::Lsh => write!(f, "<<"),
Self::String => Display::fmt("string", f), TokenKind::Rsh => write!(f, ">>"),
Self::Directive => Display::fmt("directive", f), TokenKind::Comment => write!(f, "; "),
Self::Separator => Display::fmt("comma", f), TokenKind::Directive => write!(f, "."),
Self::EndOfFile => Display::fmt("EOF", f), TokenKind::Identifier => write!(f, "Identifier"),
Self::Invalid => Display::fmt("invalid token", f), TokenKind::Number(val, 2) => write!(f, "0b{val:b}"),
TokenKind::Number(val, 8) => write!(f, "0o{val:o}"),
TokenKind::Number(val, 16) => write!(f, "0x{val:x}"),
TokenKind::Number(val, _) => write!(f, "{val}"),
TokenKind::Char(c) => write!(f, "'{c}'"),
TokenKind::String => write!(f, "\"String\""),
TokenKind::Reg(kw) => write!(f, "{kw}"),
TokenKind::NoEm(kw) => write!(f, "{kw}"),
TokenKind::OneEm(kw) => write!(f, "{kw}"),
TokenKind::Special(kw) => write!(f, "{kw}"),
TokenKind::OneArg(kw) => write!(f, "{kw}"),
TokenKind::TwoArg(kw) => write!(f, "{kw}"),
TokenKind::Jump(kw) => write!(f, "{kw}"),
TokenKind::Byte => write!(f, ".b"),
TokenKind::Word => write!(f, ".w"),
} }
} }
}
/// A [Token] which can outlive its parent buffer
///
/// It holds the same two pieces of data as a [Token], but owns its lexeme as a [String]
/// instead of borrowing it. Both fields are private.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OwnedToken {
/// The type of this token
variant: Type,
/// The sub[String] corresponding to this token
lexeme: String,
}
impl Display for OwnedToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
}
impl<'t> From<&'t OwnedToken> for Token<'t> {
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
}
impl From<Token<'_>> for OwnedToken {
fn from(value: Token<'_>) -> Self {
let Token { variant, lexeme } = value;
OwnedToken { variant, lexeme: lexeme.to_owned() }
} }
} impl Display for Reg {
/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
///
/// The wrapped [Vec] is private; values are built through the generic [From] conversion,
/// which accepts anything that can be viewed as a slice of [Type].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Types(Vec<Type>);
impl<T: AsRef<[Type]>> From<T> for Types {
    // TODO: Possibly bad. Check out in rust playground.
    /// Copies every [Type] out of `value`'s slice view into a new [Types].
    fn from(value: T) -> Self {
        let types: &[Type] = value.as_ref();
        Self(types.to_vec())
    }
}
impl Display for Types {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (idx, t) in self.0.iter().enumerate() { match self {
Display::fmt(t, f)?; Reg::PC => "pc".fmt(f),
match idx { Reg::SP => "sp".fmt(f),
i if i < self.0.len() - 2 => Display::fmt(", ", f)?, Reg::SR => "sr".fmt(f),
i if i < self.0.len() - 1 => Display::fmt(" or ", f)?, Reg::CG => "cg".fmt(f),
_ => (), Reg::R4 => "r4".fmt(f),
Reg::R5 => "r5".fmt(f),
Reg::R6 => "r6".fmt(f),
Reg::R7 => "r7".fmt(f),
Reg::R8 => "r8".fmt(f),
Reg::R9 => "r9".fmt(f),
Reg::R10 => "r10".fmt(f),
Reg::R11 => "r11".fmt(f),
Reg::R12 => "r12".fmt(f),
Reg::R13 => "r13".fmt(f),
Reg::R14 => "r14".fmt(f),
Reg::R15 => "r15".fmt(f),
}
}
}
impl Display for NoEm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
NoEm::Nop => "nop".fmt(f),
NoEm::Ret => "ret".fmt(f),
NoEm::Clrc => "clrc".fmt(f),
NoEm::Clrz => "clrz".fmt(f),
NoEm::Clrn => "clrn".fmt(f),
NoEm::Setc => "setc".fmt(f),
NoEm::Setz => "setz".fmt(f),
NoEm::Setn => "setn".fmt(f),
NoEm::Dint => "dint".fmt(f),
NoEm::Eint => "eint".fmt(f),
}
}
}
impl Display for OneEm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OneEm::Pop => "pop".fmt(f),
OneEm::Rla => "rla".fmt(f),
OneEm::Rlc => "rlc".fmt(f),
OneEm::Inv => "inv".fmt(f),
OneEm::Clr => "clr".fmt(f),
OneEm::Tst => "tst".fmt(f),
OneEm::Dec => "dec".fmt(f),
OneEm::Decd => "decd".fmt(f),
OneEm::Inc => "inc".fmt(f),
OneEm::Incd => "incd".fmt(f),
OneEm::Adc => "adc".fmt(f),
OneEm::Dadc => "dadc".fmt(f),
OneEm::Sbc => "sbc".fmt(f),
}
}
}
impl Display for Special {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Special::Br => "br".fmt(f),
}
}
}
impl Display for OneArg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OneArg::Rrc => "rrc".fmt(f),
OneArg::Swpb => "swpb".fmt(f),
OneArg::Rra => "rra".fmt(f),
OneArg::Sxt => "sxt".fmt(f),
OneArg::Push => "push".fmt(f),
OneArg::Call => "call".fmt(f),
OneArg::Reti => "reti".fmt(f),
}
}
}
impl Display for TwoArg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TwoArg::Mov => "mov".fmt(f),
TwoArg::Add => "add".fmt(f),
TwoArg::Addc => "addc".fmt(f),
TwoArg::Subc => "subc".fmt(f),
TwoArg::Sub => "sub".fmt(f),
TwoArg::Cmp => "cmp".fmt(f),
TwoArg::Dadd => "dadd".fmt(f),
TwoArg::Bit => "bit".fmt(f),
TwoArg::Bic => "bic".fmt(f),
TwoArg::Bis => "bis".fmt(f),
TwoArg::Xor => "xor".fmt(f),
TwoArg::And => "and".fmt(f),
}
}
}
impl Display for Jump {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Jump::Jne => "jne".fmt(f),
Jump::Jnz => "jnz".fmt(f),
Jump::Jeq => "jeq".fmt(f),
Jump::Jz => "jz".fmt(f),
Jump::Jnc => "jnc".fmt(f),
Jump::Jlo => "jlo".fmt(f),
Jump::Jc => "jc".fmt(f),
Jump::Jhs => "jhs".fmt(f),
Jump::Jn => "jn".fmt(f),
Jump::Jge => "jge".fmt(f),
Jump::Jl => "jl".fmt(f),
Jump::Jmp => "jmp".fmt(f),
} }
} }
Ok(())
} }
} }

View File

@@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
use super::*;
use super::ignore::Ignore;
use super::preprocessed::Preprocessed;
/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
///
/// Implementors provide [context](TokenStream::context), [peek](TokenStream::peek),
/// [peek_expect](TokenStream::peek_expect) and [expect](TokenStream::expect); every other
/// method has a default implementation built on top of those.
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
/// Gets this stream's [Context]
fn context(&self) -> Context;
/// Wraps this stream in an [Ignore] constructed with the given `variant`
///
/// Presumably the wrapper skips tokens of that [Type] (e.g. [Type::Space]) — see [Ignore].
#[inline]
fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
where Self: Sized {
Ignore::new(variant, self)
}
/// Creates a [TokenStream] that performs live substitution of the input
#[inline]
fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
where Self: Sized {
Preprocessed::new(self)
}
/// Returns the next [Token] without advancing
fn peek(&mut self) -> Self::Item;
/// Returns the next [Token] if it is of the expected [Type], without advancing
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Consumes and returns a [Token] if it is the expected [Type]
///
/// Otherwise, does not consume a [Token]
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Ignores a [Token] of the expected [Type], propagating errors.
#[inline]
fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) }
/// Ignores a [Token] of the expected [Type], discarding errors.
#[inline]
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
/// Runs `f` once for each [Type] in `expected`, in order, and returns the first success
///
/// - If `f` returns `Ok`, that value is returned immediately.
/// - If `f` fails with [LexError::UnexpectedToken], the next [Type] is tried.
/// - Any other error is returned at once, with this stream's [Context] attached.
/// - If no [Type] succeeds (including when `expected` is empty), an error built from the
///   whole `expected` list and the next peeked [Token] is returned, with [Context] attached.
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError>
where T: AsRef<[Type]> {
for &expected in expected.as_ref() {
// `bare()` presumably strips any context already attached to the error, so that the
// variant can be matched on below — TODO confirm against LexError.
match f(self, expected).map_err(|e| e.bare()) {
Ok(t) => return Ok(t),
Err(LexError::UnexpectedToken { .. }) => continue,
Err(e) => return Err(e.context(self.context())),
}
}
// Here `expected` is the whole list again; the loop binding above was scoped to the loop.
Err(LexError::expected(expected, self.peek()).context(self.context()))
}
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
#[inline]
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::peek_expect, expected)
}
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
///
/// Otherwise, does not consume a [Token]
#[inline]
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::expect, expected)
}
/// Ignores a [Token] of any expected [Type], discarding errors.
#[inline]
fn allow_any_of<T>(&mut self, expected: T)
where T: AsRef<[Type]> {
let _ = self.expect_any_of(expected);
}
/// Ignores a [Token] of any expected [Type], propagating errors.
#[inline]
fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError>
where T: AsRef<[Type]> {
self.any_of(Self::require, expected)
}
}

View File

@@ -1,76 +1,95 @@
// © 2023 John Breaux // © 2023-2024 John Breaux
//See LICENSE.md for license
//! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption //! A bare-bones toy assembler for the TI MSP430, for use in MicroCorruption
//! //!
//! This project aims to assemble any valid msp430 instructions, while being lenient about the //! This project aims to assemble any valid msp430 instructions, while including important quality
//! syntax. After all, a real-world parser is going to face all kinds of malformed input, and it //! of life features such as constant expression evaluation.
//! would be nice to support that kind of input (or, if it's completely unsalvageable, provide a
//! useful message to the author.)
//! //!
//! The [`Parser`](preamble::Parser) will ignore whitespace, excluding newlines, //! ## Tokenization
//! unless syntactically relevant. It will also discard comma-separators between operands of a //! The [`Lexer`](lexer::Lexer) will ignore whitespace, except newlines. It borrows a text buffer,
//! two-operand instruction. //! and outputs [tokens](lexer::token::Token) of various [TokenKinds](lexer::token::TokenKind).
//! //!
//! It returns an AST structured as follows //! ## Preprocessing
//! The [`Preprocessor`](preprocessor::Preprocessor) will filter
//! [newlines](lexer::token::TokenKind::Newline), unless used to terminate a `.define` directive.
//!
//! ## Parsing
//! The [`Parser`](parser::Parser) consumes a [Lexer](lexer::Lexer)
//! and returns an [AST](parser::ast) structured roughly as follows:
//! ```text //! ```text
//! Root //! Statements
//! ├─ Line //! ├─ Stmt
//! │ └─ Empty
//! ├─ Line
//! │ └─ Comment //! │ └─ Comment
//! ├─ Line //! ├─ Stmt
//! │ └─ Directive // Pre- or Post-processor directive //! │ └─ Directive // Pre- or Post-processor directive
//! ├─ Linel //! ├─ Stmt
//! │ └─ Label // Label definition //! │ └─ Label // Label definition
//! ├─ Line //! ├─ Stmt
//! │ └─ Instruction //! │ └─ Insn
//! │ └─ NoEm // A zero-operand "emulated" instruction
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ OneEm // A one-operand "emulated" instruction
//! │ ├─ Opcode //! │ ├─ Opcode
//! │ └─ Encoding::Single
//! │ ├─ Width //! │ ├─ Width
//! │ └─ PrimaryOperand //! │ └─ Dst // A destination register has several addressing modes:
//! │ Identifier // Label, for relative-addressed data/code //! │ Direct // - The contents of a register
//! │ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ Indexed // - The register, as a pointer, plus a byte index
//! │ Number // Index, absolute address or immediate value. //! │ Absolute // - An immediate absolute address
//! ├─ Line //! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! │ └─ Instruction //! ├─ Stmt
//! │ └─ Insn
//! │ └─ OneArg // A one-operand instruction
//! │ ├─ Opcode //! │ ├─ Opcode
//! │ └─ Encoding::Double
//! │ ├─ Width //! │ ├─ Width
//! │ PrimaryOperand //! │ Src // A source register has even more addressing modes:
//! │ Identifier // Label, for relative-addressed data/code //! │ Direct // - The contents of a register
//! │ ├─ Register // Direct, indexed, indirect or indirect-post-increment register. //! │ ╶─ Indexed // - The register, as a pointer, plus a byte index
//! │ Number // Index, absolute address or immediate value. //! │ Indirect // - The word at the address stored in the register
//! │ └─ SecondaryOperand //! │ // (like Indexed, but without an extension word.)
//! │ Identifier // Label, for relative-addressed data/code //! │ PostIncrement // - Indirect, but the register is post-incremented by 1
//! │ ├─ Register // Direct or indexed register //! │ // (or, if it's the PC or SP, by 2)
//! │ Number // Index or absolute address //! │ Absolute // - An immediate absolute address
//! ├─ Line //! │ ╶─ Immediate // - An immediate 16-bit number
//! │ └─ Instruction //! │ ╶─ Special // - A so-called "special" immediate (#0 or #1) - these are joke encodings.
//! ├─ Stmt
//! │ └─ Insn
//! │ └─ TwoArg // A two-operand instruction
//! │ ├─ Opcode //! │ ├─ Opcode
//! │ └─ Encoding::Jump //! │ ├─ Width
//! │ JumpTarget //! │ Src
//! │ ├─ Identifier // Label //! │ └─ Dst
//! │ └─ Number // Even, PC-relative offset in range (-1024..=1022) //! └─ Stmt
//! └─ Line //! └─ Insn
//! └─ EndOfFile //! └─ Jump // A relative jump instruction
//! ├─ Opcode // The jump condition
//! └─ JumpDst // A jump instruction's destination can be either:
//! └─ Rel // - An even, signed 11-bit offset
//! ╶─ Label // - A label to jump to
//! ``` //! ```
//!
//! ## Canonicalization
//! After parsing, tokens must be [canonicalized](parser::ast::canonical::Canonicalize):
//! - Expressions which act exclusively on numbers are eagerly evaluated
//! - Expressions which begin with a numeric part are repacked for late evaluation
//! - "Emulated" instructions are desugared into their canonical counterparts
//!
//! ## Assembly
//! The [Assembler](assembler::Assembler) takes an [AST](parser::ast), and
//! 1. Encodes all [Instructions](parser::ast::Instruction) into 16-bit words
//! 2. Records all jump labels, for backpatching
//! 3. Records all expressions, for late evaluation
//! 4. Performs late evaluation and backpatching
//!
//! If a non-canonical instruction is found, the assembler will print a warning,
//! and canonicalize it.
pub mod preamble { pub mod span;
//! Common imports for msp430-asm
use super::*;
pub use assembler::Assembler;
pub use error::Error;
pub use lexer::{
context::Context,
token::{Token, Type},
token_stream::TokenStream,
Tokenizer,
};
pub use parser::Parser;
}
use preamble::*; pub mod lexer;
pub mod error;
pub mod preprocessor;
pub mod parser;
pub mod assembler; pub mod assembler;
pub mod lexer;
pub mod parser;

View File

@@ -1,81 +1,598 @@
// © 2023 John Breaux // © 2023-2024 John Breaux
//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root) //See LICENSE.md for license
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
pub mod ast;
use crate::{TokenStream, Type}; use self::error::{
use error::ParseError; Error,
use preamble::*; ErrorKind::{self, *},
use std::{ PResult, Parsing,
fmt::{Debug, Display},
path::Path,
}; };
use crate::{
pub mod preamble { lexer::{
//! All the different AST node types token::{Reg, Special, Token, TokenKind as Kind},
use super::*; Lexer,
// Traits
pub use parsable::Parsable;
// Nodes
pub use comment::Comment;
pub use directive::Directive;
pub use identifier::Identifier;
pub use instruction::{
encoding::{
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
}, },
opcode::Opcode, preprocessor::Preprocessor,
Instruction, span::Span,
};
use ast::*;
/// Parses the [Tokens](Token) scanned from a [Preprocessor], with one token of lookahead
#[derive(Clone, Debug)]
pub struct Parser<'t> {
/// The token source
lexer: Preprocessor<'t>,
/// The lookahead slot: filled by `peek`, emptied by `take`
next: Option<Token<'t>>,
/// The position of the most recently peeked token; copied into errors built by `error`
loc: Span<usize>,
}
impl<'t> Parser<'t> {
/// Creates a new [Parser]
pub fn new(text: &'t str) -> Self {
let lexer = Preprocessor::new(text);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
/// Createes a new [Parser] from an existing [Lexer]
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
let lexer = Preprocessor::with_lexer(lexer);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
Parsable::parse_with(self)
}
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
Error { parsing, kind, loc: self.loc }
}
/// Peek a token out of the lexer
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
if self.next.is_none() {
self.next = self.lexer.scan();
}
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
}
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
Ok(match self.take() {
Some(token) => token,
None => {
self.peek(p)?;
self.take().expect("should have been populated by peek")
}
})
}
/// Consumes the next token
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
match self.peek(p)?.kind {
kind if kind == expect => {
self.take();
Ok(self)
}
kind => Err(self.error(Unexpected(kind), p)),
}
}
/// Consumes the next token without checking it
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
self.next(p)?;
Ok(self)
}
/// Take the last peeked token
pub fn take(&mut self) -> Option<Token<'t>> {
self.next.take()
}
}
// Expressions
impl<'t> Parser<'t> {
/// Parses an expression
///
/// This is the entry point of the expression grammar; it delegates directly to [Parser::term].
pub fn expr(&mut self) -> PResult<Expr<'t>> {
self.term()
}
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
///
/// Operands are parsed with [Parser::factor]. A lone operand is returned as-is; otherwise the
/// first operand and every following `(operator, operand)` pair are packed into one
/// [Expr::Binary].
///
/// NOTE(review): this is the outermost level of the grammar and its operands are `factor`s
/// (`+`, `-`), so additive operators bind tighter than multiplicative ones:
/// `1 + 2 * 3` groups as `(1 + 2) * 3`. Confirm that this precedence is intended.
pub fn term(&mut self) -> PResult<Expr<'t>> {
    let p = Parsing::Expr;
    let head = self.factor()?;
    let mut tail = vec![];
    loop {
        let op = match self.peek(p)?.kind {
            Kind::Star => BinOp::Mul,
            Kind::Slash => BinOp::Div,
            Kind::Percent => BinOp::Rem,
            _ => break,
        };
        // Skip the operator token, then parse its right-hand operand
        let operand = self.then(p)?.factor()?;
        tail.push((op, operand));
    }
    Ok(if tail.is_empty() { head } else { Expr::Binary(head.into(), tail) })
}
/// Parses a factor expression (binary `+`add, `-`sub)
///
/// Operands are parsed with [Parser::shift]. A lone operand is returned as-is; otherwise the
/// first operand and every following `(operator, operand)` pair are packed into one
/// [Expr::Binary].
pub fn factor(&mut self) -> PResult<Expr<'t>> {
    let p = Parsing::Expr;
    let head = self.shift()?;
    let mut tail = vec![];
    loop {
        let op = match self.peek(p)?.kind {
            Kind::Plus => BinOp::Add,
            Kind::Minus => BinOp::Sub,
            _ => break,
        };
        // Skip the operator token, then parse its right-hand operand
        let operand = self.then(p)?.shift()?;
        tail.push((op, operand));
    }
    Ok(if tail.is_empty() { head } else { Expr::Binary(head.into(), tail) })
}
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
///
/// Operands are parsed with [Parser::bin]. A lone operand is returned as-is; otherwise the
/// first operand and every following `(operator, operand)` pair are packed into one
/// [Expr::Binary].
pub fn shift(&mut self) -> PResult<Expr<'t>> {
    let p = Parsing::Expr;
    let head = self.bin()?;
    let mut tail = vec![];
    loop {
        let op = match self.peek(p)?.kind {
            Kind::Lsh => BinOp::Lsh,
            Kind::Rsh => BinOp::Rsh,
            _ => break,
        };
        // Skip the operator token, then parse its right-hand operand
        let operand = self.then(p)?.bin()?;
        tail.push((op, operand));
    }
    Ok(if tail.is_empty() { head } else { Expr::Binary(head.into(), tail) })
}
/// Parses a bitwise expression (binary `&`and, `|`or, `^`xor)
///
/// Operands are parsed with [Parser::unary]. A lone operand is returned as-is; otherwise the
/// first operand and every following `(operator, operand)` pair are packed into one
/// [Expr::Binary].
pub fn bin(&mut self) -> PResult<Expr<'t>> {
    let p = Parsing::Expr;
    let head = self.unary()?;
    let mut tail = vec![];
    loop {
        let op = match self.peek(p)?.kind {
            Kind::Amp => BinOp::And,
            Kind::Bar => BinOp::Or,
            Kind::Caret => BinOp::Xor,
            _ => break,
        };
        // Skip the operator token, then parse its right-hand operand
        let operand = self.then(p)?.unary()?;
        tail.push((op, operand));
    }
    Ok(if tail.is_empty() { head } else { Expr::Binary(head.into(), tail) })
}
/// Parses a unary expression (`*`deref, `-`negate, `!`invert)
///
/// Any number of prefix operators is collected, in source order, before the operand is parsed
/// with [Parser::primary]. Without prefix operators, the operand is returned as-is.
pub fn unary(&mut self) -> PResult<Expr<'t>> {
    let p = Parsing::Expr;
    let mut ops = vec![];
    loop {
        let op = match self.peek(p)?.kind {
            Kind::Star => UnOp::Deref,
            Kind::Minus => UnOp::Neg,
            Kind::Bang => UnOp::Not,
            _ => break,
        };
        ops.push(op);
        // Consume the operator token that was just peeked
        self.take();
    }
    let operand = self.primary()?;
    Ok(if ops.is_empty() { operand } else { Expr::Unary(ops, operand.into()) })
}
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
pub fn primary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let Token { lexeme, kind, .. } = *self.peek(p)?;
Ok(match kind {
Kind::OpenParen => {
let out = Expr::Group(self.then(p)?.parse()?);
self.assert(Kind::CloseParen, p)?;
out
}
Kind::Number(n, _) => {
self.take();
Expr::Number(n)
}
Kind::Identifier => {
self.take();
Expr::Ident(lexeme)
}
Kind::Amp => self.then(p)?.addrof()?,
ty => Err(self.error(NonNumeric(ty), p))?,
})
}
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let token = self.peek(p)?;
let out = match token.kind {
Kind::Identifier => Expr::AddrOf(token.lexeme),
Kind::Number(n, _) => Expr::Number(n),
ty => Err(self.error(Unexpected(ty), p))?,
}; };
pub use label::Label; self.take();
pub use line::Line; Ok(out)
pub use root::Root; }
// Error
pub use error::ParseError;
} }
pub mod parsable; pub trait Parsable<'t>: Sized {
fn parse(text: &'t str) -> PResult<Self> {
pub mod comment; Self::parse_with(&mut Parser::new(text))
pub mod directive; }
pub mod error; fn parse_with(p: &mut Parser<'t>) -> PResult<Self>;
pub mod identifier;
pub mod instruction;
pub mod label;
pub mod line;
pub mod root;
pub struct Parser {
radix: u32,
} }
impl Parser { impl<'t> Parsable<'t> for Statements<'t> {
pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> { fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
Root::parse(&self, &mut stream.ignore(Type::Space)) let mut stmts = vec![];
while p.peek(Parsing::File)?.kind != Kind::Eof {
stmts.push(p.parse()?)
} }
pub fn parse<T>(self, input: &T) -> Result<Root, ParseError> Ok(Self { stmts })
where T: AsRef<str> + ?Sized {
Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
} }
pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError>
where P: AsRef<Path> + ?Sized {
self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into()))
}
pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError>
where T: AsRef<str> + ?Sized {
Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
}
/// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
/// conversion
pub fn radix(mut self, radix: u32) { self.radix = radix; }
} }
impl Default for Parser { impl<'t> Parsable<'t> for Statement<'t> {
fn default() -> Self { Self { radix: 16 } } fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let token = *p.peek(Parsing::Stmt)?;
Ok(match token.kind {
Kind::Comment => {
p.take();
Statement::Comment(token.lexeme)
}
Kind::Directive => Statement::Directive(p.parse()?),
Kind::Identifier => Statement::Label(p.label()?),
_ => Statement::Insn(p.parse()?),
})
}
}
impl<'t> Parsable<'t> for Directive<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
p.take();
Ok(match lexeme {
".define" => Directive::Define(p.parse()?),
".org" => Directive::Org(p.expr()?.into()),
".word" => Directive::Word(p.parse()?),
".words" => Directive::Words(p.parse()?),
".string" => Directive::String(p.string()?),
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
})
}
}
impl<'t> Parsable<'t> for Vec<Token<'t>> {
    /// Collects tokens up to, but not including, the next `Eof`, `Newline` or `Comment` token.
    ///
    /// The terminating token itself is consumed and discarded.
    /// NOTE(review): this also swallows a terminating `Eof` or `Comment` — confirm that callers
    /// do not need to see those tokens afterwards.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let parsing = Parsing::Directive;
        let mut tokens = Vec::new();
        while !matches!(p.peek(parsing)?.kind, Kind::Eof | Kind::Newline | Kind::Comment) {
            tokens.push(p.next(parsing)?);
        }
        // Drop the terminator that ended the loop
        p.take();
        Ok(tokens)
    }
}
impl<'t> Parsable<'t> for Instruction<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let start = p.peek(Parsing::Instruction)?.pos.start;
Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } })
}
}
impl<'t> Parsable<'t> for InstructionKind<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
// an instruction starts with an opcode
Ok(match p.peek(Parsing::Instruction)?.kind() {
Kind::NoEm(_) => Self::NoEm(p.parse()?),
Kind::OneEm(_) => Self::OneEm(p.parse()?),
Kind::Special(Special::Br) => Self::Br(p.parse()?),
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
Kind::OneArg(_) => Self::OneArg(p.parse()?),
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
Kind::Jump(_) => Self::Jump(p.parse()?),
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
})
}
}
impl<'t> Parsable<'t> for NoEm {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
match p.next(Parsing::NoEm)?.kind {
Kind::NoEm(opcode) => Ok(Self { opcode }),
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
}
}
}
impl<'t> Parsable<'t> for OneEm<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneEm)?.kind {
Kind::OneEm(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
},
width: p.parse()?,
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for OneArg<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneArg)?.kind {
Kind::OneArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
},
width: p.parse()?,
src: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for TwoArg<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::TwoArg;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::TwoArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
width: p.parse()?,
src: p.parse()?,
dst: p.assert(Kind::Comma, parsing)?.parse()?,
})
}
}
impl<'t> Parsable<'t> for Jump<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::Jump(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for Reti {
    /// Requires the next token to be the `reti` opcode; the instruction takes no operands.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        use crate::lexer::token::OneArg as Opcode;
        let expected = Kind::OneArg(Opcode::Reti);
        p.assert(expected, Parsing::Reti)?;
        Ok(Self)
    }
}
// Parses a `br` instruction: the `br` special opcode followed by one source operand.
impl<'t> Parsable<'t> for Br<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
// The opcode token must be present before the operand is parsed.
p.assert(Kind::Special(Special::Br), Parsing::Br)?;
Ok(Self { src: p.parse()? })
}
} }
// NOTE(review): the next line carries two diff columns fused together (old `Debug for Parser`
// and new `Parsable for Src`); the comments below describe the new `Src` parser.
// Parses a source operand, selecting the addressing mode from the leading token.
impl Debug for Parser { impl<'t> Parsable<'t> for Src<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Src;
Ok(match p.peek(parsing)?.kind {
// NOTE(review): `p.then(..)` presumably consumes the sigil token before the operand is
// parsed; confirm against `Parser::then`.
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
Kind::At => {
// `@` must be followed by a register.
let reg = match p.then(parsing)?.next(parsing)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), parsing))?,
};
// A trailing `+` selects post-increment and is consumed; otherwise plain indirect.
if let Kind::Plus = p.peek(parsing)?.kind {
p.take();
Src::PostInc(reg)
} else {
Src::Indirect(reg)
}
} // @reg+, @reg
Kind::Reg(_) => Src::Direct(p.parse()?),
// Anything else must start an expression: `expr(reg)` is indexed, a lone `expr` is bare.
_ => {
let expr = p.parse()?;
match p.peek(parsing)?.kind {
Kind::OpenParen => Src::Indexed(expr, {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
_ => Src::BareExpr(expr),
}
}
})
}
}
impl<'t> Parsable<'t> for Dst<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Dst;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
Kind::Reg(_) => Dst::Direct(p.parse()?),
_ => Dst::Indexed(p.expr()?.into(), {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
})
}
}
impl<'t> Parsable<'t> for JumpDst<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
let mut neg = false;
let out = loop {
let token = p.peek(parsing)?;
match token.kind {
Kind::Minus => {
neg = !neg;
}
Kind::Plus => {}
Kind::Identifier => break Self::Label(token.lexeme),
Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }),
ty => Err(p.error(Unexpected(ty), parsing))?,
}
p.take();
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Width {
    /// Parses an optional width suffix. A byte or word suffix token is consumed; when
    /// neither is present nothing is consumed and the width is `Word`.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let explicit = match p.peek(Parsing::Width)?.kind() {
            Kind::Byte => Some(Width::Byte),
            Kind::Word => Some(Width::Word),
            _ => None,
        };
        if explicit.is_some() {
            p.take();
        }
        Ok(explicit.unwrap_or(Width::Word))
    }
}
impl<'t> Parsable<'t> for Reg {
    /// Parses a single register token; any other token is left in place and reported as
    /// `Unexpected`.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        // `Parser::reg` performs the same peek / take / error sequence.
        p.reg()
    }
}
impl<'t> Parsable<'t> for Expr<'t> {
fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
p.expr()
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
    /// Parses a `T` and moves it onto the heap.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let inner: T = p.parse()?;
        Ok(Box::new(inner))
    }
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
    /// Parses a brace-delimited list: an opening brace, zero or more `T`s, and a closing
    /// brace. No separator is expected between elements.
    fn parse_with(p: &mut Parser<'t>) -> PResult<Self> {
        let ctx = Parsing::Vec;
        p.assert(Kind::OpenBrace, ctx)?;
        let mut items = Vec::new();
        loop {
            if p.peek(ctx)?.kind == Kind::CloseBrace {
                break;
            }
            items.push(p.parse()?);
        }
        p.assert(Kind::CloseBrace, ctx)?;
        Ok(items)
    }
}
/// Context-sensitive parsing rules
impl<'t> Parser<'t> {
pub fn string(&mut self) -> PResult<&'t str> {
let token = *self.peek(Parsing::Directive)?;
match token.kind {
Kind::String => {
self.take();
Ok(&token.lexeme[1..token.lexeme.len() - 1])
}
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
}
}
pub fn label(&mut self) -> PResult<&'t str> {
let p = Parsing::Label;
let token = self.next(p)?;
assert_eq!(Kind::Identifier, token.kind);
self.assert(Kind::Colon, p)?;
Ok(token.lexeme)
}
pub fn reg(&mut self) -> PResult<Reg> {
match self.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => {
self.take();
Ok(r)
}
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
}
}
}
pub mod error {
use super::Kind;
use crate::span::Span;
use std::{fmt::Display, num::TryFromIntError};
/// Result alias whose error type is this module's [Error].
pub type PResult<T> = Result<T, Error>;
/// A parse error: what went wrong, what was being parsed, and where.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
/// The grammar rule that was being parsed when the error was raised
pub parsing: Parsing,
/// The specific failure
pub kind: ErrorKind,
/// The source span reported with the error
pub loc: Span<usize>,
}
/// The specific failure carried by an [Error].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// Displayed as "lexical error"
LexError,
/// Returned when [Parsing::Expr] fails without consuming
NotExpr,
/// Displayed as "Division by zero"
DivZero,
/// A token of this kind was found where a number was required
NonNumeric(Kind),
/// A destination immediate other than `#0` or `#1`
BadIntForDst(u16),
/// A failed integer conversion
TryFromIntError(TryFromIntError),
/// A token of this kind was not valid at this point
Unexpected(Kind),
/// Displayed as "Peek buffer empty"
BufEmpty,
/// Displayed as "Not yet implemented"
Todo,
}
/// Names the grammar rule being parsed, for use in error messages.
///
/// The derived ordering follows declaration order, so variants should not be reordered
/// casually.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Parsing {
File,
Stmt,
Label,
Directive,
Instruction,
NoEm,
OneEm,
Reti,
Br,
OneArg,
TwoArg,
Jump,
Width,
Src,
Dst,
Reg,
Expr,
Vec,
}
// NOTE(review): the three body lines below carry two diff columns fused together (old
// `Debug for Parser` and new `Display for Error`). The new implementation formats the
// error as "[<loc>]: Error: <kind> while parsing <parsing>".
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive() write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing)
} }
}
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::LexError => write!(f, "lexical error"),
ErrorKind::TryFromIntError(e) => write!(f, "{e}"),
ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"),
ErrorKind::DivZero => write!(f, "Division by zero"),
ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"),
ErrorKind::BufEmpty => write!(f, "Peek buffer empty"),
ErrorKind::Todo => write!(f, "Not yet implemented"),
}
}
}
impl Display for Parsing {
    /// Writes the rule as a noun phrase (with its article), suitable for
    /// "... while parsing {}".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Parsing::File => "a file",
            Parsing::Stmt => "a line",
            Parsing::Label => "a label",
            Parsing::Directive => "a directive",
            Parsing::Instruction => "an instruction",
            Parsing::NoEm => "a no-operand emulated instruction",
            Parsing::OneEm => "a one-operand emulated instruction",
            Parsing::Reti => "a `reti` instruction",
            Parsing::Br => "a `br` instruction",
            Parsing::OneArg => "a one-operand instruction",
            Parsing::TwoArg => "a two-operand instruction",
            Parsing::Jump => "a jump instruction",
            Parsing::Width => "an instruction width",
            Parsing::Src => "a source",
            Parsing::Dst => "a destination",
            Parsing::Reg => "a register",
            Parsing::Expr => "a constant expression",
            Parsing::Vec => "a list",
        };
        // Go through `str`'s Display so the caller's width/padding flags still apply.
        Display::fmt(description, f)
    }
}
// Uses the trait's default methods only; no `source` is reported.
impl std::error::Error for Error {}
} }
#[cfg(test)]
mod tests;

680
src/parser/ast.rs Normal file
View File

@@ -0,0 +1,680 @@
// © 2023-2024 John Breaux
//See LICENSE.md for license
//! Represents MSP430 assembly as a syntax tree: statements, instructions, directives,
//! operands, and constant expressions.
use crate::{
lexer::token::{self, Reg, Token},
span::Span,
};
/// A sequence of [Statement]s, in the order they are stored.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Statements<'t> {
pub stmts: Vec<Statement<'t>>,
}
/// One statement of assembly source.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Statement<'t> {
/// A label name; displayed with a trailing `:`
Label(&'t str),
/// A machine instruction
Insn(Instruction<'t>),
/// An assembler directive
Directive(Directive<'t>),
/// A comment, displayed verbatim
Comment(&'t str),
}
/// An assembler directive and its operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Directive<'t> {
/// TODO: Store define as a vec of tokens. This will require help from the
/// [preprocessor](crate::preprocessor)
Define(Vec<Token<'t>>),
/// `.org expr`
Org(Box<Expr<'t>>),
/// `.word expr`
Word(Box<Expr<'t>>),
/// `.words` followed by a list of expressions
Words(Vec<Expr<'t>>),
/// `.string` followed by the string's text
String(&'t str),
}
/// An instruction together with the source span it was parsed from.
///
/// `span` is declared first, so the derived ordering compares spans before kinds.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'t> {
pub span: Span<usize>,
pub kind: InstructionKind<'t>,
}
/// The syntactic form of an instruction; each variant wraps the node for that form.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InstructionKind<'t> {
/// Emulated instruction with no operands
NoEm(NoEm),
/// Emulated instruction with one (destination) operand
OneEm(OneEm<'t>),
/// Instruction with one (source) operand
OneArg(OneArg<'t>),
/// Instruction with a source and a destination operand
TwoArg(TwoArg<'t>),
/// Jump instruction
Jump(Jump<'t>),
/// The `reti` instruction
Reti(Reti),
/// The `br` instruction
Br(Br<'t>),
}
/// An emulated instruction that takes no operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct NoEm {
pub opcode: token::NoEm,
}
/// An emulated instruction with an optional width and a single destination operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneEm<'t> {
pub opcode: token::OneEm,
pub width: Width,
pub dst: Dst<'t>,
}
/// An instruction with an optional width and a single source operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneArg<'t> {
pub opcode: token::OneArg,
pub width: Width,
pub src: Src<'t>,
}
/// An instruction with an optional width, a source operand, and a destination operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TwoArg<'t> {
pub opcode: token::TwoArg,
pub width: Width,
pub src: Src<'t>,
pub dst: Dst<'t>,
}
/// A jump instruction and its destination.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Jump<'t> {
pub opcode: token::Jump,
pub dst: JumpDst<'t>,
}
/// The `reti` instruction; it carries no operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Reti;
/// The `br` instruction and its source operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Br<'t> {
pub src: Src<'t>,
}
/// Operand width of an instruction. `Word` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Width {
#[default]
Word,
Byte,
}
/// A source operand, one variant per addressing form.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Src<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `@reg`
Indirect(Reg),
/// `@reg+`
PostInc(Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#expr`
Immediate(Box<Expr<'t>>),
/// `#n`, where `n` is one of the [SrcSpecial] constants
Special(SrcSpecial),
/// An expression with no addressing-mode sigil
BareExpr(Box<Expr<'t>>),
}
/// The constants that have a dedicated source-operand form.
///
/// Note that `Four` is declared before `Two`; the derived ordering follows this
/// declaration order, not numeric order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum SrcSpecial {
Zero,
One,
Four,
Two,
Eight,
NegOne,
}
/// A destination operand, one variant per addressing form.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Dst<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#0` or `#1`
Special(DstSpecial),
}
/// The two constants accepted as a destination immediate.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DstSpecial {
Zero,
One,
}
/// The destination of a jump instruction.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum JumpDst<'t> {
/// A relative offset, nominally an even number from -0x400..=0x3fe
Rel(i16),
/// A label, referred to by name
Label(&'t str),
}
/// A constant expression.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Expr<'t> {
/// A first operand followed by any number of `(operator, operand)` pairs
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
/// A list of prefix operators and the operand they are written in front of
Unary(Vec<UnOp>, Box<Expr<'t>>),
/// A parenthesized expression
Group(Box<Expr<'t>>),
/// A 16-bit literal
Number(u16),
/// An identifier
Ident(&'t str),
/// `&name`
AddrOf(&'t str),
}
/// Binary operators usable in an [Expr].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum BinOp {
/// `*`
Mul,
/// `/`
Div,
/// `%`
Rem,
/// `+`
Add,
/// `-`
Sub,
/// `<<`
Lsh,
/// `>>`
Rsh,
/// `&`
And,
/// `^`
Xor,
/// `|`
Or,
}
/// Prefix operators usable in an [Expr].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum UnOp {
/// `*`
Deref,
/// `!`
Not,
/// `-`
Neg,
}
pub mod conv {
//! Conversions between [ast](super) types, via [From], or via `new` constructor
use super::{InstructionKind as Ik, *};
// Generates `impl<'t> From<$src> for $dst` for every `$src => $expr` pair, where `$expr`
// is called with the source value to build the destination (typically a variant
// constructor).
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
impl<'t> From<$src> for $dst {
fn from(value: $src) -> Self {
$expr(value)
}
}
)*}}
// sure am glad macros aren't hygienic over lifetimes
// (the `'t` written in the invocations below is the `'t` declared inside the macro)
// Each instruction node converts into the `InstructionKind` variant of the same name.
impl_from! { Ik<'t> {
NoEm => Ik::NoEm,
OneEm<'t> => Ik::OneEm,
OneArg<'t> => Ik::OneArg,
TwoArg<'t> => Ik::TwoArg,
Jump<'t> => Ik::Jump,
Reti => Ik::Reti,
Br<'t> => Ik::Br,
}}
// A bare `u16` converts into a numeric expression.
impl_from! { Expr<'t> {
u16 => Expr::Number
}}
impl<'t> From<Dst<'t>> for Src<'t> {
fn from(value: Dst<'t>) -> Self {
match value {
Dst::Special(v) => Src::Special(v.into()),
Dst::Absolute(v) => Src::Absolute(v),
Dst::Indexed(i, r) => Src::Indexed(i, r),
Dst::Direct(r) => Src::Direct(r),
}
}
}
impl From<DstSpecial> for SrcSpecial {
    /// Maps each destination constant onto the source constant of the same value.
    fn from(value: DstSpecial) -> Self {
        if let DstSpecial::Zero = value {
            Self::Zero
        } else {
            Self::One
        }
    }
}
impl<'t> TwoArg<'t> {
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
Self { opcode, width, src, dst }
}
}
}
pub mod display {
use super::*;
use std::fmt::Display;
impl<'t> Display for Statements<'t> {
    /// Writes every statement in order, each followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.stmts.iter().try_for_each(|stmt| writeln!(f, "{}", stmt))
    }
}
impl<'t> Display for Statement<'t> {
    /// Writes the statement; labels get a trailing `:`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Statement::Label(name) => write!(f, "{}:", name),
            Statement::Comment(text) => write!(f, "{}", text),
            Statement::Directive(directive) => write!(f, "{}", directive),
            Statement::Insn(insn) => write!(f, "{}", insn),
        }
    }
}
impl<'t> Display for Directive<'t> {
    /// Writes the directive name followed by its operands.
    ///
    /// `Define` is written as `.define` followed by the lexeme of each stored token,
    /// separated by single spaces.
    // NOTE(review): `.words` is written with `[ ]` delimiters; verify that this matches
    // the delimiter tokens the list parser expects.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Directive::Define(tokens) => {
                write!(f, ".define")?;
                for token in tokens {
                    write!(f, " {}", token.lexeme)?;
                }
                Ok(())
            }
            Directive::Org(e) => write!(f, ".org {e}"),
            Directive::Word(w) => write!(f, ".word {w}"),
            Directive::Words(words) => {
                write!(f, ".words [ ")?;
                for word in words {
                    write!(f, "{word} ")?;
                }
                write!(f, "]")
            }
            Directive::String(s) => write!(f, ".string \"{s}\""),
        }
    }
}
impl<'t> Display for Instruction<'t> {
    /// Writes the instruction itself; the span is not shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.kind)
    }
}
impl<'t> Display for InstructionKind<'t> {
    /// Forwards to the wrapped node, passing the caller's formatter through unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            InstructionKind::NoEm(insn) => Display::fmt(insn, f),
            InstructionKind::OneEm(insn) => Display::fmt(insn, f),
            InstructionKind::OneArg(insn) => Display::fmt(insn, f),
            InstructionKind::TwoArg(insn) => Display::fmt(insn, f),
            InstructionKind::Jump(insn) => Display::fmt(insn, f),
            InstructionKind::Reti(insn) => Display::fmt(insn, f),
            InstructionKind::Br(insn) => Display::fmt(insn, f),
        }
    }
}
impl Display for NoEm {
    /// Writes the opcode.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.opcode)
    }
}
impl<'t> Display for OneEm<'t> {
    /// Writes `opcode` immediately followed by the width suffix, a tab, then the
    /// destination.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.dst)
    }
}
impl<'t> Display for OneArg<'t> {
    /// Writes `opcode` immediately followed by the width suffix, a tab, then the source.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.src)
    }
}
impl<'t> Display for TwoArg<'t> {
    /// Writes `opcode` immediately followed by the width suffix, a tab, then
    /// `src, dst`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}, {}", self.opcode, self.width, self.src, self.dst)
    }
}
impl<'t> Display for Jump<'t> {
    /// Writes the opcode, a tab, then the destination.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}\t{}", self.opcode, self.dst)
    }
}
impl Display for Reti {
    /// Writes the mnemonic `reti`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("reti")
    }
}
impl<'t> Display for Br<'t> {
    /// Writes `br`, a tab, then the source operand.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "br\t{}", self.src)
    }
}
impl<'t> Display for Src<'t> {
    /// Writes the operand with the sigil of its addressing form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Register forms
            Src::Direct(reg) => write!(f, "{}", reg),
            Src::Indirect(reg) => write!(f, "@{}", reg),
            Src::PostInc(reg) => write!(f, "@{}+", reg),
            Src::Indexed(index, reg) => write!(f, "{}({})", index, reg),
            // Expression and constant forms
            Src::Absolute(addr) => write!(f, "&{}", addr),
            Src::Immediate(value) => write!(f, "#{}", value),
            Src::Special(constant) => write!(f, "#{}", constant),
            Src::BareExpr(expr) => write!(f, "{}", expr),
        }
    }
}
impl Display for SrcSpecial {
    /// Writes the constant's value in decimal, without a leading `#`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let digits = match self {
            SrcSpecial::Zero => "0",
            SrcSpecial::One => "1",
            SrcSpecial::Two => "2",
            SrcSpecial::Four => "4",
            SrcSpecial::Eight => "8",
            SrcSpecial::NegOne => "-1",
        };
        f.write_str(digits)
    }
}
impl<'t> Display for Dst<'t> {
    /// Writes the operand with the sigil of its addressing form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Dst::Special(constant) => write!(f, "#{}", constant),
            Dst::Absolute(addr) => write!(f, "&{}", addr),
            Dst::Indexed(index, reg) => write!(f, "{}({})", index, reg),
            Dst::Direct(reg) => write!(f, "{}", reg),
        }
    }
}
impl Display for DstSpecial {
    /// Writes `0` or `1`, without a leading `#`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            DstSpecial::Zero => "0",
            DstSpecial::One => "1",
        })
    }
}
impl<'t> Display for JumpDst<'t> {
    /// Writes a relative offset in decimal, or a label by name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            JumpDst::Label(name) => write!(f, "{}", name),
            JumpDst::Rel(offset) => write!(f, "{}", offset),
        }
    }
}
impl<'t> Display for Expr<'t> {
    /// Writes the expression with no whitespace between operators and operands.
    /// Numbers are written in lowercase hexadecimal without a prefix.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expr::Number(value) => write!(f, "{:x}", value),
            Expr::Ident(name) => write!(f, "{}", name),
            Expr::AddrOf(name) => write!(f, "&{}", name),
            Expr::Group(inner) => write!(f, "({})", inner),
            Expr::Unary(ops, operand) => {
                // Prefix operators first, in stored order, then the operand.
                ops.iter().try_for_each(|op| write!(f, "{}", op))?;
                write!(f, "{}", operand)
            }
            Expr::Binary(first, rest) => {
                write!(f, "{}", first)?;
                rest.iter().try_for_each(|(op, operand)| write!(f, "{}{}", op, operand))
            }
        }
    }
}
impl Display for BinOp {
    /// Writes the operator's symbol.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            BinOp::Mul => "*",
            BinOp::Div => "/",
            BinOp::Rem => "%",
            BinOp::Add => "+",
            BinOp::Sub => "-",
            BinOp::Lsh => "<<",
            BinOp::Rsh => ">>",
            BinOp::And => "&",
            BinOp::Xor => "^",
            BinOp::Or => "|",
        };
        f.write_str(symbol)
    }
}
impl Display for UnOp {
    /// Writes the operator's symbol.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            UnOp::Deref => "*",
            UnOp::Not => "!",
            UnOp::Neg => "-",
        };
        f.write_str(symbol)
    }
}
impl Display for Width {
    /// Writes the width suffix: `.b` for bytes, nothing at all for words.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Width::Byte = self {
            return f.write_str(".b");
        }
        Ok(())
    }
}
}
pub mod canonical {
use std::iter;
use super::*;
use token::TwoArg::*;
/// Conversion of a syntax-tree node into its canonical form.
pub trait Canonicalize {
/// The output after canonicalization
type Output;
/// Converts `self` into its "canonical" form, consuming it. "Emulated" instructions are
/// converted into their respective non-emulated forms.
fn to_canonical(self) -> Self::Output;
}
impl<'t> Canonicalize for Statements<'t> {
    type Output = Self;
    /// Canonicalizes every statement, preserving order.
    fn to_canonical(self) -> Self::Output {
        let mut stmts = Vec::new();
        for stmt in self.stmts {
            stmts.push(stmt.to_canonical());
        }
        Self { stmts }
    }
}
impl<'t> Canonicalize for Statement<'t> {
    type Output = Self;
    /// Canonicalizes instructions; labels, directives, and comments pass through
    /// untouched.
    fn to_canonical(self) -> Self::Output {
        if let Statement::Insn(insn) = self {
            Statement::Insn(insn.to_canonical())
        } else {
            self
        }
    }
}
impl<'t> Canonicalize for Instruction<'t> {
    type Output = Self;
    /// Canonicalizes the instruction kind and keeps the original span.
    fn to_canonical(self) -> Self::Output {
        let Self { span, kind } = self;
        Self { span, kind: kind.to_canonical() }
    }
}
impl<'t> Canonicalize for InstructionKind<'t> {
    type Output = Self;
    /// Canonicalizes the wrapped instruction. The emulated forms and `br` come back as
    /// two-operand instructions; the remaining forms keep their variant.
    fn to_canonical(self) -> Self::Output {
        match self {
            // Forms that are rewritten into a two-operand instruction
            Self::NoEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::OneEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::Br(insn) => Self::TwoArg(insn.to_canonical()),
            // Forms that keep their variant
            Self::TwoArg(insn) => Self::TwoArg(insn.to_canonical()),
            Self::OneArg(insn) => Self::OneArg(insn.to_canonical()),
            Self::Jump(insn) => Self::Jump(insn.to_canonical()),
            Self::Reti(insn) => Self::Reti(insn.to_canonical()),
        }
    }
}
impl Canonicalize for NoEm {
    type Output = TwoArg<'static>;
    /// Expands a no-operand emulated instruction into the word-width two-operand
    /// instruction it stands for.
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        use Width::*;
        // `nop` and `ret` are moves; every other opcode applies `Bic` or `Bis` with a
        // special constant to `SR`.
        let (op, constant) = match self.opcode {
            token::NoEm::Nop => {
                return TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
            }
            token::NoEm::Ret => {
                return TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
            }
            token::NoEm::Clrc => (Bic, One),
            token::NoEm::Clrz => (Bic, Two),
            token::NoEm::Clrn => (Bic, Four),
            token::NoEm::Dint => (Bic, Eight),
            token::NoEm::Setc => (Bis, One),
            token::NoEm::Setz => (Bis, Two),
            token::NoEm::Setn => (Bis, Four),
            token::NoEm::Eint => (Bis, Eight),
        };
        TwoArg::new(op, Word, Src::Special(constant), Dst::Direct(Reg::SR))
    }
}
impl<'t> Canonicalize for OneEm<'t> {
    type Output = TwoArg<'t>;
    /// Expands a one-operand emulated instruction into the two-operand instruction it
    /// stands for, keeping the width and destination.
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        let Self { opcode, width, dst } = self;
        // Select the real opcode and the implied source operand.
        let (op, src) = match opcode {
            token::OneEm::Pop => (Mov, Src::PostInc(Reg::SP)),
            // The shifts add the destination to itself.
            token::OneEm::Rla => (Add, Src::from(dst.clone())),
            token::OneEm::Rlc => (Addc, Src::from(dst.clone())),
            token::OneEm::Inv => (Xor, Src::Special(NegOne)),
            token::OneEm::Clr => (Mov, Src::Special(Zero)),
            token::OneEm::Tst => (Cmp, Src::Special(Zero)),
            token::OneEm::Dec => (Sub, Src::Special(One)),
            token::OneEm::Decd => (Sub, Src::Special(Two)),
            token::OneEm::Inc => (Add, Src::Special(One)),
            token::OneEm::Incd => (Add, Src::Special(Two)),
            token::OneEm::Adc => (Addc, Src::Special(Zero)),
            token::OneEm::Dadc => (Dadd, Src::Special(Zero)),
            token::OneEm::Sbc => (Subc, Src::Special(Zero)),
        };
        TwoArg::new(op, width, src, dst)
    }
}
impl<'t> Canonicalize for OneArg<'t> {
    type Output = Self;
    /// Canonicalizes the source operand. `call` is always given word width; every other
    /// opcode keeps the width it was written with.
    fn to_canonical(self) -> Self::Output {
        let Self { opcode, width, src } = self;
        let width = if matches!(opcode, token::OneArg::Call) { Width::Word } else { width };
        Self { opcode, width, src: src.to_canonical() }
    }
}
impl<'t> Canonicalize for TwoArg<'t> {
    type Output = Self;
    /// Canonicalizes both operands; opcode and width are unchanged.
    fn to_canonical(self) -> Self::Output {
        let src = self.src.to_canonical();
        let dst = self.dst.to_canonical();
        Self { opcode: self.opcode, width: self.width, src, dst }
    }
}
impl<'t> Canonicalize for Jump<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, dst } = self;
Self {
opcode: match opcode {
token::Jump::Jnz => token::Jump::Jne,
token::Jump::Jz => token::Jump::Jeq,
token::Jump::Jnc => token::Jump::Jlo,
token::Jump::Jc => token::Jump::Jhs,
t => t,
},
dst: dst.to_canonical(),
}
}
}
impl Canonicalize for Reti {
type Output = Self;
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Br<'t> {
    type Output = TwoArg<'t>;
    /// Expands `br src` into a word-width `mov` of `src` into `PC`. The source operand
    /// is passed through as written.
    fn to_canonical(self) -> Self::Output {
        let target = Dst::Direct(Reg::PC);
        TwoArg::new(Mov, Width::Word, self.src, target)
    }
}
impl<'t> Canonicalize for Src<'t> {
    type Output = Self;
    /// Canonicalizes the expression inside indexed, absolute, and immediate operands.
    /// An immediate that folds to 0, 1, 2, 4, 8, or 0xffff becomes the matching
    /// [SrcSpecial]. Register-only, special, and bare-expression operands are returned
    /// unchanged.
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        match self {
            Src::Immediate(e) => {
                let special = match e.to_canonical() {
                    Expr::Number(0) => Zero,
                    Expr::Number(1) => One,
                    Expr::Number(2) => Two,
                    Expr::Number(4) => Four,
                    Expr::Number(8) => Eight,
                    Expr::Number(0xffff) => NegOne,
                    // Not one of the special constants: stays a plain immediate.
                    other => return Src::Immediate(other.into()),
                };
                Src::Special(special)
            }
            Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r),
            Src::Absolute(e) => Src::Absolute(e.to_canonical().into()),
            Src::Direct(_)
            | Src::Indirect(_)
            | Src::PostInc(_)
            | Src::Special(_)
            | Src::BareExpr(_) => self,
        }
    }
}
impl<'t> Canonicalize for Dst<'t> {
    type Output = Self;
    /// Canonicalizes the expression inside indexed and absolute operands; register and
    /// special operands are returned unchanged.
    fn to_canonical(self) -> Self::Output {
        match self {
            Dst::Absolute(addr) => Dst::Absolute(addr.to_canonical().into()),
            Dst::Indexed(index, reg) => Dst::Indexed(index.to_canonical().into(), reg),
            Dst::Direct(_) | Dst::Special(_) => self,
        }
    }
}
impl<'t> Canonicalize for JumpDst<'t> {
    type Output = Self;
    /// Jump destinations are already canonical.
    fn to_canonical(self) -> Self::Output {
        match self {
            unchanged => unchanged,
        }
    }
}
impl<'t> Canonicalize for Expr<'t> {
    type Output = Self;
    /// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
    /// this returns a single [Expr::Number]. If not, it evaluates until
    /// it runs into an unevaluatable leaf and returns the partially folded expression.
    ///
    /// Two things stop evaluation without losing information:
    /// - a unary expression whose operand does not fold to a number keeps all of its
    ///   operators around the canonicalized operand;
    /// - a division or remainder by a constant zero is left in the tree rather than
    ///   evaluated, so a later stage can report it.
    ///
    /// All arithmetic wraps at 16 bits.
    fn to_canonical(self) -> Self::Output {
        match self {
            Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
            Expr::Group(e) => e.to_canonical(),
            Expr::Unary(ops, tail) => {
                let mut tail = match tail.to_canonical() {
                    Expr::Number(n) => n,
                    // Nothing to preserve when there are no operators.
                    other if ops.is_empty() => return other,
                    // The operand is symbolic: keep the operators that apply to it.
                    other => return Expr::Unary(ops, Box::new(other)),
                };
                // If the tail is dereferenced, canonicalization must halt,
                // since we have no knowledge of memory layout
                // NOTE(review): operators are applied in stored (front-to-back) order,
                // while Display writes them front-to-back as prefixes; confirm which end
                // of the list binds tightest.
                let mut ops = ops.into_iter();
                for op in ops.by_ref() {
                    tail = match op {
                        UnOp::Deref => {
                            return Expr::Unary(
                                iter::once(op).chain(ops).collect(),
                                Box::new(tail.into()),
                            )
                        }
                        UnOp::Not => !tail,
                        UnOp::Neg => 0u16.wrapping_sub(tail),
                    }
                }
                Expr::Number(tail)
            }
            Expr::Binary(head, tails) => {
                let mut head = match head.to_canonical() {
                    Expr::Number(n) => n,
                    head => return Expr::Binary(head.into(), tails),
                };
                let mut tails = tails.into_iter();
                for (op, tail) in &mut tails {
                    let tail = tail.to_canonical();
                    let divides = matches!(op, BinOp::Div | BinOp::Rem);
                    // If the canonical tail isn't a number, or folding it would divide
                    // by zero, rebuild the remaining expression and return
                    let tail = match tail {
                        Expr::Number(n) if !(divides && n == 0) => n,
                        unfoldable => {
                            return Expr::Binary(
                                Box::new(head.into()),
                                iter::once((op, unfoldable)).chain(tails).collect(),
                            )
                        }
                    };
                    head = match op {
                        BinOp::Mul => head.wrapping_mul(tail),
                        BinOp::Div => head.wrapping_div(tail),
                        BinOp::Rem => head.wrapping_rem(tail),
                        BinOp::Add => head.wrapping_add(tail),
                        BinOp::Sub => head.wrapping_sub(tail),
                        BinOp::Lsh => head.wrapping_shl(tail as u32),
                        BinOp::Rsh => head.wrapping_shr(tail as u32),
                        BinOp::And => head & tail,
                        BinOp::Xor => head ^ tail,
                        BinOp::Or => head | tail,
                    };
                }
                Expr::Number(head)
            }
        }
    }
}
}

View File

@@ -1,15 +0,0 @@
// © 2023 John Breaux
//! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//`
use super::*;
/// Owns the text of a single comment.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Comment(pub String);
impl Parsable for Comment {
    /// Expects a comment token and stores an owned copy of its lexeme.
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let token = stream.expect(Type::Comment)?;
        let text = token.lexeme().to_string();
        Ok(Self(text))
    }
}
impl Display for Comment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
}

View File

@@ -1,90 +0,0 @@
// © 2023 John Breaux
//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and
//! [Linker](crate::Linker)
use std::path::PathBuf;
use super::*;
use crate::lexer::token::OwnedToken;
// TODO: Parse each kind of *postprocessor* directive into an AST node
// - .org 8000: Directive::Org { base: Number }
// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this
// in the AST?
// - .include "<filename>" Directive::Include { Root } ; should this include an entire AST in
// the AST?
// - .word 8000 Directive::Word(Number)
// - .words dead beef Directive::Words(Vec<u16>|Vec<Number>)
// - .byte ff Directive::Byte(Number)
// - .bytes de, ad, be, ef Directive::Bytes(Vec<u8>)
// - .string "string" Directive::String(String)
// - .ascii "string" Directive::Ascii(Vec<u8>)
/// A parsed directive together with its operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Directive {
/// `.org`
Org(Number),
/// `.define`, with the rest of the line kept as owned tokens
Define(Vec<OwnedToken>),
/// `.include`, holding the parsed contents of the included file
Include(Root),
/// `.byte`
Byte(Number),
/// `.bytes`
Bytes(Vec<Number>),
/// `.word`
Word(Number),
/// `.words`
Words(Vec<Number>),
/// `.string`
String(String),
/// `.strings`
Strings(Vec<String>),
}
// Empty inherent impl: no inherent methods are defined on `Directive` here.
impl Directive {}
impl Parsable for Directive {
    /// Expects a directive token and parses the operands its lexeme calls for.
    ///
    /// `.include` reads and parses the named file with a default [Parser]. An unknown
    /// lexeme yields [ParseError::UnrecognizedDirective].
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let directive = stream.expect(Type::Directive)?;
        // Dispatch on the directive's name
        let parsed = match directive.lexeme() {
            ".org" => Self::Org(Number::parse(p, stream)?),
            ".define" => {
                // Keep every token up to the end of the line (or file) as-is.
                let mut tokens = vec![];
                while !matches!(stream.peek().variant(), Type::Endl | Type::EndOfFile) {
                    let token = stream.next().unwrap_or_default();
                    tokens.push(token.into());
                }
                Self::Define(tokens)
            }
            ".include" => {
                // Try to get path
                let path = PathBuf::parse(p, stream)?;
                Self::Include(Parser::default().parse_file(&path)?)
            }
            ".byte" => Self::Byte(Number::parse(p, stream)?),
            ".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?),
            ".word" => Self::Word(Number::parse(p, stream)?),
            ".words" => Self::Words(Vec::<Number>::parse(p, stream)?),
            ".string" => Self::String(String::parse(p, stream)?),
            ".strings" => Self::Strings(Vec::<String>::parse(p, stream)?),
            unknown => return Err(ParseError::UnrecognizedDirective(unknown.into())),
        };
        Ok(parsed)
    }
}
impl Display for Directive {
    /// Writes the directive under the same name the parser recognizes it by, followed
    /// by its operands.
    ///
    /// List operands (`.bytes`, `.words`, `.strings`) are written with their `Debug`
    /// representation. An `.include` is written as the contents it was expanded to.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Directive::Org(num) => write!(f, ".org {num}"),
            Directive::Define(rep) => {
                write!(f, ".define")?;
                for t in rep {
                    write!(f, " {t}")?;
                }
                Ok(())
            }
            Directive::Include(r) => Display::fmt(r, f),
            Directive::Byte(num) => write!(f, ".byte {num}"),
            Directive::Bytes(v) => write!(f, ".bytes {v:?}"),
            Directive::Word(num) => write!(f, ".word {num}"),
            Directive::Words(v) => write!(f, ".words {v:?}"),
            Directive::String(s) => write!(f, ".string \"{s}\""),
            Directive::Strings(s) => write!(f, ".strings {s:?}"),
        }
    }
}

View File

@@ -1,74 +0,0 @@
// © 2023 John Breaux
use super::*;
use crate::lexer::error::LexError;
/// Everything that can go wrong while parsing, including errors passed up from the lexer
/// and from file I/O.
#[derive(Debug)]
pub enum ParseError {
/// Produced by [lexer](crate::lexer)
LexError(LexError),
/// Produced by [std::io]
IoError(std::io::Error),
/// Produced by [Number](Number)[::parse()](Parsable::parse())
/// when the parsed number contains digits too high for the specified radix
UnexpectedDigits(String, u32),
/// Produced by [Opcode](Opcode)[::parse()](Parsable::parse())
/// when the opcode passed lexing but did not match recognized opcodes.
///
/// This is always a lexer bug.
UnrecognizedOpcode(String),
/// Produced by [Directive](Directive)[::parse()](Parsable::parse())
/// when an unknown or unimplemented directive is used
UnrecognizedDirective(String),
/// Produced by [Register] when attempting to convert from a [str]
/// that isn't a register (pc, sp, sr, cg, or r{number})
NotARegister(String),
/// Produced by [Register] when the r{number} is outside the range 0-15
RegisterTooHigh(u16),
/// Produced by [SecondaryOperand] when the joke "secondary immediate" form
/// is out of range 0..=1
FatSecondaryImmediate(isize),
/// Produced by a [Number] too wide to fit in 16 bits
/// (outside the range `(-2^15) .. (2^16-1)` )
NumberTooWide(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
/// when the jump offset is outside the range (-0x3ff..0x3fc)
// NOTE(review): the Display message for this variant states the range as
// `-1022..=1024`, which disagrees with the range above; confirm which is correct.
JumpedTooFar(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
/// when the jump offset is odd
JumpedOdd(isize),
}
impl From<LexError> for ParseError {
fn from(value: LexError) -> Self { Self::LexError(value) }
}
impl From<std::io::Error> for ParseError {
fn from(value: std::io::Error) -> Self { Self::IoError(value) }
}
impl Display for ParseError {
    /// Writes a human-readable message. Wrapped lexer and I/O errors are shown using
    /// their own `Display` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Wrapped errors: forward the formatter so their own formatting applies
            Self::LexError(inner) => Display::fmt(inner, f),
            Self::IoError(inner) => Display::fmt(inner, f),
            // Unrecognized text
            Self::UnexpectedDigits(number, radix) => {
                write!(f, "Number `{}` is not base {}.", number, radix)
            }
            Self::UnrecognizedOpcode(op) => write!(f, "{} is not an opcode", op),
            Self::UnrecognizedDirective(d) => write!(f, "{} is not a directive.", d),
            Self::NotARegister(reg) => write!(f, "{} is not a register", reg),
            // Out-of-range values
            Self::RegisterTooHigh(reg) => write!(f, "r{} is not a register", reg),
            Self::FatSecondaryImmediate(num) => {
                write!(f, "Secondary immediate must be #0 or #1, not #{}", num)
            }
            Self::NumberTooWide(num) => write!(f, "{} does not fit in 16 bits", num),
            Self::JumpedTooFar(num) => {
                write!(f, "{} is too far away: must be in range (`-1022..=1024`.)", num)
            }
            Self::JumpedOdd(num) => {
                write!(f, "Jump targets only encode even numbers: {} must not be odd.", num)
            }
        }
    }
}
impl std::error::Error for ParseError {
    /// Returns the wrapped lexer or I/O error, if this error wraps one
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::LexError(e) => e,
            Self::IoError(e) => e,
            // Every other variant originates in the parser itself
            _ => return None,
        };
        Some(cause)
    }
}

View File

@@ -1,26 +0,0 @@
// © 2023 John Breaux
//! An [Identifier] stores the text of an identifier in a reference-counted string
use super::*;
use std::rc::Rc;
/// The text of an identifier.
///
/// The text lives behind an [Rc], so cloning an [Identifier] only bumps a reference count.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier {
    str: Rc<str>,
}

impl Identifier {
    /// Creates an [Identifier] holding a copy of `s`.
    ///
    /// The text is copied straight into the `Rc<str>` allocation, without first
    /// building an intermediate `String`.
    fn str<T: AsRef<str>>(s: T) -> Self {
        Self { str: Rc::from(s.as_ref()) }
    }
}
impl Parsable for Identifier {
    /// Consumes one [Type::Identifier] token and copies its lexeme into an [Identifier]
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let token = stream.expect(Type::Identifier)?;
        // `expect` only succeeds on an Identifier token, so anything else is a stream bug
        if !matches!(token.variant(), Type::Identifier) {
            unreachable!("Expected identifier, got {token:?}");
        }
        Ok(Self::str(token.lexeme()))
    }
}
impl Display for Identifier {
    /// Writes the identifier's text, honoring the caller's formatter flags
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { str: text } = self;
        Display::fmt(text, f)
    }
}

View File

@@ -1,52 +0,0 @@
// © 2023 John Breaux
//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430
//! instruction
//!
//!
//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent
//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an
//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on
//! my own. Really, there's about 9 valid classes of instruction, some of which are only used for
//! one or two of the MSP430's instructions.
use super::*;
pub mod encoding;
pub mod opcode;
/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Instruction(Opcode, Encoding);

impl Instruction {
    /// Borrows the instruction's [Opcode]
    pub fn opcode(&self) -> &Opcode {
        let Self(opcode, _) = self;
        opcode
    }

    /// Borrows the instruction's [Encoding]
    pub fn encoding(&self) -> &Encoding {
        let Self(_, encoding) = self;
        encoding
    }

    /// Gets the Instruction as a [u16]: the opcode's bits OR-ed with the encoding's bits
    pub fn word(&self) -> u16 {
        let Self(opcode, encoding) = self;
        *opcode as u16 | encoding.word()
    }

    /// Gets the extension words of the instruction's [Encoding], if it has any
    pub fn ext_words(&self) -> [Option<u16>; 2] {
        let Self(_, encoding) = self;
        encoding.extwords()
    }
}
impl Parsable for Instruction {
    /// Parses an [Opcode], resolves it to its final opcode and [EncodingParser], then lets
    /// the [EncodingParser] read the operands it still needs from the stream
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: crate::TokenStream<'text>,
    {
        let (opcode, encoding_parser) = Opcode::parse(p, stream)?.resolve();
        encoding_parser.parse(p, stream).map(|encoding| Self(opcode, encoding))
    }
}
impl From<Instruction> for u16 {
fn from(value: Instruction) -> Self { value.word() }
}
impl Display for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
}

View File

@@ -1,81 +0,0 @@
// © 2023 John Breaux
//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode)
use super::*;
pub mod number;
pub mod register;
pub mod width;
pub mod jump_target;
pub mod primary_operand;
pub mod secondary_operand;
mod builder;
pub mod encoding_parser;
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
use encoding_parser::EncodingParser;
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
///
/// # Examples
/// ```rust
/// use msp430_asm::{preamble::*, parser::preamble::*};
/// // Create a token sequence
/// let asm_file = r".b 8000(r15)";
/// // Create a single-operand encoding parser
/// let single: EncodingParser = Encoding::single().end();
/// // Parse an Encoding from it
/// let encoding: Encoding = single
/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
/// .unwrap();
/// // Print the Encoding
/// println!("{encoding}");
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
/// One-operand form: an operand [Width] and a single [PrimaryOperand]
Single { width: Width, dst: PrimaryOperand },
/// Jump form: only a [JumpTarget]
Jump { target: JumpTarget },
/// Two-operand form: an operand [Width], a [PrimaryOperand] source,
/// and a [SecondaryOperand] destination
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
}
impl Encoding {
/// Returns a builder for [Encoding::Single]
pub fn single() -> SingleBuilder { Default::default() }
/// Returns a builder for [Encoding::Jump]
pub fn jump() -> JumpBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
pub fn double() -> DoubleBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
///
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
/// dst are the same
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
///
pub fn word(&self) -> u16 {
match self {
Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16,
Encoding::Jump { target } => target.word().unwrap_or_default(),
Encoding::Double { width, src, dst } => {
u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
}
}
}
/// Returns extwords for instruction
pub fn extwords(&self) -> [Option<u16>; 2] {
match self {
Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()],
Encoding::Single { dst, .. } => [dst.ext_word(), None],
Encoding::Jump { .. } => [None, None],
}
}
}
impl Display for Encoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
Encoding::Jump { target } => write!(f, " {target}"),
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
}
}
}

View File

@@ -1,76 +0,0 @@
// © 2023 John Breaux
//! Builder API for [`EncodingParser`]
use super::*;
#[derive(Debug, Default)]
pub struct SingleBuilder {
width: Option<Width>,
dst: Option<PrimaryOperand>,
}
impl SingleBuilder {
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn operand(mut self, dst: PrimaryOperand) -> Self {
self.dst = Some(dst);
self
}
/// Build
pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
}
/// Builder for a jump [EncodingParser]; an unset target is parsed from the stream later
#[derive(Debug, Default)]
pub struct JumpBuilder {
    target: Option<JumpTarget>,
}

impl JumpBuilder {
    /// Sets the [JumpTarget] field
    pub fn target(self, target: JumpTarget) -> Self {
        Self { target: Some(target) }
    }

    /// Finishes the builder, producing an [EncodingParser::Jump]
    pub fn end(self) -> EncodingParser {
        let Self { target } = self;
        EncodingParser::Jump { target }
    }
}
#[derive(Debug, Default)]
pub struct DoubleBuilder {
width: Option<Width>,
src: Option<PrimaryOperand>,
dst: Option<SecondaryOperand>,
}
impl DoubleBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn src(mut self, src: PrimaryOperand) -> Self {
self.src = Some(src);
self
}
/// Sets the [PrimaryOperand] field
pub fn dst(mut self, dst: SecondaryOperand) -> Self {
self.dst = Some(dst);
self
}
pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
}
#[derive(Debug, Default)]
pub struct ReflexiveBuilder {
width: Option<Width>,
reg: Option<SecondaryOperand>,
}
impl ReflexiveBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
pub fn reg(mut self, reg: SecondaryOperand) -> Self {
self.reg = Some(reg);
self
}
pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
}

View File

@@ -1,37 +0,0 @@
// © 2023 John Breaux
//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`]
use super::*;
#[derive(Clone, Debug)]
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
///
/// Each field is optional: a field that is `Some` is used as-is, and a field that is `None`
/// is a hole to be filled by parsing from the token stream.
pub enum EncodingParser {
/// Produces an [Encoding::Single]
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
/// Produces an [Encoding::Jump]
Jump { target: Option<JumpTarget> },
/// Produces an [Encoding::Double]
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
/// Produces an [Encoding::Double] whose source and destination are the same operand
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
}
impl EncodingParser {
    /// Constructs an [Encoding] from this [EncodingParser], filling holes
    /// with the tokenstream
    ///
    /// Fields that were pre-set on the parser are used as-is and consume nothing from
    /// `stream`; only missing fields are parsed, in the order they appear in the assembly
    /// text (width, then source, then destination).
    ///
    /// # Errors
    /// Returns the [ParseError] of whichever operand failed to parse.
    pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError>
    where T: crate::TokenStream<'text> {
        Ok(match self {
            Self::Single { width, dst } => Encoding::Single {
                width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
                dst: match dst {
                    Some(dst) => dst,
                    None => PrimaryOperand::parse(p, stream)?,
                },
            },
            Self::Jump { target } => Encoding::Jump {
                // Parse lazily: `unwrap_or(JumpTarget::parse(..)?)` would consume tokens (and
                // could fail) even when the target was already supplied.
                target: match target {
                    Some(target) => target,
                    None => JumpTarget::parse(p, stream)?,
                },
            },
            Self::Double { width, src, dst } => Encoding::Double {
                width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
                src: match src {
                    Some(src) => src,
                    None => PrimaryOperand::parse(p, stream)?,
                },
                dst: match dst {
                    Some(dst) => dst,
                    None => SecondaryOperand::parse(p, stream)?,
                },
            },
            Self::Reflexive { width, reg } => {
                let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
                let reg = match reg {
                    Some(reg) => reg,
                    None => SecondaryOperand::parse(p, stream)?,
                };
                // The same operand is used as both source and destination
                Encoding::Double { width, src: reg.clone().into(), dst: reg }
            }
        })
    }
}

View File

@@ -1,58 +0,0 @@
// © 2023 John Breaux
//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier)
//! for a [Jump](Encoding::Jump) [instruction]
use super::*;
/// Contains the [pc-relative offset](Number) or [label](Identifier)
/// for a [Jump](Encoding::Jump) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JumpTarget {
/// An offset, stored in its already-encoded ("squished") 10-bit form
Number(Number),
/// A label name; it has no instruction word of its own (`word()` returns `None`)
Identifier(Identifier),
}
impl JumpTarget {
pub fn word(&self) -> Option<u16> {
match self {
JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff),
JumpTarget::Identifier(_) => None,
}
}
pub fn squish(value: isize) -> Result<u16, ParseError> {
match value {
i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?,
i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff),
i => Err(ParseError::JumpedTooFar(i))?,
}
}
pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 }
}
impl Parsable for JumpTarget {
// - Identifier
// - Number
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
// Try to parse a number
if let Some(num) = Number::try_parse(p, stream)? {
Self::try_from(num)
} else {
// if that fails, try to parse an identifier instead
Ok(Self::Identifier(Identifier::parse(p, stream)?))
}
}
}
impl TryFrom<Number> for JumpTarget {
type Error = ParseError;
fn try_from(value: Number) -> Result<Self, Self::Error> { Ok(Self::Number(Self::squish(value.into())?.into())) }
}
impl Display for JumpTarget {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))),
Self::Identifier(id) => write!(f, "{id}"),
}
}
}

View File

@@ -1,81 +0,0 @@
// © 2023 John Breaux
//! A [`Number`] represents a 16-bit signed or unsigned word
use super::*;
/// A numeric value together with the radix used to display it.
///
/// Parsed numbers remember the radix they were written in; numbers converted from
/// `isize` or `u16` use radix 16.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Number(isize, u32); // (value, radix)
impl Parsable for Number {
    // Grammar:
    // [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number
    /// Parses an optionally signed, optionally radix-marked number.
    ///
    /// Without a radix marker, the parser's configured radix (`p.radix`) is used.
    ///
    /// # Errors
    /// - [ParseError::UnexpectedDigits] when the digits are not valid in the chosen radix
    /// - [ParseError::NumberTooWide] when the value is outside `-0x8000..0x10000`
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        use Type as Ty;
        // A leading Minus negates the number; a leading Plus is accepted and ignored
        let negative = match stream.expect_any_of([Ty::Minus, Ty::Plus]) {
            Ok(sign) => sign.is_variant(Ty::Minus),
            Err(_) => false,
        };
        let marker = stream
            .expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin])
            .ok()
            .map(|t| t.variant());
        let radix = match marker {
            Some(Ty::RadixMarkerHex) => 16,
            Some(Ty::RadixMarkerDec) => 10,
            Some(Ty::RadixMarkerOct) => 8,
            Some(Ty::RadixMarkerBin) => 2,
            _ => p.radix,
        };
        let digits = stream.expect(Ty::Number)?;
        // TODO: Reintroduce error context
        let magnitude = isize::from_str_radix(digits.lexeme(), radix)
            .map_err(|_| ParseError::UnexpectedDigits(digits.lexeme().into(), radix))?;
        let value = magnitude * if negative { -1 } else { 1 };
        // The value must fit in a *signed or unsigned* 16-bit int (it is truncated on use)
        if !(-0x8000..0x10000).contains(&value) {
            return Err(ParseError::NumberTooWide(value));
        }
        Ok(Self(value, radix))
    }
}
impl From<isize> for Number {
fn from(value: isize) -> Self { Self(value, 16) }
}
impl From<Number> for isize {
fn from(value: Number) -> Self { value.0 as Self }
}
impl From<u16> for Number {
fn from(value: u16) -> Self { Self(value as isize, 16) }
}
impl From<Number> for u16 {
fn from(value: Number) -> Self { value.0 as Self }
}
impl std::ops::Sub<isize> for Number {
type Output = Self;
fn sub(mut self, rhs: isize) -> Self::Output {
self.0 -= rhs;
self
}
}
impl std::ops::Shr<usize> for Number {
type Output = Self;
fn shr(mut self, rhs: usize) -> Self::Output {
self.0 >>= rhs;
self
}
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.1 {
2 => std::fmt::Binary::fmt(&self.0, f),
8 => std::fmt::Octal::fmt(&self.0, f),
16 => std::fmt::LowerHex::fmt(&self.0, f),
_ => std::fmt::Display::fmt(&self.0, f),
}
}
}

View File

@@ -1,146 +0,0 @@
// © 2023 John Breaux
//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension
//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`]
use super::*;
/// Contains the first [Register], addressing mode, and Extension Word for a
/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrimaryOperand {
/// The register itself: `rN`
Direct(Register),
/// The memory the register points to: `@rN`
Indirect(Register),
/// Like `Indirect`, written with a trailing `+`: `@rN+`
PostInc(Register),
/// Register plus an index carried in an extension word: `idx(rN)`
Indexed(Register, Number),
/// A label name, encoded against `pc`
Relative(Identifier),
/// An address carried in an extension word: `&addr`
Absolute(Number),
/// A value carried in an extension word: `#value`
Immediate(Number),
// The remaining variants are the immediates `#4`, `#8`, `#0`, `#1`, `#2`, and `#-1`.
// They are encoded with the `sr`/`cg` registers and carry no extension word.
Four,
Eight,
Zero,
One,
Two,
MinusOne,
}
impl PrimaryOperand {
/// Returns the mode bits
pub fn mode(&self) -> u16 {
use PrimaryOperand::*;
match self {
Direct(_) | Zero => 0,
Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4,
Indirect(_) | Two | Four => 2 << 4,
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
}
}
/// Gets the register
pub fn register(&self) -> Register {
use PrimaryOperand::*;
match self {
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
Immediate(_) | Relative(_) => Register::pc,
Absolute(_) | Four | Eight => Register::sr,
Zero | One | Two | MinusOne => Register::cg,
}
}
/// Gets the extension word, if present
pub fn ext_word(&self) -> Option<u16> {
use PrimaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for PrimaryOperand {
    /// Parses a source operand.
    ///
    /// Tried in order: register (direct), number (indexed), identifier (relative), then one
    /// of the prefix markers for indirect/post-increment, absolute, or immediate operands.
    /// Immediates equal to -1 (or 0xffff), 0, 1, 2, 4, and 8 become the matching constant
    /// variant.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: crate::TokenStream<'text> {
        // Register => Direct
        if let Some(reg) = Register::try_parse(p, stream)? {
            return Ok(Self::Direct(reg));
        }
        // Number => Indexed: `number ( register )`
        if let Some(index) = Number::try_parse(p, stream)? {
            stream.expect(Type::LParen)?;
            let base = Register::parse(p, stream)?;
            stream.expect(Type::RParen)?;
            return Ok(Self::Indexed(base, index));
        }
        // Identifier => Relative (label mode)
        if let Some(label) = Identifier::try_parse(p, stream)? {
            return Ok(Self::Relative(label));
        }
        // Otherwise the operand must start with a prefix marker.
        // Register, Number, and Identifier are listed only to make error messages clearer;
        // the cost is a negligible slowdown on the failure path.
        let token = stream.expect_any_of([
            Type::Indirect,
            Type::Absolute,
            Type::Immediate,
            Type::Register,
            Type::Number,
            Type::Identifier,
        ])?;
        let operand = match token.variant() {
            Type::Indirect => {
                let reg = Register::parse(p, stream)?;
                // A trailing Plus selects post-increment
                if stream.expect(Type::Plus).is_ok() {
                    Self::PostInc(reg)
                } else {
                    Self::Indirect(reg)
                }
            }
            Type::Absolute => Self::Absolute(Number::parse(p, stream)?),
            Type::Immediate => {
                let number = Number::parse(p, stream)?;
                let value: isize = number.into();
                match value {
                    // Number preserves signedness, so the all-ones constant has two spellings
                    -1 | 0xffff => Self::MinusOne,
                    0 => Self::Zero,
                    1 => Self::One,
                    2 => Self::Two,
                    4 => Self::Four,
                    8 => Self::Eight,
                    _ => Self::Immediate(number),
                }
            }
            _ => unreachable!("Token {token:?} passed expectation but failed match!"),
        };
        Ok(operand)
    }
}
impl From<SecondaryOperand> for PrimaryOperand {
fn from(value: SecondaryOperand) -> Self {
match value {
SecondaryOperand::Direct(r) => Self::Direct(r),
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
SecondaryOperand::Absolute(n) => Self::Absolute(n),
SecondaryOperand::Relative(id) => Self::Relative(id),
SecondaryOperand::Zero => Self::Zero,
SecondaryOperand::One => Self::One,
}
}
}
impl Display for PrimaryOperand {
    // Turn the operand back into a form which parses into the same type
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Operands carrying data return early; the constants fall through as literals
        let literal = match self {
            Self::Direct(r) => return Display::fmt(r, f),
            Self::Relative(id) => return Display::fmt(id, f),
            Self::Indirect(r) => return write!(f, "@{r}"),
            Self::PostInc(r) => return write!(f, "@{r}+"),
            Self::Indexed(r, idx) => return write!(f, "{idx}({r})"),
            Self::Absolute(n) => return write!(f, "&{n}"),
            Self::Immediate(n) => return write!(f, "#{n}"),
            Self::Four => "#4",
            Self::Eight => "#8",
            Self::Zero => "#0",
            Self::One => "#1",
            Self::Two => "#2",
            Self::MinusOne => "#-1",
        };
        Display::fmt(literal, f)
    }
}

View File

@@ -1,112 +0,0 @@
// © 2023 John Breaux
//! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
use super::*;
use std::str::FromStr;
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
///
/// Variants are declared in register-number order, so `pc` is 0 and `r15` is 15 when cast
/// to an integer.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Register {
/// Program Counter
pc,
/// Stack Pointer
sp,
/// Status Register
sr,
/// Constant Generator
cg,
// The remaining registers have no special name
r4,
r5,
r6,
r7,
r8,
r9,
r10,
r11,
r12,
r13,
r14,
r15,
}
impl Parsable for Register {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
stream.expect(Type::Register)?.lexeme().parse()
}
}
impl From<Register> for u16 {
    /// Returns the register's number (its declaration index, 0-15)
    fn from(value: Register) -> Self {
        let number = value as u16;
        number
    }
}
impl TryFrom<u16> for Register {
    type Error = ParseError;

    /// Converts a register number (0-15) into a [Register].
    ///
    /// # Errors
    /// Returns [ParseError::RegisterTooHigh] for any number above 15.
    fn try_from(value: u16) -> Result<Self, Self::Error> {
        use Register::*;
        // Indexed by register number
        const REGISTERS: [Register; 16] =
            [pc, sp, sr, cg, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15];
        match REGISTERS.get(usize::from(value)) {
            Some(register) => Ok(*register),
            None => Err(ParseError::RegisterTooHigh(value)),
        }
    }
}
impl FromStr for Register {
    type Err = ParseError;

    /// Converts a register name into a [Register].
    ///
    /// Accepts the special names `pc`, `sp`, `sr`, and `cg`; for any other input, the first
    /// character is skipped and the rest is read as a register number (as in `r12`).
    ///
    /// # Errors
    /// - [ParseError::NotARegister] when the text after the first character is not a
    ///   number, including when `s` is empty
    /// - [ParseError::RegisterTooHigh] when the number is above 15
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use Register::*;
        match s {
            "pc" => Ok(pc),
            "sp" => Ok(sp),
            "sr" => Ok(sr),
            "cg" => Ok(cg),
            // `str::get` returns None where `&s[1..]` would panic: on an empty string, or
            // when the first character is wider than one byte.
            _ => s
                .get(1..)
                .and_then(|digits| digits.parse::<u16>().ok())
                .ok_or_else(|| ParseError::NotARegister(s.into()))?
                .try_into(),
        }
    }
}
impl From<Register> for &str {
fn from(value: Register) -> Self {
use Register::*;
match value {
pc => "pc",
sp => "sp",
sr => "sr",
cg => "cg",
r4 => "r4",
r5 => "r5",
r6 => "r6",
r7 => "r7",
r8 => "r8",
r9 => "r9",
r10 => "r10",
r11 => "r11",
r12 => "r12",
r13 => "r13",
r14 => "r14",
r15 => "r15",
}
}
}
impl std::fmt::Display for Register {
    /// Writes the register's assembly name
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name: &str = (*self).into();
        f.write_str(name)
    }
}

View File

@@ -1,105 +0,0 @@
// © 2023 John Breaux
//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension
//! Word for a [two-operand](Encoding::Double) [instruction]
use super::*;
/// The destination of a [Double](Encoding::Double)
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SecondaryOperand {
/// The register itself: `rN`
Direct(Register),
/// Register plus an index carried in an extension word: `idx(rN)`
Indexed(Register, Number),
/// A label name, encoded against `pc`
Relative(Identifier),
/// An address carried in an extension word: `&addr`
Absolute(Number),
// Joke encodings?
// `#0` and `#1` as a destination, encoded with the `cg` register
Zero,
One,
}
use SecondaryOperand as So;
impl SecondaryOperand {
pub fn mode(&self) -> u16 {
match self {
So::Direct(_) | So::Zero => 0,
So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7,
}
}
pub fn register(&self) -> Register {
use SecondaryOperand::*;
match self {
Direct(r) | Indexed(r, _) => *r,
Relative(_) => Register::pc,
Absolute(_) => Register::sr,
Zero | One => Register::cg,
}
}
/// This is the only way to have an extension word
pub fn ext_word(&self) -> Option<u16> {
use SecondaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for SecondaryOperand {
// Separator
// - Register => Direct
// - Number => Indexed
// - OpenIdx
// - Register
// - CloseIdx
// - Absolute
// - Number
// - Immediate
// - Number == 0, 1
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
use SecondaryOperand::*;
stream.allow(Type::Separator);
// Try parsing as Register (Direct)
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number (Indexed)
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Try parsing as Identifier (Relative, label mode)
if let Some(id) = Identifier::try_parse(p, stream)? {
return Ok(Self::Relative(id));
}
// Register, Number, and Identifier are included here to make error messages clearer.
// their inclusion will cause a negligible slowdown when the next token is not a prefix marker
// (a failure condition) but should not match a token
let token =
stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?;
Ok(match token.variant() {
Type::Absolute => Absolute(Number::parse(p, stream)?),
// TODO: Reintroduce error context
Type::Immediate => match Number::parse(p, stream)?.into() {
0 => Zero,
1 => One,
n => Err(ParseError::FatSecondaryImmediate(n))?,
},
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl Display for SecondaryOperand {
    /// Writes the operand in assembly syntax
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Operands carrying data return early; the constants fall through as literals
        let literal = match self {
            Self::Direct(r) => return Display::fmt(r, f),
            Self::Relative(id) => return Display::fmt(id, f),
            Self::Indexed(r, idx) => return write!(f, "{idx}({r})"),
            Self::Absolute(n) => return write!(f, "&{n}"),
            Self::Zero => "#0",
            Self::One => "#1",
        };
        Display::fmt(literal, f)
    }
}

View File

@@ -1,32 +0,0 @@
// © 2023 John Breaux
//! A [`Width`] represents whether an instruction operates on whole words or bytes
use super::*;
/// Represents an instruction's operand width.
///
/// Evaluates to false when instruction takes word-sized operands, or true when
/// instruction takes byte-sized operands
///
/// The derived [Default] is `false` (word-sized).
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Width(bool);
impl Parsable for Width {
    /// Parses an optional width specifier.
    ///
    /// Never fails: when the next token is not a width specifier, the word width is
    /// returned.
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        match stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) {
            Ok(token) => Ok(Self(token.is_variant(Type::ByteWidth))),
            Err(_) => Ok(Self(false)),
        }
    }
}
impl From<Width> for u16 {
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
}
impl From<Width> for bool {
fn from(value: Width) -> Self { value.0 }
}
impl From<bool> for Width {
fn from(value: bool) -> Self { Width(value) }
}
impl std::fmt::Display for Width {
    /// Writes `.b` for byte-sized operands and nothing for word-sized operands
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let suffix = match self.0 {
            true => ".b",
            false => "",
        };
        f.write_str(suffix)
    }
}

View File

@@ -1,261 +0,0 @@
// © 2023 John Breaux
//! An [`Opcode`] encodes an msp430 operation
use super::*;
use std::str::FromStr;
/// Opcode from the [MSPGCC Manual][1]
///
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
/// extract from a [TokenStream] only the required arguments for that call.
///
/// The explicit discriminants are the opcode bits of the instruction word (they are cast to
/// `u16` when an instruction is assembled). The "emulated" opcodes have no discriminant of
/// their own: `resolve()` replaces each with one of the real opcodes.
///
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
// NOTE(review): the allow presumably covers the `| 0 << n` discriminants below — confirm
#[allow(clippy::identity_op)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
// "Emulated" opcodes
Nop,
Pop,
Br,
Ret,
Clrc,
Setc,
Clrz,
Setz,
Clrn,
Setn,
Dint,
Eint,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
// Single
// (prefix 0x1000, operation number in bits 7 and up)
Rrc = 0x1000 | 0 << 7,
Swpb = 0x1000 | 1 << 7,
Rra = 0x1000 | 2 << 7,
Sxt = 0x1000 | 3 << 7,
Push = 0x1000 | 4 << 7,
Call = 0x1000 | 5 << 7,
Reti = 0x1000 | 6 << 7,
// Jump
// (prefix 0x2000, condition number in bits 10 and up)
Jnz = 0x2000 | 0 << 10,
Jz = 0x2000 | 1 << 10,
Jnc = 0x2000 | 2 << 10,
Jc = 0x2000 | 3 << 10,
Jn = 0x2000 | 4 << 10,
Jge = 0x2000 | 5 << 10,
Jl = 0x2000 | 6 << 10,
Jmp = 0x2000 | 7 << 10,
// Double
// (operation number in the top four bits)
Mov = 0x4000,
Add = 0x5000,
Addc = 0x6000,
Subc = 0x7000,
Sub = 0x8000,
Cmp = 0x9000,
Dadd = 0xa000,
Bit = 0xb000,
Bic = 0xc000,
Bis = 0xd000,
Xor = 0xe000,
And = 0xf000,
}
impl Opcode {
    /// Resolve an Opcode into an [Opcode] and an [EncodingParser]
    ///
    /// Real opcodes resolve to themselves. Emulated opcodes resolve to the real opcode that
    /// implements them, with the operands they imply already filled in on the parser.
    pub fn resolve(self) -> (Opcode, EncodingParser) {
        use super::Encoding as Enc;
        use Register as Reg;
        use {PrimaryOperand as Src, SecondaryOperand as Dst};

        // Emulated as `real src, <parsed destination>`
        let implied_src = |real: Self, src: Src| (real, Enc::double().src(src).end());
        // Emulated as `real src, sr`: sets or clears bits of the status register
        let status_bits =
            |real: Self, src: Src| (real, Enc::double().src(src).dst(Dst::Direct(Reg::sr)).end());

        match self {
            Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()),
            // these instructions do not take a width specifier (though they may still behave properly)
            Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()),
            // `reti` does not take any operands.
            Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()),
            Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => {
                (self, Enc::jump().end())
            }
            Self::Mov
            | Self::Add
            | Self::Addc
            | Self::Subc
            | Self::Sub
            | Self::Cmp
            | Self::Dadd
            | Self::Bit
            | Self::Bic
            | Self::Bis
            | Self::Xor
            | Self::And => (self, Enc::double().end()),
            Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
            Self::Pop => implied_src(Self::Mov, Src::PostInc(Reg::sp)),
            Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()),
            Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()),
            Self::Clrc => status_bits(Self::Bic, Src::One),
            Self::Setc => status_bits(Self::Bis, Src::One),
            Self::Clrz => status_bits(Self::Bic, Src::Two),
            Self::Setz => status_bits(Self::Bis, Src::Two),
            Self::Clrn => status_bits(Self::Bic, Src::Four),
            Self::Setn => status_bits(Self::Bis, Src::Four),
            Self::Dint => status_bits(Self::Bic, Src::Eight),
            Self::Eint => status_bits(Self::Bis, Src::Eight),
            Self::Rla => (Self::Add, Enc::reflexive().end()),
            Self::Rlc => (Self::Addc, Enc::reflexive().end()),
            Self::Inv => implied_src(Self::Xor, Src::MinusOne),
            Self::Clr => implied_src(Self::Mov, Src::Zero),
            Self::Tst => implied_src(Self::Cmp, Src::Zero),
            Self::Dec => implied_src(Self::Sub, Src::One),
            Self::Decd => implied_src(Self::Sub, Src::Two),
            Self::Inc => implied_src(Self::Add, Src::One),
            Self::Incd => implied_src(Self::Add, Src::Two),
            Self::Adc => implied_src(Self::Addc, Src::Zero),
            Self::Dadc => implied_src(Self::Dadd, Src::Zero),
            Self::Sbc => implied_src(Self::Subc, Src::Zero),
        }
    }
}
impl Parsable for Opcode {
    /// Consumes one [Type::Insn] token and parses it into an [Opcode]
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // TODO: Reintroduce error context
        let token = stream.expect(Type::Insn)?;
        token.parse()
    }
}
impl FromStr for Opcode {
    type Err = ParseError;

    /// Converts a mnemonic into an [Opcode], ignoring ASCII case.
    ///
    /// # Errors
    /// Returns [ParseError::UnrecognizedOpcode], holding the lowercased input, when the
    /// mnemonic is unknown.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        //TODO: Reduce allocations here?
        let mnemonic = s.to_ascii_lowercase();
        let opcode = match mnemonic.as_str() {
            // Single-operand
            "rrc" => Self::Rrc,
            "swpb" => Self::Swpb,
            "rra" => Self::Rra,
            "sxt" => Self::Sxt,
            "push" => Self::Push,
            "call" => Self::Call,
            "reti" => Self::Reti,
            // Jumps, including alternate spellings
            "jne" | "jnz" => Self::Jnz,
            "jeq" | "jz" => Self::Jz,
            "jnc" | "jlo" => Self::Jnc,
            "jc" | "jhs" => Self::Jc,
            "jn" => Self::Jn,
            "jge" => Self::Jge,
            "jl" => Self::Jl,
            "jmp" => Self::Jmp,
            // Two-operand
            "mov" => Self::Mov,
            "add" => Self::Add,
            "addc" => Self::Addc,
            "subc" => Self::Subc,
            "sub" => Self::Sub,
            "cmp" => Self::Cmp,
            "dadd" => Self::Dadd,
            "bit" => Self::Bit,
            "bic" => Self::Bic,
            "bis" => Self::Bis,
            "xor" => Self::Xor,
            "and" => Self::And,
            // Emulated
            "nop" => Self::Nop,
            "pop" => Self::Pop,
            "br" => Self::Br,
            "ret" => Self::Ret,
            "clrc" => Self::Clrc,
            "setc" => Self::Setc,
            "clrz" => Self::Clrz,
            "setz" => Self::Setz,
            "clrn" => Self::Clrn,
            "setn" => Self::Setn,
            "dint" => Self::Dint,
            "eint" => Self::Eint,
            "rla" => Self::Rla,
            "rlc" => Self::Rlc,
            "inv" => Self::Inv,
            "clr" => Self::Clr,
            "tst" => Self::Tst,
            "dec" => Self::Dec,
            "decd" => Self::Decd,
            "inc" => Self::Inc,
            "incd" => Self::Incd,
            "adc" => Self::Adc,
            "dadc" => Self::Dadc,
            "sbc" => Self::Sbc,
            _ => return Err(ParseError::UnrecognizedOpcode(mnemonic)),
        };
        Ok(opcode)
    }
}
impl Display for Opcode {
    /// Writes the opcode's lowercase mnemonic
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            // Emulated
            Self::Nop => "nop",
            Self::Pop => "pop",
            Self::Br => "br",
            Self::Ret => "ret",
            Self::Clrc => "clrc",
            Self::Setc => "setc",
            Self::Clrz => "clrz",
            Self::Setz => "setz",
            Self::Clrn => "clrn",
            Self::Setn => "setn",
            Self::Dint => "dint",
            Self::Eint => "eint",
            Self::Rla => "rla",
            Self::Rlc => "rlc",
            Self::Inv => "inv",
            Self::Clr => "clr",
            Self::Tst => "tst",
            Self::Dec => "dec",
            Self::Decd => "decd",
            Self::Inc => "inc",
            Self::Incd => "incd",
            Self::Adc => "adc",
            Self::Dadc => "dadc",
            Self::Sbc => "sbc",
            // Single-operand
            Self::Rrc => "rrc",
            Self::Swpb => "swpb",
            Self::Rra => "rra",
            Self::Sxt => "sxt",
            Self::Push => "push",
            Self::Call => "call",
            Self::Reti => "reti",
            // Jumps
            Self::Jnz => "jnz",
            Self::Jz => "jz",
            Self::Jnc => "jnc",
            Self::Jc => "jc",
            Self::Jn => "jn",
            Self::Jge => "jge",
            Self::Jl => "jl",
            Self::Jmp => "jmp",
            // Two-operand
            Self::Mov => "mov",
            Self::Add => "add",
            Self::Addc => "addc",
            Self::Subc => "subc",
            Self::Sub => "sub",
            Self::Cmp => "cmp",
            Self::Dadd => "dadd",
            Self::Bit => "bit",
            Self::Bic => "bic",
            Self::Bis => "bis",
            Self::Xor => "xor",
            Self::And => "and",
        };
        f.write_str(mnemonic)
    }
}

View File

@@ -1,21 +0,0 @@
// © 2023 John Breaux
//! The definition of a label
use super::*;
/// The definition of a label
///
/// Wraps the [Identifier] naming the label; displayed as that identifier followed by `:`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label(pub Identifier);
impl Parsable for Label {
    /// Parses an [Identifier], then requires a [Type::Label] token after it
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let name = Identifier::parse(p, stream)?;
        stream.require(Type::Label)?;
        Ok(Self(name))
    }
}
impl Display for Label {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
}

View File

@@ -1,72 +0,0 @@
// © 2023 John Breaux
//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same
//! document line will be treated as if they took up multiple [`Line`s](Line).
//!
//! A line contains one of:
//! - [`Label`]
//! - [`Instruction`]
//! - [`Directive`]
//! - [`Comment`]
//! - [Nothing](Line::Empty)
use super::*;
/// A line contains any one of:
/// - [`Label`] (definition)
/// - [`Instruction`]
/// - [`Directive`]
/// - [`Comment`]
/// - Nothing at all
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Line {
/// A bare end-of-line token
Empty,
/// An [Instruction]
Insn(Instruction),
/// A [Comment]
Comment(Comment),
/// A [Directive]
Directive(Directive),
/// A [Label] definition
Label(Label),
EndOfFile, // Expected end of file
}
impl Parsable for Line {
    /// Peeks at the next token and dispatches to the parser for whichever kind of line it
    /// begins.
    ///
    /// [`Type::Endl`] and [`Type::EndOfFile`] tokens are consumed here; every other accepted
    /// token is left in the stream for the sub-parser to consume.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // Every token type which is allowed to begin a line
        let starters = [
            Type::Endl,
            Type::Insn,
            Type::Comment,
            Type::Directive,
            Type::Identifier,
            Type::EndOfFile,
        ];
        let line = match stream.peek_expect_any_of(starters)?.variant() {
            Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
            Type::Comment => Self::Comment(Comment::parse(p, stream)?),
            Type::Directive => Self::Directive(Directive::parse(p, stream)?),
            // An identifier at the start of a line can only be a label definition
            Type::Identifier => Self::Label(Label::parse(p, stream)?),
            Type::Endl => {
                stream.next();
                Self::Empty
            }
            Type::EndOfFile => {
                stream.next();
                Self::EndOfFile
            }
            _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
        };
        Ok(line)
    }
}
impl Display for Line {
    /// Formats the line by delegating to the wrapped node's own `Display` implementation.
    ///
    /// [`Line::Empty`] writes two line feeds (the `"\n"` literal plus the one `writeln!` adds),
    /// and [`Line::EndOfFile`] writes a fixed end-of-file comment.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Insn(insn) => Display::fmt(insn, f),
            Self::Label(label) => Display::fmt(label, f),
            Self::Comment(comment) => Display::fmt(comment, f),
            Self::Directive(directive) => Display::fmt(directive, f),
            Self::EndOfFile => f.write_str("; End of file."),
            Self::Empty => writeln!(f, "\n"),
        }
    }
}

View File

@@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
use super::*;
/// Parses tokens from a [stream](TokenStream) into a `Self` node
pub trait Parsable {
    /// Parses tokens from the [TokenStream] into a `Self` node
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>;

    /// Attempts to parse tokens from the [stream](TokenStream) into a `Self` node.
    ///
    /// Masks failed expectations: a [`ParseError::LexError`] becomes `Ok(None)`, while every
    /// other error is passed through unchanged.
    fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        Self::parse(p, stream).map(Some).or_else(|error| match error {
            ParseError::LexError(_) => Ok(None),
            other => Err(other),
        })
    }

    /// Parses a `Self` node, then calls `f` on the same parser and stream.
    ///
    /// `f` only runs when parsing `Self` succeeded; its result is returned as-is next to the
    /// parsed node.
    fn parse_and<'text, T, R>(
        p: &Parser,
        stream: &mut T,
        f: fn(p: &Parser, &mut T) -> R,
    ) -> Result<(Self, R), ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        let parsed = Self::parse(p, stream)?;
        let followup = f(p, stream);
        Ok((parsed, followup))
    }

    /// Attempts to parse tokens from the [stream](TokenStream) into a `Self` node.
    ///
    /// Returns [`Self::default()`](Default::default()) on error
    fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
    where
        Self: Sized + Default,
        T: TokenStream<'text>,
    {
        match Self::parse(p, stream) {
            Ok(parsed) => parsed,
            Err(_) => Self::default(),
        }
    }
}
/// Implements [`Parsable`] for each listed string-like type.
///
/// Every generated `parse` expects a [`Type::String`] token, strips the `"` characters from
/// both ends of its lexeme, and converts the remaining text into the target type with `into()`.
macro_rules! parsable_str_types {
    ($($target:ty),* $(,)?) => {$(
        impl Parsable for $target {
            fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
            where T: TokenStream<'text> {
                let token = stream.expect(Type::String)?;
                Ok(token.lexeme().trim_matches('"').into())
            }
        }
    )*};
}
use std::{path::PathBuf, rc::Rc};
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];
/// Vectors of arbitrary parsables are cool
///
/// A vector is written as a bracketed list whose elements may be followed by a separator
/// and/or a line ending.
impl<P: Parsable> Parsable for Vec<P> {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // Accepted shapes:
        // [dead beef]
        // [A, B,]
        // [c d e f]
        // [ something
        // else ]
        stream.require(Type::LBracket)?;
        stream.allow(Type::Endl);
        let mut items = Vec::new();
        loop {
            // `try_parse` yields `None` once the next tokens no longer form a `P`
            let Some(item) = P::try_parse(p, stream)? else { break };
            items.push(item);
            stream.allow(Type::Separator);
            stream.allow(Type::Endl);
        }
        stream.require(Type::RBracket)?;
        Ok(items)
    }
}

View File

@@ -1,51 +0,0 @@
use std::path::{Path, PathBuf};
// © 2023 John Breaux
use super::*;
/// Contains the entire AST
///
/// - Field `0` is the optional path of the source file. Parsing leaves it as `None`; it is
///   filled in by `set_file` and read back by `file`.
/// - Field `1` holds every parsed [`Line`] paired with the line number the stream reported
///   just before that line was parsed.
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Root(Option<PathBuf>, Vec<(usize, Line)>);
// pub struct Root { pub path: PathBuf, pub lines: Vec<Line> }
impl Root {
pub fn file(&self) -> Option<&Path> { self.0.as_deref() }
pub(crate) fn set_file(mut self, path: PathBuf) -> Self {
self.0 = Some(path);
self
}
pub fn lines(&self) -> &[(usize, Line)] { &self.1 }
}
impl Parsable for Root {
    /// Parses [`Line`]s until the end-of-file line is reached.
    ///
    /// Each line is stored with the line number the stream reported immediately before the
    /// line was parsed. The end-of-file line itself is not stored, and no path is recorded.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let mut lines = Vec::new();
        loop {
            // Sample the line number first: parsing advances the stream
            let number = stream.context().line();
            let line = Line::parse(p, stream)?;
            if matches!(line, Line::EndOfFile) {
                break;
            }
            lines.push((number, line));
        }
        Ok(Self(None, lines))
    }
}
impl Display for Root {
    /// Writes every line as `<number>: <line> `, with the number right-aligned to 3 columns.
    ///
    /// Each entry is rendered to a temporary string and emitted with `Formatter::pad`, so any
    /// width or alignment requested by the caller is applied per entry.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.1.iter().try_for_each(|(num, line)| f.pad(&format!("{num:3}: {line} ")))
    }
}
impl Debug for Root {
    /// Writes the [`Debug`] form of every parsed [`Line`], in source order.
    ///
    /// Line numbers and the source path are not included in the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Field 1 holds the `(line number, Line)` pairs. Field 0 is only the optional source
        // path: iterating it would print the path (if any) and never a single line.
        for (_, line) in &self.1 {
            Debug::fmt(line, f)?;
        }
        Ok(())
    }
}

256
src/parser/tests.rs Normal file
View File

@@ -0,0 +1,256 @@
#![allow(non_upper_case_globals)]
use super::*;
use crate::lexer::token;
/// Because [assert_matches](core::assert_matches::assert_matches) is unstable
///
/// Asserts that an expression matches at least one of the given patterns. Patterns are
/// separated by commas, and each may carry its own `if` guard:
///
/// `assert_matches!(value, Some(n) if n > 2, None)`
///
/// Panics with a message naming the expression and the patterns when nothing matches.
macro_rules! assert_matches {
    // The patterns need a separator: a `pat` (or guard `expr`) fragment may not be directly
    // followed by another `pat` fragment, so a bare repetition is rejected by the compiler.
    ($e:expr, $($p:pat $(if $condition:expr)?),* $(,)?) => {
        match $e {
            $($p $(if $condition)? => (),)*
            _ => panic!("{}", stringify!($e did not match $($p),*)),
        }
    };
}
/// Simplified grammar for constructing an expression
///
/// Rules are tried from top to bottom:
/// - `name` builds `Expr::Ident` from the identifier's text
/// - `123` builds `Expr::Number`
/// - `& lit` builds `Expr::AddrOf`
/// - `(...)` builds `Expr::Group` around the inner expression
/// - `[Op ...] ...` builds `Expr::Unary` with the listed `UnOp` variants
/// - `(...) Op(...) ...` builds `Expr::Binary` from a first operand and one or more
///   `(BinOp, operand)` pairs
macro_rules! expr {
    ($name:ident) => {
        Expr::Ident(stringify!($name)).into()
    };
    ($number:literal) => {
        Expr::Number($number).into()
    };
    (& $target:literal) => {
        Expr::AddrOf($target).into()
    };
    (($($inner:tt)*)) => {
        Expr::Group(expr!($($inner)*))
    };
    ([$($unary:tt)*] $($operand:tt)*) => {
        Expr::Unary(vec![$(UnOp::$unary),*], expr!($($operand)*))
    };
    (($($first:tt)*) $($binary:tt ($($rest:tt)*))+) => {
        Expr::Binary(expr!($($first)*), vec![$((BinOp::$binary, expr!($($rest)*))),+])
    };
}
/// Asserts that parsing the literal `$source` succeeds and yields a value equal to `$want`
///
/// The type to parse is inferred from `$want`.
macro_rules! passert {
    ($want:expr, $source:literal) => {
        assert_eq!($want, Parsable::parse($source).unwrap())
    };
}
// #[test]
// fn statements() {
// passert!(, "");
// }
// #[test]
// fn statement() {
// passert!(, "");
// }
/// Directives parse into the matching [Directive] variant
#[test]
fn directive() {
    // `.org` carries a single expression
    let org = Directive::Org(expr!(0x8000));
    passert!(org, ".org 0x8000");

    // `.string` carries the text between the quotes
    let string = Directive::String("Hello, world!");
    passert!(string, ".string \"Hello, world!\"");

    // `.word` carries a single expression; parsed through `Directive::parse` directly
    let word = Directive::Word(expr!(0x40));
    assert_eq!(Directive::parse(".word 0x40").unwrap(), word);

    // `.words` carries a bracketed list of expressions
    let words = Directive::Words(vec![expr!(0x40), expr!(0x41), expr!(0x42), expr!(0x43)]);
    passert!(words, ".words [ 0x40 0x41 0x42 0x43 ]");
}
// #[test]
// fn instruction() {
// passert!(, "");
// }
/// Mnemonics parse into the [InstructionKind] variant for their instruction family
#[test]
fn instruction_kind() {
// A mnemonic with no operands is expected to become the `NoEm` variant
assert_matches!(Parsable::parse("nop").unwrap(), InstructionKind::NoEm(NoEm { .. }));
// A mnemonic with a single destination operand is expected to become the `OneEm` variant
assert_matches!(Parsable::parse("pop sp").unwrap(), InstructionKind::OneEm(OneEm { .. }));
}
/// Every operand-less mnemonic parses into a [NoEm] with the matching opcode
#[test]
fn no_em() {
    // Expands to one `passert!` per `Opcode => "text"` row
    macro_rules! cases {
        ($($opcode:ident => $text:literal),* $(,)?) => {$(
            passert!(NoEm { opcode: token::NoEm::$opcode }, $text);
        )*};
    }
    cases! {
        Nop => "nop",
        Ret => "ret",
        Clrc => "clrc",
        Clrz => "clrz",
        Clrn => "clrn",
        Setc => "setc",
        Setz => "setz",
        Setn => "setn",
        Dint => "dint",
        Eint => "eint",
    }
}
/// Every destination-only mnemonic parses into a [OneEm], with and without the `.b` suffix
#[test]
fn one_em() {
    const dst: Dst = Dst::Direct(Reg::R15);
    // Expands to one `passert!` per `Opcode => "text"` row, all sharing the given width
    macro_rules! cases {
        ($width:expr; $($opcode:ident => $text:literal),* $(,)?) => {$(
            passert!(OneEm { opcode: token::OneEm::$opcode, width: $width, dst }, $text);
        )*};
    }
    // No suffix: word width
    cases! { Width::Word;
        Pop => "pop r15",
        Rla => "rla r15",
        Rlc => "rlc r15",
        Inv => "inv r15",
        Clr => "clr r15",
        Tst => "tst r15",
        Dec => "dec r15",
        Decd => "decd r15",
        Inc => "inc r15",
        Incd => "incd r15",
        Adc => "adc r15",
        Dadc => "dadc r15",
        Sbc => "sbc r15",
    }
    // `.b` suffix: byte width
    cases! { Width::Byte;
        Pop => "pop.b r15",
        Rla => "rla.b r15",
        Rlc => "rlc.b r15",
        Inv => "inv.b r15",
        Clr => "clr.b r15",
        Tst => "tst.b r15",
        Dec => "dec.b r15",
        Decd => "decd.b r15",
        Inc => "inc.b r15",
        Incd => "incd.b r15",
        Adc => "adc.b r15",
        Dadc => "dadc.b r15",
        Sbc => "sbc.b r15",
    }
}
/// Every source-only mnemonic parses into a [OneArg], with and without the `.b` suffix
#[test]
fn one_arg() {
    const src: Src = Src::Direct(Reg::PC);
    // Expands to one `passert!` per `Opcode => "text"` row, all sharing the given width
    macro_rules! cases {
        ($width:expr; $($opcode:ident => $text:literal),* $(,)?) => {$(
            passert!(OneArg { opcode: token::OneArg::$opcode, width: $width, src }, $text);
        )*};
    }
    // No suffix: word width
    cases! { Width::Word;
        Rrc => "rrc pc",
        Swpb => "swpb pc",
        Rra => "rra pc",
        Sxt => "sxt pc",
        Push => "push pc",
        Call => "call pc",
    }
    // `.b` suffix: byte width
    cases! { Width::Byte;
        Rrc => "rrc.b pc",
        Swpb => "swpb.b pc",
        Rra => "rra.b pc",
        Sxt => "sxt.b pc",
        Push => "push.b pc",
        Call => "call.b pc",
    }
}
/// Every source-and-destination mnemonic parses into a [TwoArg], with and without `.b`
#[test]
fn two_arg() {
    const src: Src = Src::Direct(Reg::R14);
    const dst: Dst = Dst::Direct(Reg::R15);
    // Expands to one `passert!` per `Opcode => "text"` row, all sharing the given width
    macro_rules! cases {
        ($width:expr; $($opcode:ident => $text:literal),* $(,)?) => {$(
            passert!(TwoArg { opcode: token::TwoArg::$opcode, width: $width, src, dst }, $text);
        )*};
    }
    // No suffix: word width
    cases! { Width::Word;
        Mov => "mov r14, r15",
        Add => "add r14, r15",
        Addc => "addc r14, r15",
        Subc => "subc r14, r15",
        Sub => "sub r14, r15",
        Cmp => "cmp r14, r15",
        Dadd => "dadd r14, r15",
        Bit => "bit r14, r15",
        Bic => "bic r14, r15",
        Bis => "bis r14, r15",
        Xor => "xor r14, r15",
        And => "and r14, r15",
    }
    // `.b` suffix: byte width
    cases! { Width::Byte;
        Mov => "mov.b r14, r15",
        Add => "add.b r14, r15",
        Addc => "addc.b r14, r15",
        Subc => "subc.b r14, r15",
        Sub => "sub.b r14, r15",
        Cmp => "cmp.b r14, r15",
        Dadd => "dadd.b r14, r15",
        Bit => "bit.b r14, r15",
        Bic => "bic.b r14, r15",
        Bis => "bis.b r14, r15",
        Xor => "xor.b r14, r15",
        And => "and.b r14, r15",
    }
}
/// Every jump mnemonic parses into a [Jump] with the matching opcode and a relative target
#[test]
fn jump() {
    const dst100: JumpDst = JumpDst::Rel(100);
    // Expands to one `passert!` per `Opcode => "text"` row, all jumping to `dst100`
    macro_rules! cases {
        ($($opcode:ident => $text:literal),* $(,)?) => {$(
            passert!(Jump { opcode: token::Jump::$opcode, dst: dst100 }, $text);
        )*};
    }
    cases! {
        Jne => "jne 100",
        Jnz => "jnz 100",
        Jeq => "jeq 100",
        Jz => "jz 100",
        Jnc => "jnc 100",
        Jlo => "jlo 100",
        Jc => "jc 100",
        Jhs => "jhs 100",
        Jn => "jn 100",
        Jge => "jge 100",
        Jl => "jl 100",
        Jmp => "jmp 100",
    }
}
/// `reti` parses into the unit-like [Reti] node
#[test]
fn reti() {
    let expected = Reti;
    passert!(expected, "reti");
}
/// `br` parses into a [Br] carrying its source operand
#[test]
fn br() {
    let expected = Br { src: Src::Direct(Reg::R15) };
    passert!(expected, "br r15");
}
/// Width suffixes parse into [Width]; a missing suffix means word width
#[test]
fn width() {
    let byte = Width::Byte;
    let word = Width::Word;
    passert!(byte, ".b");
    passert!(word, ".w");
    // No suffix at all is expected to parse as word width
    passert!(word, "");
}
/// Each source operand syntax parses into the matching [Src] variant
#[test]
fn src() {
    let direct = Src::Direct(Reg::R15);
    passert!(direct, "r15");

    let indexed = Src::Indexed(expr!(0x1000), Reg::R15);
    passert!(indexed, "0x1000(r15)");

    let indirect = Src::Indirect(Reg::R15);
    passert!(indirect, "@r15");

    let post_inc = Src::PostInc(Reg::R15);
    passert!(post_inc, "@r15+");

    let absolute = Src::Absolute(expr!(0x1000));
    passert!(absolute, "&0x1000");

    let immediate = Src::Immediate(expr!(0x1000));
    passert!(immediate, "#0x1000");

    // An expression with no addressing-mode sigil
    let bare = Src::BareExpr(expr!(foo));
    passert!(bare, "foo");
}
/// Each destination operand syntax parses into the matching [Dst] variant
#[test]
fn dst() {
    let direct = Dst::Direct(Reg::R15);
    passert!(direct, "r15");

    let indexed = Dst::Indexed(expr!(0x1000), Reg::R15);
    passert!(indexed, "0x1000(r15)");

    let absolute = Dst::Absolute(expr!(0x1000));
    passert!(absolute, "&0x1000");

    // The immediates `#0` and `#1` have dedicated destination forms
    let zero = Dst::Special(DstSpecial::Zero);
    passert!(zero, "#0");
    let one = Dst::Special(DstSpecial::One);
    passert!(one, "#1");
}
/// Jump targets parse as signed relative offsets or as labels
#[test]
fn jump_dst() {
    let forward = JumpDst::Rel(100);
    passert!(forward, "100");

    let backward = JumpDst::Rel(-100);
    passert!(backward, "-100");

    let label = JumpDst::Label("foo");
    passert!(label, "foo");
}
/// Expressions parse into the tree which the `expr!` macro spells out
#[test]
fn expr() {
    // Multiplication, remainder, division
    let terms = expr!((1) Mul(2) Rem(3) Div(4));
    passert!(terms, "1 * 2 % 3 / 4");

    // Addition, subtraction
    let factors = expr!((1) Add(2) Sub(3));
    passert!(factors, "1 + 2 - 3");

    // Shifts
    let shifts = expr!((1) Lsh(2) Rsh(3));
    passert!(shifts, "1 << 2 >> 3");

    // Bitwise logic
    let bitwise = expr!((1) And(2) Or(3) Xor(4));
    passert!(bitwise, "1 & 2 | 3 ^ 4");

    // Prefix operators, outermost first
    let unary = expr!([Deref Neg Not] 1);
    passert!(unary, "*-!1");

    // Number
    let number = Expr::Number(42);
    passert!(number, "42");

    // Identifier
    let ident = Expr::Ident("foo");
    passert!(ident, "foo");

    // Address-of
    let addr_of = Expr::AddrOf("bar");
    passert!(addr_of, "&bar");

    // Parenthesized group
    let group = expr!((42));
    passert!(group, "(42)");

    // All of the above, nested the way the source text is expected to group
    let everything = expr!(
        (4) Mul(
            (3) Add(
                (2) Lsh(
                    (1) And([Neg] 1)
                ) Rsh([Deref] 2)
            ) Add([Not] 3)
        ) Mul(4)
    );
    passert!(everything, "4 * 3 + 2 << 1 & -1 >> *2 + !3 * 4");
}

89
src/preprocessor.rs Normal file
View File

@@ -0,0 +1,89 @@
// © 2023-2024 John Breaux
//See LICENSE.md for license
//! Preprocesses a token stream by removing and replacing tokens according to `.define` directives
use crate::{
lexer::{
token::{Token, TokenKind as Kind},
Lexer,
},
span::Span,
};
use std::collections::{HashMap, VecDeque};
/// Sits between the [Lexer] and its consumer, recording `.define` directives and replacing
/// any token whose lexeme matches a defined name with that name's replacement tokens
#[derive(Clone, Debug)]
pub struct Preprocessor<'t> {
/// Source of raw tokens
lexer: Lexer<'t>,
/// Tokens waiting to be handed out by `scan`, in order; filled while reading a directive
/// and when a defined name is expanded
buf: VecDeque<Token<'t>>,
/// Maps the lexeme of each defined name to its replacement tokens
defn: HashMap<&'t str, Vec<Token<'t>>>,
/// Location for injected tokens
pos: Span<usize>,
}
impl<'t> Preprocessor<'t> {
pub fn new(text: &'t str) -> Self {
Self {
lexer: Lexer::new(text),
buf: Default::default(),
defn: Default::default(),
pos: Default::default(),
}
}
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() }
}
pub fn scan(&mut self) -> Option<Token<'t>> {
self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos)
}
pub fn start(&self) -> usize {
self.lexer.location()
}
/// Grabs a token from the lexer, and attempts to match its lexeme
fn next(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
if let Some(tokens) = self.defn.get(token.lexeme) {
self.buf.extend(tokens.iter().copied().map(|mut t| {
t.pos = self.pos;
t
}));
return self.scan();
} else {
match token.kind {
Kind::Directive => self.directive(token),
Kind::Newline => return self.scan(),
_ => {}
}
Some(token)
}
}
/// Passes a token through while parsing a directive
fn tee(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
self.buf.push_back(token);
// self.buf.push_back(token);
Some(token)
}
/// Parses and executes a directive
pub fn directive(&mut self, token: Token<'t>) {
if ".define" == token.lexeme {
self.define()
}
}
pub fn define(&mut self) {
let Some(key) = self.tee() else {
return;
};
let mut value = vec![];
while let Some(token) = self.tee() {
match token.kind {
Kind::Comment => {
self.buf.push_back(token);
break;
}
Kind::Newline => break,
_ => value.push(token),
}
}
self.defn.insert(key.lexeme, value);
}
}

45
src/span.rs Normal file
View File

@@ -0,0 +1,45 @@
// © 2023-2024 John Breaux
//See LICENSE.md for license
//! A [Span] is a [Range] that does not implement [Iterator]. It is a [Copy] type.
use std::{
fmt::{Debug, Display},
ops::{Index, Range},
};
/// A <code> [Clone] + [Copy] + [!Iterator](Iterator) </code> version of a [Range]
///
/// Converts losslessly to and from [Range], and can index slices and strings exactly like the
/// equivalent `start..end` range.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span<Idx> {
    /// Becomes [`Range::start`] on conversion
    pub start: Idx,
    /// Becomes [`Range::end`] on conversion
    pub end: Idx,
}

impl<Idx> From<Span<Idx>> for Range<Idx> {
    fn from(value: Span<Idx>) -> Self {
        let Span { start, end } = value;
        Range { start, end }
    }
}

impl<Idx> From<Range<Idx>> for Span<Idx> {
    fn from(value: Range<Idx>) -> Self {
        let Range { start, end } = value;
        Span { start, end }
    }
}

/// Slices index by [Span] exactly as they do by the equivalent [Range]
impl<T> Index<Span<usize>> for [T] {
    type Output = [T];
    fn index(&self, index: Span<usize>) -> &Self::Output {
        &self[index.start..index.end]
    }
}

/// Strings index by [Span] exactly as they do by the equivalent [Range]
impl Index<Span<usize>> for str {
    type Output = str;
    fn index(&self, index: Span<usize>) -> &Self::Output {
        &self[index.start..index.end]
    }
}

/// Formats as `start..end`, using the [Debug] form of both bounds
impl<Idx: Debug> Debug for Span<Idx> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { start, end } = self;
        f.write_fmt(format_args!("{:?}..{:?}", start, end))
    }
}

/// Formats as `start..end`, using the [Display] form of both bounds
impl<Idx: Display> Display for Span<Idx> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { start, end } = self;
        f.write_fmt(format_args!("{}..{}", start, end))
    }
}