v0.3.0: Total overhaul

- Everything has been rewritten
- Modularity is improved somewhat
  - No dependency injection in preprocessor/parser, though
- There are now early and late constant evaluation engines
  - This engine allows for by-value access to already-assembled code
  - Performs basic math operations, remainder, bitwise logic, bit shifts, negation, and bit inversion
  - Also allows for indexing into already-generated code using pointer-arithmetic syntax: `*(&main + 10)`. This syntax is subject to change: it's clunky, and only allows word-aligned access. However, this rewrite is taking far too long, so I'll stop bikeshedding here.
  - Pretty sure this constant evaluation is computationally equivalent to Deadfish?
This commit is contained in:
John 2024-01-30 05:27:12 -06:00
parent e4a1b889c2
commit fc8f8b9622
44 changed files with 3119 additions and 3055 deletions

View File

@ -1,12 +1,12 @@
unstable_features = true
max_width = 120
max_width = 100
wrap_comments = true
comment_width = 100
# Allow structs to fill an entire line
use_small_heuristics = "Max"
# Allow small functions on single line
fn_single_line = true
# fn_single_line = true
# Alignment
enum_discrim_align_threshold = 12

View File

@ -1,23 +1,24 @@
[package]
name = "msp430-asm"
version = "0.2.0"
edition = "2021"
rust-version = "1.70"
[workspace]
members = ["msp430-asm"]
# default-members = ["msp430-asm"]
[workspace.package]
authors = ["John Breaux <j@soft.fish>"]
version = "0.3.0"
license = "MIT"
edition = "2021"
publish = false
[features]
default = []
[package]
name = "libmsp430"
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
[[example]]
name = "msp430-asm"
path = "examples/msp430-asm/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
regex = "1.9.3"
# TODO: Remove dependency on regex
[dev-dependencies]
anes = { version = "0.1.6" }
argp = { version = "0.3.0" }
# Provides very quick boolean tests for XID_START and XID_CONTINUE
unicode-ident = "1.0.12"

9
LICENSE.md Normal file
View File

@ -0,0 +1,9 @@
The MIT License (MIT)
Copyright © 2023-2024 John Breaux
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,222 +1,224 @@
//! TODO: rewrite for v0.3.0
use super::*;
/// Declares an enum whose variants each wrap a [Parsable] type of the same name,
/// and implements [Parsable] and `TryFrom<&str>` for that enum.
///
/// - `Parsable::parse` tries each listed type in declaration order using `try_parse`;
///   the first one that yields `Some` becomes the result. Errors returned by
///   `try_parse` are propagated with `?`.
/// - When no listed type matches, parsing fails with
///   `ParseError::UnrecognizedDirective` carrying an empty string.
/// - `TryFrom<&str>` parses the text with a default `Parser`, reading from a
///   `Tokenizer` that ignores `Type::Space` tokens and is wrapped by `preprocessed()`.
macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
// The enum itself: one tuple variant `V(V)` per listed type, attributes forwarded as written
$( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
impl ::msp430_asm::parser::parsable::Parsable for $id {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
// Expands to an `if let … else if let … else` chain, one link per variant
$(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
// Final `else`: nothing matched
{ Err(ParseError::UnrecognizedDirective("".into())) }
}
}
impl TryFrom<&str> for $id {
type Error = ParseError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
}
}
}
// A piece of assembly syntax that can be parsed on its own and then described
// with `SyntaxFragment::info`. The generated parser tries the variants in the
// order listed here: Opcode first, then PrimaryOperand, then Number.
make_parsable! {
#[derive(Debug)]
pub enum SyntaxFragment {
Opcode,
PrimaryOperand,
Number,
}
}
// /// Creates a [Parsable] implementation for an enum whose variants
// /// are named after other [Parsable] items
// macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
// $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
// impl ::msp430_asm::parser::parsable::Parsable for $id {
// fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
// where T: TokenStream<'text> {
// $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
// { Err(ParseError::UnrecognizedDirective("".into())) }
// }
// }
// impl TryFrom<&str> for $id {
// type Error = ParseError;
// fn try_from(value: &str) -> Result<Self, Self::Error> {
// Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
// }
// }
// }
impl SyntaxFragment {
    /// Prints a human-readable description of this fragment to stdout.
    pub fn info(&self) {
        match self {
            SyntaxFragment::Opcode(o) => Self::opcode_info(o),
            SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o),
            SyntaxFragment::Number(n) => println!("The number {n}"),
        }
    }

    /// Prints the usage line, description, and Rust-like pseudo-code for an opcode,
    /// followed by a link to the instruction-set documentation.
    fn opcode_info(o: &Opcode) {
        let (desc, as_rust) = usage(o);
        println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
        footer!("https://mspgcc.sourceforge.net/manual/x223.html");
    }

    // TODO: re-enable full instruction decoding
    // fn encoding_info(e: &Encoding) {
    //     match e {
    //         Encoding::Single { dst, .. } => Self::operand_info(dst),
    //         Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
    //         Encoding::Double { src, dst, .. } => {
    //             Self::operand_info(src);
    //             Self::operand_info(&dst.clone().into())
    //         }
    //     }
    // }

    /// Prints a description of an operand's addressing mode (and of the register it
    /// uses, if any), followed by a link to the addressing-mode documentation.
    ///
    /// Relative operands print nothing at all, not even the link.
    fn operand_info(o: &PrimaryOperand) {
        match o {
            PrimaryOperand::Direct(r) => Self::register_info(r),
            PrimaryOperand::Indirect(r) => {
                Self::register_info(r);
                println!("Indirect addressing mode: use data pointed to by {r}");
            }
            PrimaryOperand::PostInc(r) => {
                Self::register_info(r);
                println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
            }
            PrimaryOperand::Indexed(r, n) => {
                Self::register_info(r);
                println!("Indexed mode: use the data at {r}[{n}]");
            }
            // Early return on purpose: there is nothing to describe, so skip the footer too
            PrimaryOperand::Relative(_) => return,
            PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
            PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
            PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
            PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
            PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
            PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
            PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
            PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
        }
        footer!("https://mspgcc.sourceforge.net/manual/x82.html");
    }

    /// Prints a one-paragraph description of a register's role.
    fn register_info(r: &Register) {
        use Register as Re;
        match r {
            Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
            Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
            // Bit table: bit 7 is SCG1 and bit 6 is SCG0 (the original listed SCG1 twice)
            Re::sr => println!(
                "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG0\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
            ),
            Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
            Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
                println!("{r} is a callee-saved general purpose register.")
            }
            Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
                println!("{r} is a caller-saved general purpose register, allowed for return values.")
            }
        }
    }
}
// make_parsable! {
// #[derive(Debug)]
// pub enum SyntaxFragment {
// Opcode,
// PrimaryOperand,
// Number,
// }
// }
/// Gets parameter usage information from the opcode's EncodingParser.
///
/// The returned text is appended directly after the opcode name in the
/// "Usage:" line, so it describes the operands the user still has to write.
/// A field matched as `None` presumably means the opcode does not fix that
/// operand itself (NOTE(review): confirm against `Opcode::resolve`).
///
/// The arms overlap and are checked top to bottom, so the more specific
/// patterns must stay ahead of the more general ones. Any encoding not
/// covered yields an empty string.
pub fn params(opcode: &Opcode) -> &'static str {
match opcode.resolve().1 {
EncodingParser::Jump { target: None } => " target (relative address or label)",
EncodingParser::Single { width: None, dst: None } => "[.b] dst",
EncodingParser::Single { dst: None, .. } => " dst",
EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst",
EncodingParser::Double { src: None, .. } => "[.b] src",
EncodingParser::Double { dst: None, .. } => "[.b] dst",
EncodingParser::Double { .. } => "[.b]",
EncodingParser::Reflexive { reg: None, .. } => "[.b] dst",
_ => "",
}
}
// impl SyntaxFragment {
// pub fn info(&self) {
// match self {
// SyntaxFragment::Opcode(o) => Self::opcode_info(o),
// SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o),
// SyntaxFragment::Number(n) => println!("The number {n}"),
// }
// }
// fn opcode_info(o: &Opcode) {
// let (desc, as_rust) = usage(o);
// println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
// footer!("https://mspgcc.sourceforge.net/manual/x223.html");
// }
// // TODO: re-enable full instruction decoding
// // fn encoding_info(e: &Encoding) {
// // match e {
// // Encoding::Single { dst, .. } => Self::operand_info(dst),
// // Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
// // Encoding::Double { src, dst, .. } => {
// // Self::operand_info(src);
// // Self::operand_info(&dst.clone().into())
// // }
// // }
// // }
// fn operand_info(o: &PrimaryOperand) {
// match o {
// PrimaryOperand::Direct(r) => Self::register_info(r),
// PrimaryOperand::Indirect(r) => {
// Self::register_info(r);
// println!("Indirect addressing mode: use data pointed to by {r}");
// }
// PrimaryOperand::PostInc(r) => {
// Self::register_info(r);
// println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
// }
// PrimaryOperand::Indexed(r, n) => {
// Self::register_info(r);
// println!("Indexed mode: use the data at {r}[{n}]");
// }
// PrimaryOperand::Relative(_) => return,
// PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
// PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
// PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
// PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
// PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
// PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
// PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
// PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
// }
// footer!("https://mspgcc.sourceforge.net/manual/x82.html");
// }
// fn register_info(r: &Register) {
// use Register as Re;
// match r {
// Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
// Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
// Re::sr => println!(
// "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
// ),
// Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
// Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
// println!("{r} is a callee-saved general purpose register.")
// }
// Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
// println!("{r} is a caller-saved general purpose register, allowed for return values.")
// }
// }
// }
// }
/// Returns reference text for an opcode as `(description, pseudo_code)`.
///
/// `description` is a one-line English summary; `pseudo_code` is a Rust-like
/// expression of the same operation (empty when there is no useful one).
/// In both, `src`/`dst` are the instruction operands and `sr[X]` is flag `X`
/// of the status register.
pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) {
    match opcode {
        // Single
        Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"),
        Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"),
        Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"),
        Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"),
        Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"),
        Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"),
        Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"),
        // Jump
        Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"),
        Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"),
        Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"),
        Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
        Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
        // The signed comparisons test N against V (not C); after `cmp src, dst`
        // the flags describe `dst - src`, so the relation is about dst vs. src.
        Opcode::Jge => ("Jump if the flags indicate dst >= src (signed)", "if sr[N] == sr[V] { pc += target }"),
        Opcode::Jl => ("Jump if the flags indicate dst < src (signed)", "if sr[N] != sr[V] { pc += target }"),
        Opcode::Jmp => ("Jump unconditionally", "pc += target"),
        // Double
        Opcode::Mov => ("Copy src into dst", "dst = src"),
        Opcode::Add => ("Add src to dst", "dst += src"),
        Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
        // SUBC computes dst + !src + C, i.e. a clear carry means "borrow one more"
        Opcode::Subc => ("Subtract src from dst with borrow (inverted carry)", "dst -= src + !sr[C]"),
        Opcode::Sub => ("Subtract src from dst", "dst -= src"),
        Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
        Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
        Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
        Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
        Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
        Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
        Opcode::And => ("Bitwise And src into dst", "dst &= src"),
        // Emulated
        Opcode::Nop => ("Does nothing", "{}"),
        Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
        Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
        Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
        Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
        Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
        Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
        Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
        Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
        Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
        Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
        Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
        Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
        Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
        Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
        Opcode::Clr => ("Sets dst to 0", "dst = 0"),
        Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
        Opcode::Dec => ("Decrements dst", "dst -= 1"),
        Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
        Opcode::Inc => ("Increments dst", "dst += 1"),
        Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
        Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
        Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD += sr[C]"),
        // SBC computes dst + 0xFFFF + C: dst is unchanged when C is set, decremented otherwise
        Opcode::Sbc => ("Subtracts the borrow (inverted carry bit) from dst", "dst -= !sr[C]"),
    }
}
// // Gets parameter usage information from the opcode's EncodingParser
// pub fn params(opcode: &Opcode) -> &'static str {
// match opcode.resolve().1 {
// EncodingParser::Jump { target: None } => " target (relative address or label)",
// EncodingParser::Single { width: None, dst: None } => "[.b] dst",
// EncodingParser::Single { dst: None, .. } => " dst",
// EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst",
// EncodingParser::Double { src: None, .. } => "[.b] src",
// EncodingParser::Double { dst: None, .. } => "[.b] dst",
// EncodingParser::Double { .. } => "[.b]",
// EncodingParser::Reflexive { reg: None, .. } => "[.b] dst",
// _ => "",
// }
// }
/// Opcodes printed under the "Single-operand instructions:" heading by `list_opcodes`,
/// in display order.
const SINGLE: [Opcode; 7] =
[Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti];
// pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) {
// match opcode {
// // Single
// Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"),
// Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"),
// Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"),
// Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"),
// Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"),
// Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"),
// Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"),
// // Jump
// Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"),
// Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"),
// Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"),
// Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
// Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
// Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"),
// Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"),
// Opcode::Jmp => ("Jump unconditionally", "pc += target"),
// // Double
// Opcode::Mov => ("Copy src into dst", "dst = src"),
// Opcode::Add => ("Add src to dst", "dst += src"),
// Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
// Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"),
// Opcode::Sub => ("Subtract src from dst", "dst -= src"),
// Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
// Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
// Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
// Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
// Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
// Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
// Opcode::And => ("Bitwise And src into dst", "dst &= src"),
// // Emulated
// Opcode::Nop => ("Does nothing", "{}"),
// Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
// Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
// Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
// Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
// Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
// Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
// Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
// Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
// Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
// Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
// Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
// Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
// Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
// Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
// Opcode::Clr => ("Sets dst to 0", "dst = 0"),
// Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
// Opcode::Dec => ("Decrements dst", "dst -= 1"),
// Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
// Opcode::Inc => ("Increments dst", "dst += 1"),
// Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
// Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
// Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"),
// Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"),
// }
// }
/// Opcodes printed under the "Relative Jump instructions:" heading by `list_opcodes`,
/// in display order.
const JUMP: [Opcode; 8] =
[Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp];
// const SINGLE: [Opcode; 7] =
// [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti];
/// Opcodes printed under the "Double-operand instructions:" heading by `list_opcodes`,
/// in display order. Laid out six per line to mirror the printed table.
#[rustfmt::skip]
const DOUBLE: [Opcode; 12] = [
Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
];
/// Opcodes printed under the "Simulated instructions:" heading by `list_opcodes`,
/// in display order. Laid out six per line to mirror the printed table.
#[rustfmt::skip]
const SIMULATED: [Opcode; 24] = [
Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
];
// const JUMP: [Opcode; 8] =
// [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp];
pub fn list_opcodes() {
let mut stdout = std::io::stdout().lock();
header!(stdout, "Single-operand instructions:");
let _ = write_opcode_list(&mut stdout, &SINGLE);
header!(stdout, "Relative Jump instructions:");
let _ = write_opcode_list(&mut stdout, &JUMP);
header!(stdout, "Double-operand instructions:");
let _ = write_opcode_list(&mut stdout, &DOUBLE);
header!(stdout, "Simulated instructions:");
let _ = write_opcode_list(&mut stdout, &SIMULATED);
}
// #[rustfmt::skip]
// const DOUBLE: [Opcode; 12] = [
// Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
// Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
// ];
// #[rustfmt::skip]
// const SIMULATED: [Opcode; 24] = [
// Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
// Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
// Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
// Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
// ];
/// Writes `list` to `f` as a table with six opcodes per row.
///
/// Every opcode is followed by a tab, except the sixth of a row, which is
/// followed by a newline. A final, incomplete row is terminated with an extra
/// newline. The first write error is returned immediately.
fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> {
    const PER_ROW: usize = 6;
    for row in list.chunks(PER_ROW) {
        for (col, opcode) in row.iter().enumerate() {
            let sep = if col + 1 == PER_ROW { "\n" } else { "\t" };
            write!(f, "{opcode}{sep}")?;
        }
        // Only the last row can be short; it still needs its line ending
        if row.len() < PER_ROW {
            writeln!(f)?;
        }
    }
    Ok(())
}
// pub fn list_opcodes() {
// let mut stdout = std::io::stdout().lock();
// header!(stdout, "Single-operand instructions:");
// let _ = write_opcode_list(&mut stdout, &SINGLE);
// header!(stdout, "Relative Jump instructions:");
// let _ = write_opcode_list(&mut stdout, &JUMP);
// header!(stdout, "Double-operand instructions:");
// let _ = write_opcode_list(&mut stdout, &DOUBLE);
// header!(stdout, "Simulated instructions:");
// let _ = write_opcode_list(&mut stdout, &SIMULATED);
// }
/// Writes a heading line to the writer `$f`: switches the foreground color to
/// cyan, writes the `write!`-style format arguments, then resets attributes
/// and ends the line. Write errors are discarded (`.ok()`).
macro header ($f:ident, $($x: expr),+) {
{write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();}
}
/// Prints a footer line to stdout: switches the foreground color to dark gray,
/// prints the `print!`-style format arguments, then resets attributes and ends
/// the line.
macro footer ($($x: expr),+) {
{print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);}
}
// fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> {
// for (idx, opcode) in list.iter().enumerate() {
// write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?;
// }
// if list.len() % 6 != 0 {
// writeln!(f)?;
// }
// Ok(())
// }
// macro header ($f:ident, $($x: expr),+) {
// {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();}
// }
// macro footer ($($x: expr),+) {
// {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);}
// }

View File

@ -3,59 +3,63 @@
// https://mspgcc.sourceforge.net/manual/ln16.html
#![feature(decl_macro)]
use anes::{Color, ResetAttributes, SetForegroundColor};
use msp430_asm::parser::preamble::*;
use msp430_asm::preamble::*;
use std::{
error::Error,
io::{stdin, IsTerminal, Write},
};
type AsmResult<T> = Result<T, Box<dyn Error>>;
mod data;
fn main() -> AsmResult<()> {
if stdin().is_terminal() {
hello();
}
repl()
fn main() {
println!("Hello, world!")
}
/// Prints the startup banner in dark gray: the binary name and package version
/// (both baked in at compile time via `env!`), followed by an attribution notice
/// for the mspgcc documentation, then resets the terminal attributes.
fn hello() {
println!(
"{}{} v{}
This software contains instruction and register descriptions adapted from
the mspgcc project's fantastic documentation, which is licensed under the GPL.
https://mspgcc.sourceforge.net/manual/book1.html{}\n",
SetForegroundColor(Color::DarkGray),
env!("CARGO_BIN_NAME"),
env!("CARGO_PKG_VERSION"),
ResetAttributes
);
}
// use anes::{Color, ResetAttributes, SetForegroundColor};
// use msp430_asm::parser::preamble::*;
// use msp430_asm::preamble::*;
// use std::{
// error::Error,
// io::{stdin, IsTerminal, Write},
// };
/// Runs the interactive prompt until stdin reaches EOF or a read fails.
///
/// Each non-empty line is either a help request (it starts with `?` or `help`),
/// which lists all opcodes, or a syntax fragment, which is described via
/// `SyntaxFragment::info`. Lines that fail to parse are silently ignored.
/// Always returns `Ok(())`.
fn repl() -> AsmResult<()> {
    let mut line = String::new();
    loop {
        line.clear();
        printflush!("> ");
        let len = match stdin().read_line(&mut line) {
            // Zero bytes read means EOF; a read error also ends the session
            Ok(0) | Err(_) => break,
            Ok(len) => len,
        };
        // A length of 1 is just the newline: nothing to interpret
        if len > 1 {
            let wants_help = line.starts_with('?') || line.starts_with("help");
            if wants_help {
                data::list_opcodes();
            } else if let Ok(fragment) = data::SyntaxFragment::try_from(line.as_str()) {
                fragment.info();
            }
        }
    }
    Ok(())
}
// type AsmResult<T> = Result<T, Box<dyn Error>>;
/// `print!`, followed by a flush of stdout so that text without a trailing
/// newline (such as a prompt) appears immediately. A flush error is ignored.
macro printflush ($($x: expr),+) {
{print!($($x),+); let _ = ::std::io::stdout().flush();}
}
// mod data;
// fn main() -> AsmResult<()> {
// if stdin().is_terminal() {
// hello();
// }
// repl()
// }
// fn hello() {
// println!(
// "{}{} v{}
// This software contains instruction and register descriptions adapted from
// the mspgcc project's fantastic documentation, which is licensed under the GPL.
// https://mspgcc.sourceforge.net/manual/book1.html{}\n",
// SetForegroundColor(Color::DarkGray),
// env!("CARGO_BIN_NAME"),
// env!("CARGO_PKG_VERSION"),
// ResetAttributes
// );
// }
// fn repl() -> AsmResult<()> {
// printflush!("> ");
// let mut line = String::new();
// while let Ok(len) = stdin().read_line(&mut line) {
// match len {
// 0 => break, // No newline (reached EOF)
// 1 => (), // Line is empty
// _ => {
// if line.starts_with('?') || line.starts_with("help") {
// data::list_opcodes()
// } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) {
// sf.info();
// }
// }
// }
// printflush!("> ");
// line.clear();
// }
// Ok(())
// }
// macro printflush ($($x: expr),+) {
// {print!($($x),+); let _ = ::std::io::stdout().flush();}
// }

14
msp430-asm/Cargo.toml Normal file
View File

@ -0,0 +1,14 @@
[package]
name = "msp430-asm"
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
libmsp430 = { path = ".." }
anes = { version = "0.2.0" }
argp = { version = "0.3.0" }

116
msp430-asm/src/lib.rs Normal file
View File

@ -0,0 +1,116 @@
//! Helper library for msp430-asm
#![feature(decl_macro)]
pub mod split_twice {
    /// Slices a collection into a beginning, middle, and end, based on two unordered indices
    pub trait SplitTwice<'t> {
        /// The borrowed piece type returned for each of the three parts
        type Slice;
        /// The index type used to mark the two split points
        type Idx;
        /// Splits a collection into a beginning, middle, and end slice,
        /// based on two unordered indices
        ///
        /// The smaller index ends the beginning, the larger index starts the end,
        /// and whatever lies between them is the middle. Indices past the end of
        /// the collection are clamped to its length. The three parts never
        /// overlap, and together they always cover the whole collection.
        ///
        /// # Examples
        /// ```rust
        /// # use msp430_asm::split_twice::SplitTwice;
        /// let string = "foo,bar,baz";
        /// let (foo, bar, baz) = string.split_twice(4, 8);
        /// assert_eq!(foo, "foo,");
        /// assert_eq!(bar, "bar,");
        /// assert_eq!(baz, "baz");
        ///
        /// // Equal indices give an empty middle
        /// assert_eq!(string.split_twice(4, 4), ("foo,", "", "bar,baz"));
        /// ```
        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice);
    }

    impl<'t, T: 't> SplitTwice<'t> for [T] {
        type Slice = &'t [T];
        type Idx = usize;

        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (lo, hi) = if a < b { (a, b) } else { (b, a) };
            // Clamp so that `lo <= hi <= len`; both `split_at` calls are then in bounds
            let hi = hi.min(self.len());
            let lo = lo.min(hi);
            let (head, end) = self.split_at(hi);
            // Split the head, never `self`, so `start` cannot overlap `mid` or `end`
            let (start, mid) = head.split_at(lo);
            (start, mid, end)
        }
    }

    /// Indices are byte offsets.
    ///
    /// # Panics
    /// Panics if an in-range index does not fall on a UTF-8 character boundary.
    impl<'t> SplitTwice<'t> for str {
        type Slice = &'t str;
        type Idx = usize;

        fn split_twice(
            &'t self,
            a: Self::Idx,
            b: Self::Idx,
        ) -> (Self::Slice, Self::Slice, Self::Slice) {
            let (lo, hi) = if a < b { (a, b) } else { (b, a) };
            // Clamp so that `lo <= hi <= len`; both `split_at` calls are then in bounds
            let hi = hi.min(self.len());
            let lo = lo.min(hi);
            let (head, end) = self.split_at(hi);
            // Split the head, never `self`, so `start` cannot overlap `mid` or `end`
            let (start, mid) = head.split_at(lo);
            (start, mid, end)
        }
    }
}
pub mod cursor {
    use std::fmt::{Arguments, Display};

    /// Formats its arguments prefixed with the escape-sequence introducer `ESC [`.
    pub macro csi($($t:tt)*) {format_args!("\x1b[{}", format_args!($($t)*))}

    /// Builds a [Colorized] value with the given foreground color, no background
    /// color, and the remaining arguments as its `format_args!` message.
    pub macro color($fg:expr, $($t:tt)*) {
        Colorized::new(Some($fg), None, format_args!($($t)*))
    }

    /// A terminal color. The discriminant is the number emitted (followed by `m`)
    /// when the color is used as a foreground; backgrounds use the same number
    /// plus 10.
    #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub enum Color {
        #[default]
        Black = 30,
        Red,
        Green,
        Yellow,
        Blue,
        Magenta,
        Cyan,
        Gray,
        DarkGray = 90,
        Pink,
        Lime,
        Sunflower,
        SkyBlue,
        HotPink,
        Turquoise,
        White,
    }

    /// A formatted message together with optional foreground and background colors.
    ///
    /// Displaying it emits the color sequences, then the message, then a reset.
    #[derive(Clone, Copy, Debug)]
    pub struct Colorized<'args> {
        fg: Option<Color>,
        bg: Option<Color>,
        args: Arguments<'args>,
    }

    impl<'t> Colorized<'t> {
        /// Bundles a message with its optional foreground and background colors.
        pub fn new(fg: Option<Color>, bg: Option<Color>, args: Arguments<'t>) -> Self {
            Self { fg, bg, args }
        }
    }

    impl<'t> Display for Colorized<'t> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let fg_code = self.fg.map(|color| color as u8);
            // Background codes sit exactly 10 above the matching foreground codes
            let bg_code = self.bg.map(|color| color as u8 + 10);
            // Foreground first, then background; absent colors emit nothing
            for code in fg_code.into_iter().chain(bg_code) {
                write!(f, "{}", csi!("{}m", code))?;
            }
            // The reset sequence is always emitted, even when no color was set
            write!(f, "{}{}", self.args, csi!("0m"))
        }
    }
}

View File

@ -1,7 +1,15 @@
//! Simple frontend for the assembler
#![feature(decl_macro)]
use argp::parse_args_or_exit;
use msp430_asm::preamble::*;
use libmsp430::{
assembler::Assemble,
parser::ast::{canonical::Canonicalize, *},
parser::{error::Error as PError, Parser},
};
use msp430_asm::{
cursor::{color, Color::*},
split_twice::SplitTwice,
};
use std::{
error::Error,
io::{stdin, IsTerminal, Read},
@ -36,21 +44,18 @@ mod args {
}
mod repl {
use super::*;
use anes::{Color, MoveCursorToPreviousLine, ResetAttributes, SetForegroundColor};
use msp430_asm::{
assembler::error::AssemblyError, error::Error as MspError, lexer::error::LexError, parser::error::ParseError,
};
use anes::MoveCursorToPreviousLine;
use std::io::{stderr, Write};
macro color ($color: expr, $fmt: literal, $($str: expr),*) {
format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* ::anes::ResetAttributes)
}
// macro color ($color: expr, $fmt: literal, $($str: expr),*) {
// format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)*
// ::anes::ResetAttributes) }
/// Formats a line number for the prompt gutter: the value padded to a minimum
/// width of four, followed by `": "`.
macro linenr($n: expr) {
format_args!("{:4}: ", $n)
}
macro printflush ($($x: expr),+) {
macro printfl ($($x: expr),+) {
{print!($($x),+); let _ = ::std::io::stdout().flush();}
}
@ -62,13 +67,10 @@ mod repl {
let mut line = String::new();
let mut linenr = 1;
println!(
"{}{} v{}{}",
SetForegroundColor(Color::DarkGray),
env!("CARGO_BIN_NAME"),
env!("CARGO_PKG_VERSION"),
ResetAttributes
"{}",
color!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION"))
);
printflush!("{}", linenr!(linenr));
printfl!("{}", linenr!(linenr));
while let Ok(len) = stdin().read_line(&mut line) {
match len {
0 => break, // No newline (reached EOF)
@ -76,8 +78,8 @@ mod repl {
_ => (),
}
// Try to parse this line in isolation (this restricts preprocessing)
match Parser::default().parse(&line) {
Err(error) => errpp(&line, linenr, &error.into()),
match Parser::new(&line).parse::<Statements>() {
Err(error) => errpp(&line, linenr, &error),
Ok(_) => {
okpp(&line, linenr);
*buf += &line;
@ -85,36 +87,29 @@ mod repl {
}
}
line.clear();
printflush!("{}", linenr!(linenr));
printfl!("{}", linenr!(linenr));
}
println!();
println!("{}", color!(Gray, "[EOF]"));
Ok(())
}
fn okpp(line: &str, linenr: i32) {
println!("{}{}{}", move_cursor!(1, 5), color!(Color::Green, "{:4}", linenr!(linenr)), line.trim_end(),);
println!(
"{}{}{}",
move_cursor!(1, 5),
color!(Green, "{:4}", linenr!(linenr)),
line.trim_end(),
);
}
/// Pretty-prints a line error
fn errpp(line: &str, linenr: i32, err: &msp430_asm::error::Error) {
fn errpp(line: &str, linenr: i32, err: &PError) {
let loc = err.loc;
if stderr().is_terminal() {
let line = line.trim_end();
eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Color::Red, "{}", linenr!(linenr)));
match err {
// TODO: use a recursive enum to store all valid states
MspError::LexError(LexError::Contextual(c, e))
| MspError::ParseError(ParseError::LexError(LexError::Contextual(c, e)))
| MspError::AssemblyError(AssemblyError::ParseError(ParseError::LexError(LexError::Contextual(
c,
e,
)))) => {
let (start, end) = line.split_at(c.position() - 1);
eprintln!("{start}{} ({e})", color!(Color::Red, "{}", end));
}
_ => {
eprintln!("{} ({err})", color!(Color::Red, "{}", line));
}
}
eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Red, "{}", linenr!(linenr)));
let (start, mid, end) = line.split_twice(loc.start, loc.end);
eprintln!("{start}{}{end} {}", color!(Red, "{}", mid), color!(DarkGray, "; {}", err));
} else {
eprintln!("{} ({err})", line.trim())
}
@ -123,7 +118,7 @@ mod repl {
// Parses and assembles a buffer, then prints it in hex to stdout
fn asm(buf: &str) -> Result<(), Box<dyn Error>> {
match Assembler::assemble(&Parser::default().parse(&buf)?) {
match Parser::new(buf).parse::<Statements>()?.to_canonical().assemble() {
Err(error) => println!("{error}"),
Ok(out) => {
for word in out {

View File

@ -9,11 +9,11 @@ jmp main
.string "ABA"
.string "ABAB"
.word 0b0101101001011010
.words [dead beef]
.words [0xdead 0xbeef 0x0000]
main:
; testing defines
.define asdfgh #1000
.define asdfgh #0x1000
.define qwerty @sp+
br asdfgh
mov qwerty, r15
@ -88,73 +88,74 @@ mov @r13+, r14
mov @r14+, r15
.define special r6
;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid
;mov @sp+, r15 ; pop r15
;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
;mov @cg+, r15
; mov , r14
; mov @pc+, r15 ; This is a mov-immediate, and may corrupt your output
mov @sp+, r15 ; pop r15
mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
mov @cg+, r15
indexed_mode:
.define numbered r7
mov.b 10(r0), r1
mov 10(r1), r2
mov.b 0x10(r0), r1
mov 0x10(r1), r2
;mov 10(r2), r3 ; Invalid: cannot index relative to sr
;mov 10(r3), r4 ; Invalid: cannot index relative to cg
mov 10(r4), r5
mov 10(r5), r6
mov 10(r6), r7
mov 10(r7), r8
mov 10(r8), r9
mov 10(r9), r10
mov 10(r10), r11
mov 10(r11), r12
mov 10(r12), r13
mov 10(r13), r14
mov 10(r14), r15
mov 0x10(r4), r5
mov 0x10(r5), r6
mov 0x10(r6), r7
mov 0x10(r7), r8
mov 0x10(r8), r9
mov 0x10(r9), r10
mov 0x10(r10), r11
mov 0x10(r11), r12
mov 0x10(r12), r13
mov 0x10(r13), r14
mov 0x10(r14), r15
.define special r8
mov 10(pc), r15
mov 10(sp), r15
mov 0x10(pc), r15
mov 0x10(sp), r15
;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
;mov 10(cg), r15
_immediate_mode:
.define numbered r9
mov #beef, r0
mov #beef, r1
mov #beef, r2
mov #beef, r3
mov #beef, r4
mov #beef, r5
mov #beef, r6
mov #beef, r7
mov #beef, r8
mov #beef, r9
mov #beef, r10
mov #beef, r11
mov #beef, r12
mov #beef, r13
mov #beef, r14
mov #beef, r15
mov #0xbeef, r0
mov #0xbeef, r1
mov #0xbeef, r2
mov #0xbeef, r3
mov #0xbeef, r4
mov #0xbeef, r5
mov #0xbeef, r6
mov #0xbeef, r7
mov #0xbeef, r8
mov #0xbeef, r9
mov #0xbeef, r10
mov #0xbeef, r11
mov #0xbeef, r12
mov #0xbeef, r13
mov #0xbeef, r14
mov #0xbeef, r15
.define special r10
mov #beef, pc
mov #beef, sp
mov #beef, sr
mov #beef, cg
mov #0xbeef, pc
mov #0xbeef, sp
mov #0xbeef, sr
mov #0xbeef, cg
jmp _register_mode
jmp 3fe
jmp -3fc
jmp 0x3fe
jmp -0x3fc
ret
; Funky encodings
mov r6, r4
mov @r6, r4
mov @r6+, r4
mov 0(r6), r4
mov 4141(r6), r4
mov 0x0(r6), r4
mov 0x4141(r6), r4
mov #-1, r4
mov #ffff, r4
mov #0xffff, r4
mov #0, r4
mov #1, r4
mov #2, r4
@ -164,33 +165,33 @@ mov r6, 0(r4)
mov @r6, 0(r4)
mov @r6+, 0(r4)
mov 0(r6), 0(r4)
mov 4141(r6), 0(r4)
mov 0x4141(r6), 0(r4)
mov #-1, 0(r4)
mov #ffff, 0(r4)
mov #0xffff, 0(r4)
mov #0, 0(r4)
mov #1, 0(r4)
mov #2, 0(r4)
mov #4, 0(r4)
mov #8, 0(r4)
mov r6, 4141(r4)
mov @r6, 4141(r4)
mov @r6+, 4141(r4)
mov 0(r6), 4141(r4)
mov 4141(r6), 4141(r4)
mov #-1, 4141(r4)
mov #ffff, 4141(r4)
mov #0, 4141(r4)
mov #1, 4141(r4)
mov #2, 4141(r4)
mov #4, 4141(r4)
mov #8, 4141(r4)
mov r6, 0x4141(r4)
mov @r6, 0x4141(r4)
mov @r6+, 0x4141(r4)
mov 0(r6), 0x4141(r4)
mov 0x4141(r6), 0x4141(r4)
mov #-1, 0x4141(r4)
mov #0xffff, 0x4141(r4)
mov #0, 0x4141(r4)
mov #1, 0x4141(r4)
mov #2, 0x4141(r4)
mov #4, 0x4141(r4)
mov #8, 0x4141(r4)
mov r6, #0
mov @r6, #0
mov @r6+, #0
mov 0(r6), #0
mov 4141(r6), #0
mov 0x4141(r6), #0
mov #-1, #0
mov #ffff, #0
mov #0xffff, #0
mov #0, #0
mov #1, #0
mov #2, #0
@ -200,9 +201,9 @@ mov r6, #1
mov @r6, #1
mov @r6+, #1
mov 0(r6), #1
mov 4141(r6), #1
mov 0x4141(r6), #1
mov #-1, #1
mov #ffff, #1
mov #0xffff, #1
mov #0, #1
mov #1, #1
mov #2, #1
@ -211,14 +212,14 @@ mov #8, #1
; Instruction exercise
; Jumps
jne 10
jeq 10
jlo 10
jhs 10
jn 10
jge 10
jl 10
jmp 10
jne 0x10
jeq 0x10
jlo 0x10
jhs 0x10
jn 0x10
jge 0x10
jl 0x10
jmp 0x10
; Two-ops
mov r14, r15
@ -232,7 +233,7 @@ bit r14, r15
bic r14, r15
bis r14, r15
xor r14, r15
and r14, 10(r15)
and r14, 0x10(r15)
; One-ops
rrc r15
@ -241,13 +242,14 @@ rra r15
sxt r15
push r15
call r15
reti r15
; reti is special
reti
; Jump aliases
jnc 10
jnz 10
jc 10
jz 10
jnc 0x10
jnz 0x10
jc 0x10
jz 0x10
; "emulated" no-op instructions
ret

View File

@ -1,197 +1,423 @@
// © 2023 John Breaux
//! Traverses an AST, assembling instructions.
//!
//! [Assembler] carries *some* state
//! Assembles a binary using the given [AST](crate::parser::ast)
use crate::parser::preamble::*;
use error::AssemblyError;
use error::{AResult, ErrorKind::*};
use std::collections::HashMap;
use std::path::Path;
pub mod error;
use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IdentType {
Word,
Jump,
}
use self::error::{Error, ErrorKind};
/// Takes in an AST's [Root], and outputs a sequence of bytes
/// Assembles a binary using the given [Assemble]-able item
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Assembler {
out: Vec<u16>,
/// A map from Labels' [Identifier]s to their location in the binary
labels: HashMap<Identifier, usize>,
/// A list of all referenced [Identifier]s in the binary, and their locations
identifiers: Vec<(usize, Identifier, IdentType)>,
pub struct Assembler<'t> {
/// The assembled output
output: Vec<u16>,
/// Table of labels, for backpatching
labels: HashMap<&'t str, usize>,
/// Backpatch table for jump instructions
jump_queue: Vec<(usize, &'t str)>,
/// Backpatch table for immediate values
expr_queue: Vec<(usize, Expr<'t>)>,
/// Base address from .org directives
org_base: usize,
/// Last seen index in input
loc: Span<usize>,
}
impl Assembler {
pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> {
let mut out = Self::default();
out.visit_root(r)?;
Ok(out.out)
impl<'t> Assembler<'t> {
pub fn new() -> Self {
Default::default()
}
/// Assembles `t` into this [Assembler] by delegating to [Assemble::assemble_in].
///
/// Returns `self` on success so that calls can be chained.
pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> {
t.assemble_in(self)
}
/// Gets the position recorded for the label `name`, truncated to a [u16]
///
/// Returns [None] when no such label has been added.
pub fn addrof(&self, name: &str) -> Option<u16> {
    let position = *self.labels.get(name)?;
    Some(position as u16)
}
/// Gets the output word stored at the position of the label `name`
///
/// Returns [None] when the label is unknown, or when nothing has been emitted at its position.
pub fn valueof(&self, name: &str) -> Option<u16> {
    let position = self.addrof(name)? as usize;
    let word = self.output.get(position)?;
    Some(*word)
}
/// Appends a single word to the end of the output buffer
fn push(&mut self, word: u16) {
self.output.push(word)
}
fn error(&self, kind: ErrorKind) -> Error {
Error { span: self.loc, kind }
}
/// Backpatches everything, and yoinks the output buffer.
///
/// Deferred jumps are patched first, then deferred expressions are evaluated and written into
/// their reserved words. On success the assembler is reset to its default state.
///
/// # Errors
/// - `UndefinedLabel` when a deferred jump names a label that was never added
/// - `LongJump` when the distance to the label does not fit the jump's offset field;
///   the payload is the distance in words
/// - Any error produced while evaluating a deferred expression
///
/// Errors are reported at the last span the assembler recorded, which is not necessarily the
/// span of the offending jump or expression.
pub fn out(&mut self) -> AResult<Vec<u16>> {
    // Resolve jumps
    for (idx, key) in &self.jump_queue {
        // eprintln!("Patching jump at {idx} with key {key}");
        // `value` is the distance in words from the jump instruction to its label.
        match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) {
            None => Err(self.error(UndefinedLabel(key.to_string())))?,
            // The encoded field is `value - 1` as a signed 10-bit word offset (-0x200..=0x1ff),
            // so `value` itself must lie in -0x1ff..=0x200. Anything wider would be silently
            // truncated by the mask and jump to the wrong place.
            Some(value @ -0x1ff..=0x200) => self.output[*idx] |= (value - 1) as u16 & 0x3ff,
            Some(value) => Err(self.error(LongJump(value)))?,
        }
    }
    // Resolve immediates through late expression evaluation.
    for (idx, expr) in &self.expr_queue {
        // eprintln!("Patching immediate at {idx} with expression {expr:?}");
        self.output[*idx] = self.eval(expr)?;
    }
    // Hand the buffer to the caller, and leave the assembler ready for reuse
    let out = std::mem::take(&mut self.output);
    *self = Default::default();
    Ok(out)
}
pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) }
pub fn out(self) -> Vec<u16> { self.out }
fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) }
fn push_default(&mut self) -> usize {
self.out.push(Default::default());
self.out.len() - 1
pub fn add_label(&mut self, label: &'t str) -> AResult<()> {
if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() {
Err(self.error(RedefinedLabel(label.into())))?
}
Ok(())
}
/// Reserves one zeroed word in the output for `e`, and queues `e` to be evaluated into that
/// word when the output is finalized
pub fn defer_expr(&mut self, e: Expr<'t>) {
    let slot = self.output.len();
    self.push(0);
    self.expr_queue.push((slot, e));
}
/// Queues `label` for resolution at output time
///
/// The patch is recorded against the NEXT word to be pushed, so this must be called before the
/// jump instruction itself is appended to the output.
pub fn defer_jump(&mut self, label: &'t str) {
    let next_word = self.output.len();
    self.jump_queue.push((next_word, label));
}
}
impl Assembler {
/// Visits the [Root] node of a parse tree
fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> {
// Visit the entire tree
for (num, line) in r.lines() {
self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?;
pub trait Assemble<'t> {
/// Assembles `self` into a fresh, default [Assembler], then finalizes it with
/// [Assembler::out] and returns the resulting words
fn assemble(&self) -> AResult<Vec<u16>> {
self.assemble_in(&mut Default::default())?.out()
}
// Link identifiers
for (idx, id, id_type) in self.identifiers.iter() {
let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) };
let offset = (num as isize - *idx as isize) * 2;
*self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type {
IdentType::Word => offset as u16,
IdentType::Jump => JumpTarget::squish(offset)?,
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>;
}
impl<'t> Assemble<'t> for Statements<'t> {
    /// Assembles each statement in order, stopping at the first error
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        self.stmts.iter().try_for_each(|stmt| stmt.assemble_in(&mut *a).map(|_| ()))?;
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Statement<'t> {
    /// Dispatches on the statement kind: labels are recorded, instructions and directives are
    /// assembled, and comments produce no output
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        match self {
            Statement::Comment(_) => Ok(a),
            Statement::Directive(directive) => directive.assemble_in(a),
            Statement::Insn(insn) => insn.assemble_in(a),
            Statement::Label(label) => {
                a.add_label(label)?;
                Ok(a)
            }
        }
    }
}
impl<'t> Assemble<'t> for Directive<'t> {
    /// Applies a directive to the assembler
    ///
    /// - `Define` produces no output
    /// - `Org` evaluates its expression immediately, and stores it as the base address
    /// - `Word` and `Words` defer one expression per output word
    /// - `String` assembles the string's bytes
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        match self {
            Directive::Define(_) => {}
            Directive::Org(base) => {
                let origin = a.eval(base)?;
                a.org_base = origin as usize;
            }
            Directive::Word(expr) => a.defer_expr(*expr.clone()),
            Directive::Words(exprs) => {
                exprs.iter().for_each(|expr| a.defer_expr(expr.clone()));
            }
            Directive::String(text) => {
                text.assemble_in(a)?;
            }
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for &'t str {
    /// Packs the string's bytes two per word, low byte first, followed by a zero terminator
    ///
    /// When the string has an odd number of bytes, the last byte shares its word with the
    /// terminator. Otherwise (including the empty string) a whole zero word is appended.
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let mut pairs = self.as_bytes().chunks_exact(2);
        for pair in &mut pairs {
            a.push(u16::from_le_bytes([pair[0], pair[1]]));
        }
        // Either the odd trailing byte (its high byte is the terminator), or a zero word
        let last = pairs.remainder().first().copied().unwrap_or(0);
        a.push(u16::from(last));
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Instruction<'t> {
    /// Records this instruction's span as the assembler's current location (used when
    /// reporting errors), then assembles the instruction kind
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        a.loc = self.span;
        self.kind.assemble_in(a)
    }
}
impl<'t> Assemble<'t> for InstructionKind<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
match self {
InstructionKind::NoEm(v) => v.assemble_in(a),
InstructionKind::OneEm(v) => v.assemble_in(a),
InstructionKind::OneArg(v) => v.assemble_in(a),
InstructionKind::TwoArg(v) => v.assemble_in(a),
InstructionKind::Jump(v) => v.assemble_in(a),
InstructionKind::Reti(v) => v.assemble_in(a),
InstructionKind::Br(v) => v.assemble_in(a),
}
}
}
impl<'t> Assemble<'t> for NoEm {
    /// Warns on stderr, then assembles the canonical form of a clone of this instruction
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        eprintln!(
            "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
        );
        let canonical = self.clone().to_canonical();
        canonical.assemble_in(a)
    }
}
impl<'t> Assemble<'t> for OneEm<'t> {
    /// Warns on stderr, then assembles the canonical form of a clone of this instruction
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        eprintln!(
            "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
        );
        let canonical = self.clone().to_canonical();
        canonical.assemble_in(a)
    }
}
impl<'t> Assemble<'t> for OneArg<'t> {
    /// Emits the instruction word, then defers the operand's extension word, if it has one
    ///
    /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
    /// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let (reg, mode, extword) = source(&self.src);
        let op = one_arg(self.opcode);
        let bw = self.width as u16;
        a.push(0b000100 << 10 | op << 7 | bw << 6 | mode << 4 | reg);
        if let Some(expr) = extword {
            a.defer_expr(expr)
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for TwoArg<'t> {
    /// Emits the instruction word, then defers the source and destination extension words
    /// (in that order) for whichever operands have one
    ///
    /// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
    /// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        let (src_reg, src_mode, src_ext) = source(&self.src);
        let (dst_reg, dst_mode, dst_ext) = destination(&self.dst);
        let op = two_arg(self.opcode);
        let bw = self.width as u16;
        a.push(op << 12 | src_reg << 8 | dst_mode << 7 | bw << 6 | src_mode << 4 | dst_reg);
        // The source extension word always precedes the destination extension word
        for expr in [src_ext, dst_ext].into_iter().flatten() {
            a.defer_expr(expr)
        }
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Jump<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, dst } = self;
let word = 1 << 13
| jump(*opcode) << 10
| match *dst {
JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))),
JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => {
return Err(a.error(LongJump(value)))
}
JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff,
JumpDst::Label(label) => {
a.defer_jump(label);
0
}
} & 0x3ff;
a.push(word);
Ok(a)
}
}
impl<'t> Assemble<'t> for Reti {
    /// Emits the single fixed word that encodes `reti`
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        const RETI: u16 = 0b0001_0011_0000_0000;
        a.push(RETI);
        Ok(a)
    }
}
impl<'t> Assemble<'t> for Br<'t> {
    /// Warns on stderr, then assembles the canonical form of a clone of this instruction
    fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
        eprintln!(
            "Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
        );
        let canonical = self.clone().to_canonical();
        canonical.assemble_in(a)
    }
}
/// Returns the opcode bits for a one-operand instruction: the enum's discriminant, unchanged
pub fn one_arg(opcode: token::OneArg) -> u16 {
opcode as u16
}
/// Returns the opcode bits for a two-operand instruction: the enum's discriminant plus 4
// NOTE(review): presumably the enum starts at 0 while the encoded two-operand opcodes start
// at 4 -- confirm against the definition of `token::TwoArg`.
pub fn two_arg(opcode: token::TwoArg) -> u16 {
opcode as u16 + 4
}
/// Returns the 3-bit condition code for a jump mnemonic
///
/// Aliased mnemonics (`jne`/`jnz`, `jeq`/`jz`, `jnc`/`jlo`, `jc`/`jhs`) share a code.
pub fn jump(opcode: token::Jump) -> u16 {
    use token::Jump as J;
    match opcode {
        J::Jne | J::Jnz => 0,
        J::Jeq | J::Jz => 1,
        J::Jnc | J::Jlo => 2,
        J::Jc | J::Jhs => 3,
        J::Jn => 4,
        J::Jge => 5,
        J::Jl => 6,
        J::Jmp => 7,
    }
}
/// Returns a tuple of (Reg, AddrMode, extword)
///
/// The extword is the expression that must follow the instruction word, if any. The
/// `Special` constants are encoded purely through register/mode pairs on registers 2 and 3,
/// and so carry no extword.
pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) {
use SrcSpecial::*;
match src {
// Constants: no extension word needed
Src::Special(Four) => (2, 2, None),
Src::Special(Eight) => (2, 3, None),
Src::Special(Zero) => (3, 0, None),
Src::Special(One) => (3, 1, None),
Src::Special(Two) => (3, 2, None),
Src::Special(NegOne) => (3, 3, None),
// Immediates: mode 3 on register 0, with the value as the extension word
Src::Immediate(e) => (0, 3, Some(*e.clone())),
// Absolute: mode 1 on register 2, with the address as the extension word
Src::Absolute(e) => (2, 1, Some(*e.clone())),
Src::Direct(r) => (*r as u16, 0, None),
Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Src::Indirect(r) => (*r as u16, 2, None),
Src::PostInc(r) => (*r as u16, 3, None),
// A bare expression is encoded exactly like an immediate
Src::BareExpr(e) => (0, 3, Some(*e.clone())),
}
}
/// Returns a tuple of (Reg, AddrMode, Extword)
///
/// The extword is the expression that must follow the instruction word, if any.
/// Only modes 0 and 1 are produced here.
pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) {
use DstSpecial::*;
match dst {
// Special destinations use register 3, and carry no extension word
Dst::Special(Zero) => (3, 0, None),
Dst::Special(One) => (3, 1, None),
// Absolute: mode 1 on register 2, with the address as the extension word
Dst::Absolute(e) => (2, 1, Some(*e.clone())),
Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Dst::Direct(r) => (*r as u16, 0, None),
}
}
impl<'t> Assembler<'t> {
/// Evaluates an [Expr] using labels and constants defined in the current program
fn eval(&self, expr: &Expr) -> AResult<u16> {
match expr {
Expr::Binary(head, tails) => {
let mut head = self.eval(head)?;
for (op, tail) in tails {
let tail = self.eval(tail)?;
head = match op {
BinOp::Mul => head.wrapping_mul(tail),
BinOp::Div => head.wrapping_div(tail),
BinOp::Rem => head.wrapping_rem(tail),
BinOp::Add => head.wrapping_add(tail),
BinOp::Sub => head.wrapping_sub(tail),
BinOp::Lsh => head.wrapping_shl(tail as u32),
BinOp::Rsh => head.wrapping_shr(tail as u32),
BinOp::And => head & tail,
BinOp::Xor => head ^ tail,
BinOp::Or => head | tail,
};
}
Ok(())
Ok(head)
}
/// visit a [Line]
fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> {
match line {
Line::Insn(insn) => self.visit_instruction(insn),
Line::Label(label) => self.visit_label(label),
Line::Directive(d) => self.visit_directive(d),
_ => Ok(()),
Expr::Unary(ops, tail) => {
let mut tail = self.eval(tail)?;
for op in ops {
tail = match op {
UnOp::Not => !tail,
UnOp::Neg => 0u16.wrapping_sub(tail),
UnOp::Deref => *self
.output
.get(tail.wrapping_sub(self.org_base as u16) as usize >> 1)
.ok_or_else(|| self.error(OobRead(tail)))?,
}
}
/// Visits a [Directive]
fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> {
match node {
Directive::Org(_) => todo!("{node}"),
Directive::Define(..) => (),
Directive::Include(r) => self.visit_root(r)?,
Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()),
Directive::Bytes(words) | Directive::Words(words) => {
for word in words {
self.out.push((*word).into());
Ok(tail)
}
Expr::Group(e) => self.eval(e),
Expr::Number(n) => Ok(*n),
Expr::Ident(name) => {
self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string())))
}
Expr::AddrOf(name) => self
.addrof(name)
.map(|p| (p << 1).wrapping_add(self.org_base as u16))
.ok_or_else(|| self.error(UndefinedLabel(name.to_string()))),
}
}
}
pub mod error {
use std::fmt::Display;
use crate::util::Span;
pub type AResult<T> = Result<T, Error>;
/// An assembly error: what went wrong, and the [Span] it is reported at
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Error {
/// The span this error is reported at
pub span: Span<usize>,
/// What went wrong
pub kind: ErrorKind,
}
impl std::error::Error for Error {}
/// The ways in which assembly can fail
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ErrorKind {
/// The default kind; displayed as "Not yet implemented"
#[default]
Todo,
/// A label was used, but not defined
UndefinedLabel(String),
/// A label was defined more than once
RedefinedLabel(String),
/// A constant expression read from an address outside of the assembled output
OobRead(u16),
/// A jump was given an odd destination
OddJump(i16),
/// A jump's destination does not fit in the jump instruction
LongJump(i16),
/// A plethora of [Error]s
Errors(Vec<Error>),
}
impl Display for Error {
    /// Formats the error as `[span]: Error: kind`
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{}]: Error: {}", self.span, self.kind)
    }
}
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::Todo => write!(f, "Not yet implemented"),
ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"),
ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"),
ErrorKind::OobRead(addr) => {
write!(f, "Out of bounds read in constant expression: {addr}")
}
ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"),
ErrorKind::LongJump(to) => {
write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe")
}
ErrorKind::Errors(errors) => {
writeln!(f, "Could not complete assembly:")?;
for error in errors {
writeln!(f, "{error}")?;
}
Ok(())
}
}
Directive::String(s) => self.visit_string(s)?,
Directive::Strings(strs) => {
for s in strs {
self.visit_string(s)?;
}
}
};
Ok(())
}
/// Visits a [Label]
fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> {
// Register the label
match self.labels.insert(node.0.to_owned(), self.out.len()) {
Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())),
_ => Ok(()),
}
}
/// Visits an [Instruction]
fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> {
self.push_default();
self.visit_opcode(insn.opcode())?;
self.visit_encoding(insn.encoding())?;
Ok(())
}
/// Visits an [Opcode]
fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> {
*self.last_mut()? |= *node as u16;
Ok(())
}
/// Visits an [Encoding]
fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> {
*self.last_mut()? |= node.word();
match node {
Encoding::Single { dst, .. } => {
self.visit_primary_operand(dst)?;
}
Encoding::Jump { target } => {
self.visit_jump_target(target)?;
}
Encoding::Double { src, dst, .. } => {
self.visit_primary_operand(src)?;
self.visit_secondary_operand(dst)?;
}
}
Ok(())
}
/// Visits a [JumpTarget]
fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> {
match node {
JumpTarget::Number(num) => self.visit_number(num),
JumpTarget::Identifier(id) => {
self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?;
Ok(())
}
}
}
/// Visits a [SecondaryOperand]
fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> {
use SecondaryOperand as O;
if let O::Indexed(_, num) | O::Absolute(num) = node {
self.push_default();
self.visit_number(num)?;
}
Ok(())
}
/// Visits a [PrimaryOperand]
fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> {
use PrimaryOperand as O;
match node {
O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => {
self.push_default();
self.visit_number(num)?;
}
O::Relative(id) => {
let addr = self.push_default();
self.visit_identifier(id, addr, IdentType::Word)?;
}
_ => (),
}
Ok(())
}
/// Visits a number and writes it into the last index
fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> {
*self.last_mut()? |= u16::from(*node);
Ok(())
}
/// Visits a string and appends it, followed by a NUL byte, to the output buffer
fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> {
for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() {
if idx % 2 == 0 {
self.push_default();
}
*self.last_mut()? |= (byte as u16) << (8 * (idx % 2));
}
Ok(())
}
/// Visits an [Identifier], and registers it to the identifier list
fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> {
self.identifiers.push((addr, node.clone(), ty));
Ok(())
}
}

View File

@ -1,56 +0,0 @@
// © 2023 John Breaux
use crate::parser::{error::ParseError, preamble::*};
use std::{
fmt::Display,
path::{Path, PathBuf},
};
#[derive(Debug)]
pub enum AssemblyError {
UnresolvedIdentifier(Identifier),
RedefinedLabel(Identifier),
JumpedTooFar(Identifier, isize),
ParseError(ParseError),
// TODO: This, better'
Context(Box<AssemblyError>, PathBuf, usize),
EmptyBuffer,
}
impl AssemblyError {
pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self {
Self::Context(self.into(), file.as_ref().into(), line)
}
}
impl From<ParseError> for AssemblyError {
fn from(value: ParseError) -> Self { Self::ParseError(value) }
}
impl Display for AssemblyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnresolvedIdentifier(id) => {
write!(f, "Identifier {id} is undefined, but referenced anyway.")
}
Self::RedefinedLabel(id) => {
write!(f, "Redefined label '{id}'.")
}
Self::JumpedTooFar(id, num) => {
write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)")
}
Self::ParseError(e) => Display::fmt(e, f),
Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()),
Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f),
}
}
}
impl std::error::Error for AssemblyError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::ParseError(e) => Some(e),
Self::Context(e, ..) => Some(e),
_ => None,
}
}
}

View File

@ -1,49 +0,0 @@
// © 2023 John Breaux
//! Common error type for [msp430-asm](crate) errors
use super::*;
use std::fmt::Display;
#[derive(Debug)]
pub enum Error {
/// Produced by [lexer]
LexError(lexer::error::LexError),
/// Produced by [parser]
ParseError(parser::error::ParseError),
/// Produced by [assembler]
AssemblyError(assembler::error::AssemblyError),
}
impl Error {}
impl From<lexer::error::LexError> for Error {
fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) }
}
impl From<parser::error::ParseError> for Error {
fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) }
}
impl From<assembler::error::AssemblyError> for Error {
fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) }
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Error::LexError(e) => Display::fmt(e, f),
Error::ParseError(e) => Display::fmt(e, f),
Error::AssemblyError(e) => Display::fmt(e, f),
}
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::LexError(e) => Some(e),
Error::ParseError(e) => Some(e),
Error::AssemblyError(e) => Some(e),
}
}
}

View File

@ -1,22 +0,0 @@
// © 2023 John Breaux
//! Convenience functions and traits for dealing with hashable data
/// The integer type produced by [hash]
pub type Hash = u64;

/// Feeds `hashable` through a freshly-constructed
/// [DefaultHasher](std::collections::hash_map::DefaultHasher), and returns the finished hash.
pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(&hashable, &mut state);
    std::hash::Hasher::finish(&state)
}
/// Construction of a value from the hash of some other, hashable value.
///
/// Implemented for every type which is `From<Hash>`.
pub trait FromHash: From<Hash> {
    /// Hashes anything that implements [type@Hash] using the
    /// [DefaultHasher](std::collections::hash_map::DefaultHasher)
    fn hash<T>(hashable: T) -> Hash
    where T: std::hash::Hash {
        hash(hashable)
    }
    /// Hashes `hashable`, then converts the resulting [type@Hash] into `Self`
    fn from_hash<T>(hashable: T) -> Self
    where
        T: std::hash::Hash,
        Self: Sized,
    {
        let digest = Self::hash(hashable);
        Self::from(digest)
    }
}
impl<T> FromHash for T where T: From<Hash> {}

View File

@ -1,69 +1,268 @@
// © 2023 John Breaux
//! Iterates over [`&str`](str), producing [`Token`s](Token)
// © 2023-2024 John Breaux
//! The [Lexer] turns a [sequence of characters](str) into a stream of
//! [lexically-tagged tokens](token)
pub mod context;
pub mod error;
pub mod ignore;
pub mod preprocessed;
pub mod token;
pub mod token_stream;
use context::Context;
use error::LexError;
use token::{Token, Type};
use token_stream::TokenStream;
use self::token::{Special, TokenKind, *};
use crate::util::Span;
use std::{
iter::Peekable,
str::{CharIndices, FromStr},
};
use unicode_ident::*;
/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
const DEFAULT_BASE: u32 = 10;
/// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token).
///
/// # Examples
/// ```rust
/// # use libmsp430::lexer::{Lexer, token::*};
/// let text = "mov r14, r15";
/// let mut lexer = Lexer::new(text);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof);
/// ```
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
/// Keeps track of the byte offset into the string
iter: Peekable<CharIndices<'t>>,
text: &'t str,
idx: usize,
context: Context,
start: usize,
index: usize,
}
impl<'t> Tokenizer<'t> {
/// Produces a new [Tokenizer] from a [str]ing slice
pub fn new<T>(text: &'t T) -> Self
where T: AsRef<str> + ?Sized {
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
impl<'t> Lexer<'t> {
/// Creates a new [Lexer] over some [text](str), positioned at its first byte
pub fn new(text: &'t str) -> Self {
    let iter = text.char_indices().peekable();
    Self { text, iter, start: 0, index: 0 }
}
/// Gets the current byte-position
///
/// This is the byte offset at which the next emitted token will start, not the offset of
/// the next unread character.
pub fn location(&self) -> usize {
self.start
}
/// Internal: Emits a token with the provided [TokenKind], providing its extents.
///
/// The token covers everything consumed since the previous token; the next token then
/// starts where this one ends.
fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> {
    let span = Span { start: self.start, end: self.index };
    let token = Token::new(self.next_lexeme(), kind, span);
    self.start = self.index;
    Some(token)
}
/// Gets the text consumed so far for the token currently being scanned
fn next_lexeme(&self) -> &'t str {
&self.text[self.start..self.index]
}
/// Consumes characters for as long as `f` accepts the next one, stopping at the first
/// rejected character or at the end of input
fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self {
    while matches!(self.peek(), Some(&c) if f(c)) {
        self.next();
    }
    self
}
/// Skips whitespace other than newlines, then marks the start of the next token
///
/// Returns [None] when the input runs out before a non-space character is found; in that
/// case the start of the next token is left where it was.
fn space(&mut self) -> Option<&mut Self> {
    loop {
        let c = *self.peek()?;
        if c == '\n' || !c.is_whitespace() {
            break;
        }
        self.next();
    }
    self.start = self.index;
    Some(self)
}
/// Consumes a [char] without checking, for ergonomic chaining
///
/// At the end of input, this does nothing.
fn then(&mut self) -> &mut Self {
self.next();
self
}
/// Looks at the next character without consuming it
fn peek(&mut self) -> Option<&char> {
    let (_, c) = self.iter.peek()?;
    Some(c)
}
/// Consumes the next character, moving the index to the byte just past it
fn next(&mut self) -> Option<char> {
    self.iter.next().map(|(at, c)| {
        self.index = at + c.len_utf8();
        c
    })
}
fn count(&mut self, token: &Token) {
// update the context
self.context.count(token);
// advance the index
self.idx += token.len();
/// Scans for the next [Token] in the stream
///
/// Leading whitespace (other than newlines) is skipped first. When the input is exhausted,
/// a [TokenKind::Eof] token is emitted. Otherwise the next character selects either a
/// single-character token, or one of the sub-scanners (numbers, identifiers, strings, ...).
///
/// # Panics
/// Hits a `todo!` when the next character matches none of the arms below.
pub fn scan(&mut self) -> Option<Token<'t>> {
// `space` only yields `None` when it runs out of input
if self.space().is_none() {
return self.emit(TokenKind::Eof);
}
let Some(c) = self.peek() else {
return self.emit(TokenKind::Eof);
};
match c {
'\n' => self.then().emit(TokenKind::Newline),
'!' => self.then().emit(TokenKind::Bang),
'#' => self.then().emit(TokenKind::Hash),
'$' => self.then().emit(TokenKind::Dollar),
'%' => self.then().emit(TokenKind::Percent),
'&' => self.then().emit(TokenKind::Amp),
'\'' => self.then().char(),
'"' => self.then().string(),
'(' => self.then().emit(TokenKind::OpenParen),
')' => self.then().emit(TokenKind::CloseParen),
'*' => self.then().emit(TokenKind::Star),
'+' => self.then().emit(TokenKind::Plus),
',' => self.then().emit(TokenKind::Comma),
'-' => self.then().emit(TokenKind::Minus),
'.' => self.then().directive_or_bw(),
'/' => self.then().comment_or_slash(),
// A leading zero may introduce a base prefix, such as `0x`
'0' => self.then().number_with_base(),
':' => self.then().emit(TokenKind::Colon),
// `;` comments run up to, but do not include, the end of the line
';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
'<' => self.then().less(),
'>' => self.then().greater(),
'@' => self.then().emit(TokenKind::At),
'[' => self.then().emit(TokenKind::OpenBrace),
']' => self.then().emit(TokenKind::CloseBrace),
'^' => self.then().emit(TokenKind::Caret),
'_' => self.then().identifier(),
'{' => self.then().emit(TokenKind::OpenCurly),
'|' => self.then().emit(TokenKind::Bar),
'}' => self.then().emit(TokenKind::CloseCurly),
// The first digit is left unconsumed here, so that `number` can read it
c if c.is_numeric() => self.number::<DEFAULT_BASE>(),
&c if is_xid_start(c) => self.then().identifier(),
c => todo!("Unrecognized character: {c}"),
}
}
/// Scans the rest of a number whose leading `0` has already been consumed
///
/// `x`, `d`, `o`, and `b` select base 16, 10, 8, and 2 respectively. A further ASCII digit
/// continues a number in the default base. Anything else leaves a lone zero.
fn number_with_base(&mut self) -> Option<Token<'t>> {
    let marker = self.peek().copied();
    match marker {
        Some('x') => self.then().number::<16>(),
        Some('d') => self.then().number::<10>(),
        Some('o') => self.then().number::<8>(),
        Some('b') => self.then().number::<2>(),
        Some(c) if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(),
        _ => self.emit(TokenKind::Number(0, 10)),
    }
}
/// Scans one or more digits in base `B`, and emits them as a [TokenKind::Number]
///
/// Returns [None] when there is no digit to read, or when the value does not fit in a [u16].
/// In the latter case every digit of the number has still been consumed.
fn number<const B: u32>(&mut self) -> Option<Token<'t>> {
    let mut num = self.digit::<B>()?;
    while let Some(digit) = self.digit::<B>() {
        // Saturate rather than overflow: a long run of digits must not panic (debug) or wrap
        // around into a small, seemingly valid value (release). A saturated value is always
        // rejected by the range check below.
        num = num.saturating_mul(B).saturating_add(digit);
    }
    let value = u16::try_from(num).ok()?;
    self.emit(TokenKind::Number(value, B as u8))
}
/// Consumes the next character if it is a digit in base `B`, returning its value
fn digit<const B: u32>(&mut self) -> Option<u32> {
    let value = self.peek().and_then(|c| c.to_digit(B))?;
    self.next();
    Some(value)
}
/// Called after a `/` has been consumed: a second `/` starts a comment which runs up to
/// (but not including) the end of the line; anything else leaves a lone slash
fn comment_or_slash(&mut self) -> Option<Token<'t>> {
    if matches!(self.peek(), Some('/')) {
        self.repeat(|c| c != '\n').emit(TokenKind::Comment)
    } else {
        self.emit(TokenKind::Slash)
    }
}
/// Lexes the remainder of a token that started with `<`.
///
/// Only `<<` is implemented; any other continuation hits the `todo!` and panics.
fn less(&mut self) -> Option<Token<'t>> {
    if matches!(self.peek(), Some('<')) {
        return self.then().emit(TokenKind::Lsh);
    }
    todo!("less")
}
/// Lexes the remainder of a token that started with `>`.
///
/// `>>` is emitted as [`TokenKind::Rsh`] (it was previously mis-tagged as `Lsh`). Any other
/// continuation is not implemented yet and hits the `todo!`.
fn greater(&mut self) -> Option<Token<'t>> {
    match self.peek() {
        Some('>') => self.then().emit(TokenKind::Rsh),
        _ => todo!("greater"),
    }
}
/// Lexes an identifier-shaped word and classifies it.
///
/// The word is tried, in order, as a register, then as each opcode family; if none of them
/// parse it, it is emitted as a plain [`TokenKind::Identifier`].
fn identifier(&mut self) -> Option<Token<'t>> {
    // Advance for as long as the character after each step is XID_CONTINUE
    loop {
        match self.then().peek() {
            Some(c) if is_xid_continue(*c) => continue,
            _ => break,
        }
    }
    let word = self.next_lexeme();
    // `or_else` keeps the lookups lazy and in the original priority order
    let kind = Reg::from_str(word)
        .map(TokenKind::Reg)
        .or_else(|_| NoEm::from_str(word).map(TokenKind::NoEm))
        .or_else(|_| OneEm::from_str(word).map(TokenKind::OneEm))
        .or_else(|_| Special::from_str(word).map(TokenKind::Special))
        .or_else(|_| OneArg::from_str(word).map(TokenKind::OneArg))
        .or_else(|_| TwoArg::from_str(word).map(TokenKind::TwoArg))
        .or_else(|_| Jump::from_str(word).map(TokenKind::Jump))
        .unwrap_or(TokenKind::Identifier);
    self.emit(kind)
}
/// Lexes a `.`-prefixed word: `.b` and `.w` become the width markers [`TokenKind::Byte`] and
/// [`TokenKind::Word`]; every other word is a [`TokenKind::Directive`].
fn directive_or_bw(&mut self) -> Option<Token<'t>> {
    // Advance for as long as the character after each step is XID_CONTINUE
    loop {
        match self.then().peek() {
            Some(c) if is_xid_continue(*c) => continue,
            _ => break,
        }
    }
    let kind = match self.next_lexeme() {
        ".b" => TokenKind::Byte,
        ".w" => TokenKind::Word,
        _ => TokenKind::Directive,
    };
    self.emit(kind)
}
/// Lexes a string literal by consuming characters up to and including the closing `"`.
/// Returns `None` if the input ends first.
///
/// Todo: Character unescaping in Lexer::string
fn string(&mut self) -> Option<Token<'t>> {
    loop {
        if self.next()? == '"' {
            break;
        }
    }
    self.emit(TokenKind::String)
}
/// Lexes the body of a character literal: one (possibly escaped) character followed by a
/// closing `'`. Returns `None` if either part is missing.
fn char(&mut self) -> Option<Token<'t>> {
    let value = self.unescape()?;
    match self.next() {
        Some('\'') => self.emit(TokenKind::Char(value)),
        _ => None,
    }
}
/// Reads one character, resolving a backslash escape if present.
///
/// A character other than `\` is returned unchanged. After a `\`, the next character picks
/// the escape; characters with no special meaning stand for themselves.
fn unescape(&mut self) -> Option<char> {
    let first = self.next()?;
    if first != '\\' {
        return Some(first);
    }
    let escaped = match self.next()? {
        'a' => '\x07',
        'b' => '\x08',
        'f' => '\x0c',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        'x' => self.hex_escape()?,
        'u' => self.unicode_escape()?,
        '0' => '\0',
        literal => literal,
    };
    Some(escaped)
}
/// Reads exactly two hex digits and converts them to the character with that code.
/// Returns `None` if either digit is missing (no dedicated error is produced yet).
fn hex_escape(&mut self) -> Option<char> {
    let high = self.digit::<16>()?;
    let low = self.digit::<16>()?;
    // `low` is below 16, so OR-ing it into the shifted high nibble equals adding it
    char::from_u32((high << 4) | low)
}
/// Unescapes the `{…}` part of a single `\u{…}` unicode escape.
///
/// Returns `None` when the opening `{` is missing, a non-hex character appears before the
/// closing `}`, the input ends before `}`, the digits overflow a `u32`, or the value is not
/// a valid `char`.
fn unicode_escape(&mut self) -> Option<char> {
    let Some('{') = self.peek() else {
        return None; //Err(Error::invalid_escape('u', self.line(), self.col()));
    };
    self.then();
    let mut out: u32 = 0;
    while let Some(c) = self.peek() {
        match c {
            '}' => {
                self.then();
                return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col()));
            }
            _ => {
                let digit = self.digit::<16>()?;
                // Checked arithmetic: a plain `out << 4` silently drops the high bits, which
                // let over-long escapes such as `\u{100000041}` decode to an unrelated char.
                out = out.checked_mul(16)?.checked_add(digit)?;
            }
        }
    }
    None //Err(Error::invalid_escape('u', self.line(), self.col()))
}
}
impl<'text> Iterator for Tokenizer<'text> {
    type Item = Token<'text>;

    /// Lexes the next [Token] from the unread part of the text and counts it.
    /// Yields `None` once the index has reached the end of the text.
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx < self.text.len() {
            let token = Token::from(&self.text[self.idx..]);
            // Count the token
            self.count(&token);
            Some(token)
        } else {
            None
        }
    }
}
impl<'text> TokenStream<'text> for Tokenizer<'text> {
    /// Returns a copy of the tokenizer's current [Context].
    fn context(&self) -> Context {
        self.context
    }

    // Tokenizer has access to the source buffer, and can implement expect and peek without
    // cloning itself. This can go wrong, of course, if an [Identifier] is expected, since all
    // instructions and registers are valid identifiers.

    /// Lexes the unread text as `expected` only; on success the token is counted, on failure
    /// the error is tagged with the current [Context].
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let remaining = &self.text[self.idx..];
        match Token::expect(remaining, expected) {
            Ok(token) => {
                self.count(&token);
                Ok(token)
            }
            Err(error) => Err(error.context(self.context())),
        }
    }

    /// Lexes the next token without counting it.
    fn peek(&mut self) -> Self::Item {
        let remaining = &self.text[self.idx..];
        Token::from(remaining)
    }

    /// Like `expect`, but the token is not counted.
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let remaining = &self.text[self.idx..];
        match Token::expect(remaining, expected) {
            Ok(token) => Ok(token),
            Err(error) => Err(error.context(self.context())),
        }
    }
}
#[cfg(test)]
mod tests;

View File

@ -1,38 +0,0 @@
// © 2023 John Breaux
//! A [Context] stores contextual information about the current tokenizer state
//!
//! This data is trivially copyable and can be provided in error messages using the
//! [Error::Contextual] specialization)
use super::*;
/// Stores contextual information about the current tokenizer state, useful for printing errors
///
/// Displayed as `line:position`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Context {
// Current line; starts at 1 and is incremented for every counted `Type::Endl` token
line: usize,
// Position within the line; starts at 1 and grows by the byte length of each counted token
position: usize,
// Number of tokens counted so far (`Type::EndOfFile` tokens are not counted)
tokens: usize,
}
impl Context {
    /// Creates a [Context] at line 1, position 1, with no tokens counted.
    pub fn new() -> Self {
        Default::default()
    }

    /// The current line number (1-based).
    pub fn line(&self) -> usize {
        self.line
    }

    /// The number of tokens counted so far.
    pub fn tokens(&self) -> usize {
        self.tokens
    }

    /// The current position within the line (1-based, in bytes).
    pub fn position(&self) -> usize {
        self.position
    }

    /// Advances the context past token `t`.
    ///
    /// `Type::EndOfFile` tokens are ignored entirely. A `Type::Endl` token starts a new line;
    /// any other token moves the position forward by its length in bytes.
    pub(super) fn count(&mut self, t: &Token) {
        match t.variant() {
            Type::EndOfFile => return,
            Type::Endl => {
                self.line += 1;
                // An Endl lexeme is the newline *plus* the whitespace that follows it, so the
                // 1-based position after it is 1 + (len - 1) = len. Resetting to 1 would
                // under-report the column of every indented token.
                self.position = t.len().max(1);
            }
            _ => self.position += t.len(),
        }
        self.tokens += 1;
    }
}
impl Default for Context {
    /// The initial context: line 1, position 1, no tokens counted.
    fn default() -> Self {
        Self {
            tokens: 0,
            line: 1,
            position: 1,
        }
    }
}
impl std::fmt::Display for Context {
    /// Formats the context as `line:position`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, position, .. } = self;
        write!(f, "{line}:{position}")
    }
}

View File

@ -1,68 +0,0 @@
// © 2023 John Breaux
use super::{
context::Context,
token::{OwnedToken, *},
};
use std::fmt::Display;
/// Errors produced while lexing or while checking expectations on a token stream.
#[derive(Debug)]
pub enum LexError {
/// Any other error, tagged with [Context]. Created by [`LexError::context()`]
Contextual(Context, Box<Self>),
/// Produced by [Token] when the input is entirely unexpected.
UnexpectedSymbol(String),
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
UnexpectedToken { expected: Type, got: OwnedToken },
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the
/// expected [Types](Type)
AllExpectationsFailed { expected: Types, got: OwnedToken },
}
impl LexError {
pub fn context(self, c: Context) -> Self {
match self {
Self::Contextual(..) => self,
_ => Self::Contextual(c, Box::new(self)),
}
}
// Extracts the root of the error tree
pub fn bare(self) -> Self {
match self {
Self::Contextual(_, bare) => bare.bare(),
_ => self,
}
}
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
match expected.as_ref().len() {
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
}
}
pub fn mask_expectation(mut self, expected: Type) -> Self {
match self {
LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::Contextual(context, err) => {
self = LexError::Contextual(context, Box::new(err.mask_expectation(expected)))
}
_ => (),
}
self
}
}
impl Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""),
LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."),
}
}
}
impl std::error::Error for LexError {}

View File

@ -1,55 +0,0 @@
// © 2023 John Breaux
//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`]
use super::*;
/// A [TokenStream] adapter that pairs an inner stream with one [Type] to ignore.
///
/// The [TokenStream] methods `allow` that type on the inner stream before delegating to it.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ignore<'t, T>
where T: TokenStream<'t>
{
// The token type to be ignored
ignore: Type,
// The wrapped stream, borrowed mutably for the adapter's lifetime
inner: &'t mut T,
}
impl<'t, T> Ignore<'t, T>
where T: TokenStream<'t>
{
    /// Creates a new [Ignore], which ignores the [ignore Type](Type)
    pub fn new(ignore: Type, t: &'t mut T) -> Self {
        Self { ignore, inner: t }
    }

    /// Gets a mutable reference to the inner [Iterator]
    pub fn inner_mut(&mut self) -> &mut T {
        self.inner
    }
}
impl<'t, T> Iterator for Ignore<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;

    /// Yields the next token of the inner stream that is not of the ignored [Type].
    ///
    /// This filters on the configured `ignore` type, matching the [TokenStream] methods,
    /// rather than on a hard-coded `Type::Space`.
    fn next(&mut self) -> Option<Self::Item> {
        // Loop rather than recurse: the ignored type need not be greedy, so several ignored
        // tokens may arrive in a row.
        loop {
            let next = self.inner.next()?;
            if !next.is_variant(self.ignore) {
                return Some(next);
            }
        }
    }
}
impl<'t, T> TokenStream<'t> for Ignore<'t, T>
where T: TokenStream<'t>
{
    /// Returns the inner stream's [Context].
    fn context(&self) -> Context {
        self.inner.context()
    }

    /// Allows the ignored type on the inner stream, then expects `expected` from it.
    fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let Self { ignore, inner } = self;
        inner.allow(*ignore);
        inner.expect(expected)
    }

    /// Allows the ignored type on the inner stream, then peeks at it.
    fn peek(&mut self) -> Self::Item {
        let Self { ignore, inner } = self;
        inner.allow(*ignore);
        inner.peek()
    }

    /// Allows the ignored type on the inner stream, then peek-expects `expected` from it.
    fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
        let Self { ignore, inner } = self;
        inner.allow(*ignore);
        inner.peek_expect(expected)
    }
}

View File

@ -1,174 +0,0 @@
// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
//! ".define" rules
use super::*;
use std::collections::{HashMap, VecDeque};
// TODO: Clean this spaghetti mess up
/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
/// rules
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T>
where T: TokenStream<'t>
{
// Maps a defined token to the tokens that replace it
sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
// Every token type that has appeared as a `.define` key (kept free of duplicates)
sub_types: Vec<Type>,
// Tokens waiting to be yielded before anything else is read from `inner`
queue: VecDeque<Token<'t>>,
// The wrapped stream
inner: &'t mut T,
}
impl<'t, T> Iterator for Preprocessed<'t, T>
where T: TokenStream<'t>
{
    type Item = Token<'t>;

    /// Yields queued tokens first. Otherwise reads from the inner stream, and if that token
    /// has a substitution, queues the replacement tokens and yields from them instead.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(queued) = self.queue.pop_front() {
            return Some(queued);
        }
        let next = self.inner.next()?;
        match self.sub_table.get(&next) {
            Some(subs) => {
                self.queue.extend(subs);
                self.next()
            }
            None => Some(next),
        }
    }
}
impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
/// Creates a new [Preprocessed] [TokenStream]
///
/// The substitution table, key-type list and queue all start out empty.
pub fn new(inner: &'t mut T) -> Self {
Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
}
/// Gets a mutable reference to the inner [TokenStream]
pub fn inner_mut(&mut self) -> &mut T { self.inner }
/// Preserve the next token in the queue
///
/// Pushes `token` onto the back of the queue and hands the same token back to the caller.
fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
self.queue.push_back(token);
token
}
/// Process .define directives in the preprocessor
///
/// Does nothing unless `token` is a [Type::Directive] whose lexeme starts with `.define`.
/// Otherwise the next inner token becomes the key, and every following token up to the end
/// of the line (comments and spaces excluded) becomes its replacement.
fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
return Ok(());
}
// Tokenize the subdocument
// NOTE(review): `allow` is defined elsewhere; presumably it consumes an optional token of
// the given type -- confirm
self.allow(Type::Directive);
self.allow(Type::Space);
// The token being defined; a stream that ends here is not treated as an error
let Some(k) = self.inner.next() else { return Ok(()) };
// Remember the key's type so the expect paths know which token types may be substituted
if !self.sub_types.contains(&k.variant()) {
self.sub_types.push(k.variant());
};
self.allow(Type::Space);
let mut replacement = vec![];
loop {
match self.inner.peek().variant() {
// The definition ends at the end of the line or of the input
Type::Endl | Type::EndOfFile => break,
Type::Comment | Type::Space => {
// ignore comments
self.inner.next();
}
_ => {
let next = self.inner.next().unwrap();
// The token is recorded as replacement text and also pushed onto the output queue
replacement.push(self.enqueue(next));
}
}
}
self.sub_table.insert(k, replacement);
Ok(())
}
/// Does the preprocessing step
///
/// If `token` has a substitution, queues its replacement tokens and advances the inner
/// stream by one token.
fn preprocess(&mut self, token: Token<'t>) {
if let Some(subs) = self.sub_table.get(&token) {
self.queue.extend(subs);
self.inner.next();
}
}
}
impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where T: TokenStream<'t>
{
/// Returns the inner stream's [Context].
fn context(&self) -> Context { self.inner.context() }
/// Expects a token of type `expected`, serving queued tokens before the inner stream.
///
/// When the queue is empty and the inner stream does not yield `expected` directly, a token
/// that has a substitution is expanded into the queue and the expectation is retried.
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
match self.queue.front() {
Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
None => {
// Only resolve defines when expecting, otherwise you'll run into issues.
if let Ok(next) = self.inner.expect(expected) {
self.define(next)?;
return Ok(next);
}
// The direct expectation failed: see whether the next token is a substitution candidate
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
if let Some(subs) = self.sub_table.get(&next) {
self.inner.allow_any_of(&self.sub_types);
self.queue.extend(subs);
}
return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
}
Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
}
}
}
/// Peeks at the next token: the front of the queue if there is one, otherwise the inner
/// stream's next token, or the first token of its substitution if it has one.
fn peek(&mut self) -> Self::Item {
match self.queue.front() {
Some(token) => *token,
None => {
// Only allow substitution when the next token is unexpected
let old = self.inner.peek();
// NOTE: `preprocess` advances the inner stream when it substitutes, so this peek can
// consume input
self.preprocess(old);
match self.queue.front() {
Some(&new) => new,
None => old,
}
}
}
}
/// Like `expect`, but the matching token is not removed from the queue or counted by the
/// inner stream.
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
match self.queue.front() {
Some(&token) if token.is_variant(expected) => Ok(token),
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
None => {
if let Ok(next) = self.inner.peek_expect(expected) {
return Ok(next);
}
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
self.preprocess(next);
return if self.queue.is_empty() {
self.inner.peek_expect(expected)
} else {
self.peek_expect(expected)
};
}
// NOTE(review): unlike the other error paths in this impl, this error is not tagged
// with `.context(..)` -- confirm whether that is intentional
Err(LexError::expected([expected], self.inner.peek()))
}
}
}
}
// Manual Debug impl: prints the substitution state and the current context instead of the
// inner stream, so `T` does not need to implement Debug.
impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
where T: TokenStream<'t>
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Preprocessed")
.field("sub_table", &self.sub_table)
.field("sub_types", &self.sub_types)
.field("queue", &self.queue)
.field("context", &self.context())
// `inner` is deliberately omitted, hence non-exhaustive
.finish_non_exhaustive()
}
}

66
src/lexer/tests.rs Normal file
View File

@ -0,0 +1,66 @@
use super::*;
/// Test helper for the lexer.
///
/// - `lex!(type (tokens), expected)` lexes the stringified `tokens`, scans a single token and
///   asserts that its kind equals `expected`.
/// - `lex!({ tokens })` evaluates to a `Lexer` over the stringified `tokens`.
macro_rules! lex {
    (type ($t:tt), $expected:expr) => {
        // `$t:tt` inside a transcriber expands to the token followed by a literal `: tt`,
        // so the failure message is assembled with `concat!` instead.
        let token = Lexer::new(stringify!($t))
            .scan()
            .expect(concat!(stringify!($t), " should yield a valid token"));
        assert_eq!(token.kind, $expected);
    };
    ({ $($t:tt)* }) => {
        Lexer::new(stringify!($($t)*))
    };
}
/// Character literals, plain and with a `\x` hex escape, lex to [`TokenKind::Char`].
#[test]
fn ascii_char() {
lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char
lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char
}
/// A `\u{...}` escape inside a character literal lexes to the corresponding char.
#[test]
fn unicode_escape_char() {
lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈
}
/// Numbers lex to `TokenKind::Number(value, radix)`, with the radix chosen by the `0x`, `0d`,
/// `0o` or `0b` prefix.
#[test]
fn number_with_base() {
lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number
lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number
lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number
lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number
lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number
lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number
}
/// Every operand-less emulated mnemonic lexes to its [`NoEm`] variant.
#[test]
fn no_operand_emulated() {
lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm
lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm
lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm
lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm
lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm
lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm
lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm
lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm
lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm
lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
}
/// Register names lex to [`Reg`] variants; `r0`-`r3` are aliases of `pc`, `sp`, `sr` and `cg`.
#[test]
fn registers() {
// Named special registers
lex!(type(pc), TokenKind::Reg(Reg::PC));
lex!(type(sp), TokenKind::Reg(Reg::SP));
lex!(type(sr), TokenKind::Reg(Reg::SR));
lex!(type(cg), TokenKind::Reg(Reg::CG));
// Numbered aliases of the special registers
lex!(type(r0), TokenKind::Reg(Reg::PC));
lex!(type(r1), TokenKind::Reg(Reg::SP));
lex!(type(r2), TokenKind::Reg(Reg::SR));
lex!(type(r3), TokenKind::Reg(Reg::CG));
// Remaining numbered registers
lex!(type(r4), TokenKind::Reg(Reg::R4));
lex!(type(r5), TokenKind::Reg(Reg::R5));
lex!(type(r6), TokenKind::Reg(Reg::R6));
lex!(type(r7), TokenKind::Reg(Reg::R7));
lex!(type(r8), TokenKind::Reg(Reg::R8));
lex!(type(r9), TokenKind::Reg(Reg::R9));
lex!(type(r10), TokenKind::Reg(Reg::R10));
lex!(type(r11), TokenKind::Reg(Reg::R11));
lex!(type(r12), TokenKind::Reg(Reg::R12));
lex!(type(r13), TokenKind::Reg(Reg::R13));
lex!(type(r14), TokenKind::Reg(Reg::R14));
lex!(type(r15), TokenKind::Reg(Reg::R15));
}
// TODO: opcode tests, misc. special character tests, etc.

View File

@ -1,335 +1,479 @@
// © 2023 John Breaux
//! A [Token] is a [semantically tagged](Type) sequence of characters.
// © 2023-2024 John Breaux
//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span]
//!
//! Token, and the tokenizer, intend to copy as little as possible.
use super::error::LexError;
use regex::Regex;
use std::{
fmt::{Debug, Display},
sync::OnceLock,
};
/// Implements regex matching functions on [`Token`] for each [`Type`],
/// and implements [`From<&str>`] for [`Token`]
macro_rules! regex_impl {
(<$t:lifetime> $type:ty {$(
$(#[$meta:meta])*
pub fn $func:ident (text: &str) -> Option<Self> {
regex!($out:path = $re:literal)
//! [Tokens](Token) are borrowed, and cannot outlive their source slice (lifetime `'t`)
use crate::util::Span;
/// A borrowed slice of source text tagged with its [TokenKind] and a [Span].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'t> {
/// The source text of this token, borrowed for `'t`
pub lexeme: &'t str,
/// The semantic tag of this token
pub kind: TokenKind,
/// NOTE(review): presumably the token's location in the source text -- confirm against the
/// lexer
pub pos: Span<usize>,
}
impl<'t> Token<'t> {
pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self {
Self { lexeme, kind, pos }
}
)*}) => {
impl<$t> $type {
/// Lexes a token only for the expected `variant`
///
/// Warning: This bypasses precedence rules. Only use for specific patterns.
pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> {
match expected {$(
$out => Self::$func(text),
)*}.ok_or(LexError::UnexpectedToken {
expected,
got: Self::from(text).into(),
pub fn kind(&self) -> TokenKind {
self.kind
}
}
/// The semantic tag attached to a [Token] by the lexer.
///
/// Punctuation variants are annotated with the text they stand for. `Number` carries its
/// value and radix, `Char` carries the unescaped character, and the keyword variants carry
/// the parsed register or opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenKind {
Eof,
Newline, // \n
OpenParen, // (
CloseParen, // )
OpenCurly, // {
CloseCurly, // }
OpenBrace, // [
CloseBrace, // ]
Comma, // ,
Colon, // :
Bang, // !
At, // @
Amp, // &
Bar, // |
Caret, // ^
Star, // *
Hash, // #
Dollar, // $
Percent, // %
Plus, // +
Minus, // -
Slash, // /
Lsh, // <<
Rsh, // >>
Comment, // (';' | '//') .* '\n' |
Directive, // '.' XID_CONTINUE*
Identifier, // XID_START XID_CONTINUE*
// (value, radix): the radix is the base the literal was written in
Number(u16, u8), // varies depending on base
Char(char), // '\'' ('\' Escape | .) '\''
String, // '"' .* '"'
// Keywords: an identifier-shaped word that parsed as a register or opcode
Reg(Reg),
NoEm(NoEm),
OneEm(OneEm),
Special(Special),
OneArg(OneArg),
TwoArg(TwoArg),
Jump(Jump),
Byte, // .b
Word, // .w
}
/// Register names recognized by the lexer.
///
/// The first four are parsed both from their names (`pc`, `sp`, `sr`, `cg`) and from the
/// numbered aliases `r0`-`r3`; the rest are parsed from `r4`-`r15`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Reg {
PC, // "pc" or "r0"
SP, // "sp" or "r1"
SR, // "sr" or "r2"
CG, // "cg" or "r3"
R4,
R5,
R6,
R7,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
}
/// Fake instructions of the form `opcode`
///
/// Each variant is parsed from, and displayed as, its lowercase name (e.g. `nop`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NoEm {
Nop,
Ret,
Clrc,
Clrz,
Clrn,
Setc,
Setz,
Setn,
Dint,
Eint,
}
/// Fake instructions of the form `opcode dst`
///
/// Each variant is parsed from its lowercase name (e.g. `pop`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneEm {
Pop,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
}
/// These opcodes have bespoke grammatical rules
///
/// Each variant is parsed from its lowercase name (currently only `br`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Special {
/// Br = "br" Src
Br,
}
/// Real instructions of the form `opcode src`
///
/// Each variant is parsed from its lowercase name (e.g. `push`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneArg {
Rrc,
Swpb,
Rra,
Sxt,
Push,
Call,
Reti,
}
/// Real instructions of the form `opcode src, dst`
///
/// Each variant is parsed from its lowercase name (e.g. `mov`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TwoArg {
Mov,
Add,
Addc,
Subc,
Sub,
Cmp,
Dadd,
Bit,
Bic,
Bis,
Xor,
And,
}
/// Jump mnemonics recognized by the lexer.
///
/// Each variant is parsed from its lowercase name (e.g. `jmp`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Jump {
Jne,
Jnz,
Jeq,
Jz,
Jnc,
Jlo,
Jc,
Jhs,
Jn,
Jge,
Jl,
Jmp,
}
mod convert {
//! Implementations of [FromStr] for [token](super) types.
use super::*;
use std::str::FromStr;
impl FromStr for Reg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pc" => Reg::PC,
"sp" => Reg::SP,
"sr" => Reg::SR,
"cg" => Reg::CG,
"r0" => Reg::PC,
"r1" => Reg::SP,
"r2" => Reg::SR,
"r3" => Reg::CG,
"r4" => Reg::R4,
"r5" => Reg::R5,
"r6" => Reg::R6,
"r7" => Reg::R7,
"r8" => Reg::R8,
"r9" => Reg::R9,
"r10" => Reg::R10,
"r11" => Reg::R11,
"r12" => Reg::R12,
"r13" => Reg::R13,
"r14" => Reg::R14,
"r15" => Reg::R15,
_ => Err(())?,
})
}
}
impl FromStr for NoEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"nop" => NoEm::Nop,
"ret" => NoEm::Ret,
"clrc" => NoEm::Clrc,
"clrz" => NoEm::Clrz,
"clrn" => NoEm::Clrn,
"setc" => NoEm::Setc,
"setz" => NoEm::Setz,
"setn" => NoEm::Setn,
"dint" => NoEm::Dint,
"eint" => NoEm::Eint,
_ => Err(())?,
})
}
}
impl FromStr for OneEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pop" => OneEm::Pop,
"rla" => OneEm::Rla,
"rlc" => OneEm::Rlc,
"inv" => OneEm::Inv,
"clr" => OneEm::Clr,
"tst" => OneEm::Tst,
"dec" => OneEm::Dec,
"decd" => OneEm::Decd,
"inc" => OneEm::Inc,
"incd" => OneEm::Incd,
"adc" => OneEm::Adc,
"dadc" => OneEm::Dadc,
"sbc" => OneEm::Sbc,
_ => Err(())?,
})
}
}
impl FromStr for Special {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"br" => Special::Br,
_ => Err(())?,
})
}
}
impl FromStr for OneArg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"rrc" => OneArg::Rrc,
"swpb" => OneArg::Swpb,
"rra" => OneArg::Rra,
"sxt" => OneArg::Sxt,
"push" => OneArg::Push,
"call" => OneArg::Call,
"reti" => OneArg::Reti,
_ => Err(())?,
})
}
}
impl FromStr for TwoArg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"mov" => TwoArg::Mov,
"add" => TwoArg::Add,
"addc" => TwoArg::Addc,
"subc" => TwoArg::Subc,
"sub" => TwoArg::Sub,
"cmp" => TwoArg::Cmp,
"dadd" => TwoArg::Dadd,
"bit" => TwoArg::Bit,
"bic" => TwoArg::Bic,
"bis" => TwoArg::Bis,
"xor" => TwoArg::Xor,
"and" => TwoArg::And,
_ => Err(())?,
})
}
}
impl FromStr for Jump {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"jne" => Jump::Jne,
"jnz" => Jump::Jnz,
"jeq" => Jump::Jeq,
"jz" => Jump::Jz,
"jnc" => Jump::Jnc,
"jlo" => Jump::Jlo,
"jc" => Jump::Jc,
"jhs" => Jump::Jhs,
"jn" => Jump::Jn,
"jge" => Jump::Jge,
"jl" => Jump::Jl,
"jmp" => Jump::Jmp,
_ => Err(())?,
})
}
$(
$(#[$meta])*
/// Tries to read [`
#[doc = stringify!($out)]
/// `] from `text`
pub fn $func(text: &$t str) -> Option<Self> {
static RE: OnceLock<Regex> = OnceLock::new();
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
.find(text)?.into();
Some(Self { variant: $out, lexeme })
})*
}
impl<$t> From<&$t str> for $type {
fn from (value: &$t str) -> Self {
$(
if let Some(token) = Self::$func(value) {
token
} else
)*
{todo!("Unexpected input: {value:#?} (Tokenization failure)")}
}
}
};
}
/// A [Token] is a [semantically tagged](Type) sequence of characters
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'text> {
/// The type of this token
variant: Type,
/// The sub[str]ing corresponding to this token
lexeme: &'text str,
}
impl<'text> Token<'text> {
/// Returns the [Type] of this [Token]
pub fn variant(&self) -> Type { self.variant }
/// Returns the lexeme (originating string slice) of this token
pub fn lexeme(&self) -> &'text str { self.lexeme }
/// Parses this [Token] into another type
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
where F: std::str::FromStr {
self.lexeme.parse()
}
/// Returns whether the Lexeme is the expected [Type]
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
/// Returns the length of [Self::lexeme] in bytes.
pub fn len(&self) -> usize { self.lexeme.len() }
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
}
impl<'text> Debug for Token<'text> {
mod display {
//! Implementations of [Display] for [token](super) types.
use super::*;
use std::fmt::Display;
impl<'t> Display for Token<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
let Self { lexeme, kind, pos: _ } = self;
match kind {
TokenKind::Comment
| TokenKind::Directive
| TokenKind::Identifier
| TokenKind::String => {
write!(f, "{}", lexeme)
}
}
impl<'text> Display for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.variant {
Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
v => write!(f, "{v} \"{}\"", self.lexeme),
ty => ty.fmt(f),
}
}
}
/// A [token Type](Type) is a semantic tag for a sequence of characters
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
/// contiguous whitespace, excluding newline
Space,
/// newline and contiguous whitespace
Endl,
/// A line-comment
Comment,
/// Jump label *definition*
Label,
/// Instructions
Insn,
/// Operand width is byte
ByteWidth,
/// Operand width is word
WordWidth,
/// Register mnemonic (i.e. `pc`, `r14`)
Register,
/// Marker for base-10
RadixMarkerDec,
/// Marker for base-16
RadixMarkerHex,
/// Marker for base-8
RadixMarkerOct,
/// Marker for base-2
RadixMarkerBin,
/// 1-4 hexadigit numbers only
Number,
/// Negative number marker
Minus,
/// post-increment mode marker
Plus,
/// Open-Indexed-Mode marker
LParen,
/// Close-Indexed-Mode marker
RParen,
/// Open Square Bracket
LBracket,
/// Closed Square Bracket
RBracket,
/// Indirect mode marker
Indirect,
/// absolute address marker
Absolute,
/// immediate value marker
Immediate,
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underline
Identifier,
/// A string, encased in "quotes"
String,
/// Assembler directive
Directive,
/// Separator (comma)
Separator,
/// End of File marker
#[default]
EndOfFile,
/// Invalid token
Invalid,
}
regex_impl! {<'text> Token<'text> {
pub fn expect_space(text: &str) -> Option<Self> {
regex!(Type::Space = r"^[\s--\n]+")
}
pub fn expect_endl(text: &str) -> Option<Self> {
regex!(Type::Endl = r"^\n[\s--\n]*")
}
pub fn expect_comment(text: &str) -> Option<Self> {
regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
}
pub fn expect_label(text: &str) -> Option<Self> {
regex!(Type::Label = r"^:")
}
pub fn expect_insn(text: &str) -> Option<Self> {
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
}
pub fn expect_byte_width(text: &str) -> Option<Self> {
regex!(Type::ByteWidth = r"(?i)^\.b")
}
pub fn expect_word_width(text: &str) -> Option<Self> {
regex!(Type::WordWidth = r"(?i)^\.w")
}
pub fn expect_register(text: &str) -> Option<Self> {
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
}
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerDec = r"(?i)^0d")
}
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
}
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerOct = r"(?i)^0o")
}
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerBin = r"(?i)^0b")
}
pub fn expect_number(text: &str) -> Option<Self> {
regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)")
}
pub fn expect_minus(text: &str) -> Option<Self> {
regex!(Type::Minus = r"^-")
}
pub fn expect_plus(text: &str) -> Option<Self> {
regex!(Type::Plus = r"^\+")
}
pub fn expect_l_paren(text: &str) -> Option<Self> {
regex!(Type::LParen = r"^\(")
}
pub fn expect_r_paren(text: &str) -> Option<Self> {
regex!(Type::RParen = r"^\)")
}
pub fn expect_l_bracket(text: &str) -> Option<Self> {
regex!(Type::LBracket = r"^\[")
}
pub fn expect_r_bracket(text: &str) -> Option<Self> {
regex!(Type::RBracket = r"^]")
}
pub fn expect_indrect(text: &str) -> Option<Self> {
regex!(Type::Indirect = r"^@")
}
pub fn expect_absolute(text: &str) -> Option<Self> {
regex!(Type::Absolute = r"^&")
}
pub fn expect_immediate(text: &str) -> Option<Self> {
regex!(Type::Immediate = r"^#")
}
pub fn expect_string(text: &str) -> Option<Self> {
regex!(Type::String = r#"^"[^"]*""#)
}
pub fn expect_directive(text: &str) -> Option<Self> {
regex!(Type::Directive = r"^\.\w+")
}
pub fn expect_identifier(text: &str) -> Option<Self> {
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
}
pub fn expect_separator(text: &str) -> Option<Self> {
regex!(Type::Separator = r"^,")
}
pub fn expect_end_of_file(text: &str) -> Option<Self> {
regex!(Type::EndOfFile = r"^$")
}
pub fn expect_anything(text: &str) -> Option<Self> {
regex!(Type::Invalid = r"^.*")
}
}}
impl Display for Type {
impl Display for TokenKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Space => Display::fmt("space", f),
Self::Endl => Display::fmt("newline", f),
Self::Comment => Display::fmt("comment", f),
Self::Label => Display::fmt("label definition", f),
Self::Insn => Display::fmt("opcode", f),
Self::ByteWidth => Display::fmt("byte-width", f),
Self::WordWidth => Display::fmt("word-width", f),
Self::Register => Display::fmt("register", f),
Self::RadixMarkerDec => Display::fmt("decimal marker", f),
Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f),
Self::RadixMarkerOct => Display::fmt("octal marker", f),
Self::RadixMarkerBin => Display::fmt("binary marker", f),
Self::Number => Display::fmt("number", f),
Self::Minus => Display::fmt("minus sign", f),
Self::Plus => Display::fmt("plus sign", f),
Self::LParen => Display::fmt("left parenthesis", f),
Self::RParen => Display::fmt("right parenthesis", f),
Self::LBracket => Display::fmt("left bracket", f),
Self::RBracket => Display::fmt("right bracket", f),
Self::Indirect => Display::fmt("indirect", f),
Self::Absolute => Display::fmt("absolute", f),
Self::Immediate => Display::fmt("immediate", f),
Self::Identifier => Display::fmt("identifier", f),
Self::String => Display::fmt("string", f),
Self::Directive => Display::fmt("directive", f),
Self::Separator => Display::fmt("comma", f),
Self::EndOfFile => Display::fmt("EOF", f),
Self::Invalid => Display::fmt("invalid token", f),
TokenKind::Eof => write!(f, "[EOF]"),
TokenKind::Newline => writeln!(f),
TokenKind::OpenParen => write!(f, "("),
TokenKind::CloseParen => write!(f, ")"),
TokenKind::OpenCurly => write!(f, "{{"),
TokenKind::CloseCurly => write!(f, "}}"),
TokenKind::OpenBrace => write!(f, "["),
TokenKind::CloseBrace => write!(f, "]"),
TokenKind::Comma => write!(f, ","),
TokenKind::Colon => write!(f, ":"),
TokenKind::Bang => write!(f, "!"),
TokenKind::At => write!(f, "@"),
TokenKind::Amp => write!(f, "&"),
TokenKind::Bar => write!(f, "|"),
TokenKind::Caret => write!(f, "^"),
TokenKind::Star => write!(f, "*"),
TokenKind::Hash => write!(f, "#"),
TokenKind::Dollar => write!(f, "$"),
TokenKind::Percent => write!(f, "%"),
TokenKind::Plus => write!(f, "+"),
TokenKind::Minus => write!(f, "-"),
TokenKind::Slash => write!(f, "/"),
TokenKind::Lsh => write!(f, "<<"),
TokenKind::Rsh => write!(f, ">>"),
TokenKind::Comment => write!(f, "; "),
TokenKind::Directive => write!(f, "."),
TokenKind::Identifier => write!(f, "Identifier"),
TokenKind::Number(val, 2) => write!(f, "0b{val:b}"),
TokenKind::Number(val, 8) => write!(f, "0o{val:o}"),
TokenKind::Number(val, 16) => write!(f, "0x{val:x}"),
TokenKind::Number(val, _) => write!(f, "{val}"),
TokenKind::Char(c) => write!(f, "'{c}'"),
TokenKind::String => write!(f, "\"String\""),
TokenKind::Reg(kw) => write!(f, "{kw}"),
TokenKind::NoEm(kw) => write!(f, "{kw}"),
TokenKind::OneEm(kw) => write!(f, "{kw}"),
TokenKind::Special(kw) => write!(f, "{kw}"),
TokenKind::OneArg(kw) => write!(f, "{kw}"),
TokenKind::TwoArg(kw) => write!(f, "{kw}"),
TokenKind::Jump(kw) => write!(f, "{kw}"),
TokenKind::Byte => write!(f, ".b"),
TokenKind::Word => write!(f, ".w"),
}
}
}
/// A [Token] which can outlive its parent buffer
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OwnedToken {
/// The type of this token
variant: Type,
/// The sub[String] corresponding to this token
lexeme: String,
}
impl Display for OwnedToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
}
impl<'t> From<&'t OwnedToken> for Token<'t> {
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
}
impl From<Token<'_>> for OwnedToken {
fn from(value: Token<'_>) -> Self {
let Token { variant, lexeme } = value;
OwnedToken { variant, lexeme: lexeme.to_owned() }
}
}
/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Types(Vec<Type>);
impl<T: AsRef<[Type]>> From<T> for Types {
// TODO: Possibly bad. Check out in rust playground.
fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
}
impl Display for Types {
impl Display for Reg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (idx, t) in self.0.iter().enumerate() {
Display::fmt(t, f)?;
match idx {
i if i < self.0.len() - 2 => Display::fmt(", ", f)?,
i if i < self.0.len() - 1 => Display::fmt(" or ", f)?,
_ => (),
match self {
Reg::PC => "pc".fmt(f),
Reg::SP => "sp".fmt(f),
Reg::SR => "sr".fmt(f),
Reg::CG => "cg".fmt(f),
Reg::R4 => "r4".fmt(f),
Reg::R5 => "r5".fmt(f),
Reg::R6 => "r6".fmt(f),
Reg::R7 => "r7".fmt(f),
Reg::R8 => "r8".fmt(f),
Reg::R9 => "r9".fmt(f),
Reg::R10 => "r10".fmt(f),
Reg::R11 => "r11".fmt(f),
Reg::R12 => "r12".fmt(f),
Reg::R13 => "r13".fmt(f),
Reg::R14 => "r14".fmt(f),
Reg::R15 => "r15".fmt(f),
}
}
}
/// Writes the lowercase assembly mnemonic of a no-operand emulated instruction
impl Display for NoEm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            NoEm::Nop => "nop",
            NoEm::Ret => "ret",
            NoEm::Clrc => "clrc",
            NoEm::Clrz => "clrz",
            NoEm::Clrn => "clrn",
            NoEm::Setc => "setc",
            NoEm::Setz => "setz",
            NoEm::Setn => "setn",
            NoEm::Dint => "dint",
            NoEm::Eint => "eint",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(mnemonic)
    }
}
/// Writes the lowercase assembly mnemonic of a one-operand emulated instruction
impl Display for OneEm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            OneEm::Pop => "pop",
            OneEm::Rla => "rla",
            OneEm::Rlc => "rlc",
            OneEm::Inv => "inv",
            OneEm::Clr => "clr",
            OneEm::Tst => "tst",
            OneEm::Dec => "dec",
            OneEm::Decd => "decd",
            OneEm::Inc => "inc",
            OneEm::Incd => "incd",
            OneEm::Adc => "adc",
            OneEm::Dadc => "dadc",
            OneEm::Sbc => "sbc",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(mnemonic)
    }
}
/// Writes the lowercase assembly mnemonic of a special-cased instruction
impl Display for Special {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            Special::Br => "br",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(mnemonic)
    }
}
/// Writes the lowercase assembly mnemonic of a one-operand instruction
impl Display for OneArg {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            OneArg::Rrc => "rrc",
            OneArg::Swpb => "swpb",
            OneArg::Rra => "rra",
            OneArg::Sxt => "sxt",
            OneArg::Push => "push",
            OneArg::Call => "call",
            OneArg::Reti => "reti",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(mnemonic)
    }
}
/// Writes the lowercase assembly mnemonic of a two-operand instruction
impl Display for TwoArg {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            TwoArg::Mov => "mov",
            TwoArg::Add => "add",
            TwoArg::Addc => "addc",
            TwoArg::Subc => "subc",
            TwoArg::Sub => "sub",
            TwoArg::Cmp => "cmp",
            TwoArg::Dadd => "dadd",
            TwoArg::Bit => "bit",
            TwoArg::Bic => "bic",
            TwoArg::Bis => "bis",
            TwoArg::Xor => "xor",
            TwoArg::And => "and",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(mnemonic)
    }
}
impl Display for Jump {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Jump::Jne => "jne".fmt(f),
Jump::Jnz => "jnz".fmt(f),
Jump::Jeq => "jeq".fmt(f),
Jump::Jz => "jz".fmt(f),
Jump::Jnc => "jnc".fmt(f),
Jump::Jlo => "jlo".fmt(f),
Jump::Jc => "jc".fmt(f),
Jump::Jhs => "jhs".fmt(f),
Jump::Jn => "jn".fmt(f),
Jump::Jge => "jge".fmt(f),
Jump::Jl => "jl".fmt(f),
Jump::Jmp => "jmp".fmt(f),
}
}
Ok(())
}
}

View File

@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
use super::*;
use super::ignore::Ignore;
use super::preprocessed::Preprocessed;
/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
/// Gets this stream's [Context]
fn context(&self) -> Context;
/// Wraps this stream in an [Ignore] adaptor built from `variant`
/// (presumably skipping tokens of that [Type], e.g. [Type::Space] -- see [Ignore])
#[inline]
fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
where Self: Sized {
Ignore::new(variant, self)
}
/// Creates a [TokenStream] that performs live substitution of the input
#[inline]
fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
where Self: Sized {
Preprocessed::new(self)
}
/// Returns the next [Token] without advancing
fn peek(&mut self) -> Self::Item;
/// Returns the next [Token] if it is of the expected [Type], without advancing
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Consumes and returns a [Token] if it is the expected [Type]
///
/// Otherwise, does not consume a [Token]
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Ignores a [Token] of the expected [Type], propagating errors.
#[inline]
fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) }
/// Ignores a [Token] of the expected [Type], discarding errors.
#[inline]
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
/// Runs `f` once for each of the `expected` [Types](Type), in order, returning the first success.
///
/// A [LexError::UnexpectedToken] moves on to the next candidate; any other error is returned
/// immediately, with this stream's [Context] attached. If no candidate succeeds, an "expected"
/// error is built from the full `expected` list and the peeked [Token].
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError>
where T: AsRef<[Type]> {
for &expected in expected.as_ref() {
match f(self, expected).map_err(|e| e.bare()) {
Ok(t) => return Ok(t),
Err(LexError::UnexpectedToken { .. }) => continue,
Err(e) => return Err(e.context(self.context())),
}
}
Err(LexError::expected(expected, self.peek()).context(self.context()))
}
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
#[inline]
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::peek_expect, expected)
}
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
///
/// Otherwise, does not consume a [Token]
#[inline]
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::expect, expected)
}
/// Ignores a [Token] of any expected [Type], discarding errors.
#[inline]
fn allow_any_of<T>(&mut self, expected: T)
where T: AsRef<[Type]> {
let _ = self.expect_any_of(expected);
}
/// Ignores a [Token] of any expected [Type], propagating errors.
#[inline]
fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError>
where T: AsRef<[Type]> {
self.any_of(Self::require, expected)
}
}

View File

@ -54,23 +54,55 @@
//! └─ EndOfFile
//! ```
pub mod preamble {
//! Common imports for msp430-asm
use super::*;
pub use assembler::Assembler;
pub use error::Error;
pub use lexer::{
context::Context,
token::{Token, Type},
token_stream::TokenStream,
Tokenizer,
pub mod util {
use std::{
fmt::{Debug, Display},
ops::{Index, Range},
};
pub use parser::Parser;
/// A <code>[Clone] + [Copy] + [!Iterator](Iterator)</code> version of a [Range]
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span<Idx> {
    /// Lower bound; becomes the `start` of the equivalent [Range]
    pub start: Idx,
    /// Upper bound; becomes the `end` of the equivalent [Range]
    pub end: Idx,
}

/// Converts a [Span] back into the [Range] with the same bounds
impl<Idx> From<Span<Idx>> for Range<Idx> {
    fn from(value: Span<Idx>) -> Self {
        let Span { start, end } = value;
        Range { start, end }
    }
}

/// Converts a [Range] into the [Span] with the same bounds
impl<Idx> From<Range<Idx>> for Span<Idx> {
    fn from(value: Range<Idx>) -> Self {
        let Range { start, end } = value;
        Span { start, end }
    }
}

/// Slices index by [Span] exactly as they do by the equivalent [Range]
impl<T> Index<Span<usize>> for [T] {
    type Output = [T];
    fn index(&self, index: Span<usize>) -> &Self::Output {
        &self[index.start..index.end]
    }
}

/// Strings index by [Span] exactly as they do by the equivalent [Range]
impl Index<Span<usize>> for str {
    type Output = str;
    fn index(&self, index: Span<usize>) -> &Self::Output {
        &self[index.start..index.end]
    }
}

/// Formats as `start..end`, using the bounds' [Debug] output
impl<Idx: Debug> Debug for Span<Idx> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { start, end } = self;
        write!(f, "{start:?}..{end:?}")
    }
}

/// Formats as `start..end`, using the bounds' [Display] output
impl<Idx: Display> Display for Span<Idx> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { start, end } = self;
        write!(f, "{start}..{end}")
    }
}
}
use preamble::*;
pub mod error;
pub mod lexer;
pub mod preprocessor;
pub mod parser;
pub mod assembler;
pub mod lexer;
pub mod parser;

View File

@ -1,81 +1,591 @@
// © 2023 John Breaux
//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root)
// © 2023-2024 John Breaux
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
pub mod ast;
use crate::{TokenStream, Type};
use error::ParseError;
use preamble::*;
use std::{
fmt::{Debug, Display},
path::Path,
use self::error::{
Error,
ErrorKind::{self, *},
PResult, Parsing,
};
pub mod preamble {
//! All the different AST node types
use super::*;
// Traits
pub use parsable::Parsable;
// Nodes
pub use comment::Comment;
pub use directive::Directive;
pub use identifier::Identifier;
pub use instruction::{
encoding::{
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
use crate::{
lexer::{
token::{Reg, Special, Token, TokenKind as Kind},
Lexer,
},
opcode::Opcode,
Instruction,
preprocessor::Preprocessor,
util::Span,
};
use ast::*;
/// Parses a preprocessed token stream into [ast] nodes, using one token of lookahead
#[derive(Clone, Debug)]
pub struct Parser<'t> {
/// Source of tokens; scanned one token at a time on demand
lexer: Preprocessor<'t>,
/// The peeked-but-not-yet-consumed token, if any
next: Option<Token<'t>>,
/// Position of the most recently peeked token; attached to every reported [Error]
loc: Span<usize>,
}
impl<'t> Parser<'t> {
/// Creates a new [Parser]
pub fn new(text: &'t str) -> Self {
let lexer = Preprocessor::new(text);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
/// Creates a new [Parser] from an existing [Lexer]
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
let lexer = Preprocessor::with_lexer(lexer);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
/// Parses any [Parsable] item, chosen by the requested return type
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
Parsable::parse(self)
}
/// Builds an [Error] of the given kind, located at the most recently peeked token
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
Error { parsing, kind, loc: self.loc }
}
/// Peek a token out of the lexer
///
/// Scans a new token only when the lookahead slot is empty, and records the peeked
/// token's position as the current location. Fails with `BufEmpty` when the lexer
/// produces no token.
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
if self.next.is_none() {
self.next = self.lexer.scan();
}
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
}
/// Consumes and returns the next token, scanning one first if nothing has been peeked
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
Ok(match self.take() {
Some(token) => token,
None => {
self.peek(p)?;
self.take().expect("should have been populated by peek")
}
})
}
/// Consumes the next token if it is of the `expect`ed kind
///
/// Otherwise the token is left in place and an `Unexpected` error is returned.
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
match self.peek(p)?.kind {
kind if kind == expect => {
self.take();
Ok(self)
}
kind => Err(self.error(Unexpected(kind), p)),
}
}
/// Consumes the next token without checking it
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
self.next(p)?;
Ok(self)
}
/// Take the last peeked token, leaving the lookahead slot empty
pub fn take(&mut self) -> Option<Token<'t>> {
self.next.take()
}
}
// Expressions
impl<'t> Parser<'t> {
/// Parses an expression
///
/// This is the entry point of the expression grammar; it delegates to [Parser::term].
pub fn expr(&mut self) -> PResult<Expr<'t>> {
self.term()
}
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
///
/// Produces the bare operand when no operator follows it, otherwise an [Expr::Binary]
/// holding the first operand and every subsequent `(operator, operand)` pair.
///
/// NOTE(review): operands are parsed by [Parser::factor], so `+`/`-` group more tightly
/// than `*`/`/`/`%` here -- confirm this precedence is intended.
pub fn term(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.factor()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)),
Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)),
Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a factor expression (binary `+`add, `-`sub)
///
/// Operands are parsed by [Parser::shift]. Produces the bare operand when no operator
/// follows it, otherwise an [Expr::Binary] of the first operand and the remaining pairs.
pub fn factor(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.shift()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)),
Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
///
/// Operands are parsed by [Parser::bin]. Produces the bare operand when no operator
/// follows it, otherwise an [Expr::Binary] of the first operand and the remaining pairs.
pub fn shift(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.bin()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)),
Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a bitwise expression (binary `&`and, `|`or, `^`xor)
///
/// Operands are parsed by [Parser::unary]. All three operators are handled at the same
/// level, in source order. Produces the bare operand when no operator follows it,
/// otherwise an [Expr::Binary] of the first operand and the remaining pairs.
pub fn bin(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.unary()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)),
Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)),
Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a unary expression (`*`dereference, `-`negate, `!`invert)
///
/// Collects every leading prefix operator in source order, then parses a
/// [primary](Parser::primary) expression. Without any prefix operator the primary
/// expression is returned unwrapped.
pub fn unary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let mut ops = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => ops.push(UnOp::Deref),
Kind::Minus => ops.push(UnOp::Neg),
Kind::Bang => ops.push(UnOp::Not),
_ if ops.is_empty() => break Ok(self.primary()?),
_ => break Ok(Expr::Unary(ops, self.primary()?.into())),
}
// only reached after an operator was pushed: consume that operator token
self.take();
}
}
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
///
/// Any other token yields a `NonNumeric` error and is left unconsumed.
pub fn primary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let Token { lexeme, kind, .. } = *self.peek(p)?;
Ok(match kind {
Kind::OpenParen => {
// skip the `(`, parse the inner expression, then require the matching `)`
let out = Expr::Group(self.then(p)?.parse()?);
self.assert(Kind::CloseParen, p)?;
out
}
Kind::Number(n, _) => {
self.take();
Expr::Number(n)
}
Kind::Identifier => {
self.take();
Expr::Ident(lexeme)
}
Kind::Amp => self.then(p)?.addrof()?,
ty => Err(self.error(NonNumeric(ty), p))?,
})
}
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let token = self.peek(p)?;
let out = match token.kind {
Kind::Identifier => Expr::AddrOf(token.lexeme),
Kind::Number(n, _) => Expr::Number(n),
ty => Err(self.error(Unexpected(ty), p))?,
};
pub use label::Label;
pub use line::Line;
pub use root::Root;
// Error
pub use error::ParseError;
self.take();
Ok(out)
}
}
pub mod parsable;
pub mod comment;
pub mod directive;
pub mod error;
pub mod identifier;
pub mod instruction;
pub mod label;
pub mod line;
pub mod root;
pub struct Parser {
radix: u32,
/// Types which can be constructed by pulling tokens out of a [Parser]
pub trait Parsable<'t>: Sized {
/// Attempts to parse a `Self` starting at the parser's current token
fn parse(p: &mut Parser<'t>) -> PResult<Self>;
}
impl Parser {
pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> {
Root::parse(&self, &mut stream.ignore(Type::Space))
impl<'t> Parsable<'t> for Statements<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let mut stmts = vec![];
while p.peek(Parsing::File)?.kind != Kind::Eof {
stmts.push(p.parse()?)
}
pub fn parse<T>(self, input: &T) -> Result<Root, ParseError>
where T: AsRef<str> + ?Sized {
Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
Ok(Self { stmts })
}
pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError>
where P: AsRef<Path> + ?Sized {
self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into()))
}
pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError>
where T: AsRef<str> + ?Sized {
Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
}
/// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
/// conversion
pub fn radix(mut self, radix: u32) { self.radix = radix; }
}
impl Default for Parser {
fn default() -> Self { Self { radix: 16 } }
impl<'t> Parsable<'t> for Statement<'t> {
/// Parses one statement, dispatching on the kind of the first token:
/// a comment, a directive, a label (any leading identifier), or otherwise an instruction
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let token = *p.peek(Parsing::Stmt)?;
Ok(match token.kind {
Kind::Comment => {
p.take();
Statement::Comment(token.lexeme)
}
Kind::Directive => Statement::Directive(p.parse()?),
Kind::Identifier => Statement::Label(p.label()?),
_ => Statement::Insn(p.parse()?),
})
}
}
impl<'t> Parsable<'t> for Directive<'t> {
/// Parses a directive token followed by the arguments that directive requires
///
/// The directive is selected by the token's lexeme. An unrecognized lexeme is reported
/// as an `Unexpected` directive token, after that token has already been consumed.
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
p.take();
Ok(match lexeme {
".define" => Directive::Define(p.parse()?),
".org" => Directive::Org(p.expr()?.into()),
".word" => Directive::Word(p.parse()?),
".words" => Directive::Words(p.parse()?),
".string" => Directive::String(p.string()?),
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
})
}
}
impl<'t> Parsable<'t> for Vec<Token<'t>> {
/// Collects raw tokens up to, but not including, the next `Eof`, `Newline`, or `Comment`
///
/// The terminating token is then discarded from the lookahead slot.
/// NOTE(review): this drops the terminator even when it is an `Eof` or a `Comment` --
/// confirm callers do not need to see that token afterwards.
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let mut tokens = vec![];
loop {
if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind {
break;
}
tokens.push(p.next(parsing)?)
}
p.take();
Ok(tokens)
}
}
impl<'t> Parsable<'t> for Instruction<'t> {
/// Parses an [InstructionKind] and records the span of source it was parsed from
///
/// The span starts at the first token of the instruction. `kind` is evaluated before
/// `span`, so the span ends at the end of the last token peeked while parsing it.
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let start = p.peek(Parsing::Instruction)?.pos.start;
Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } })
}
}
impl<'t> Parsable<'t> for InstructionKind<'t> {
/// Peeks the opcode token and delegates to the parser for that class of instruction
///
/// `reti` is matched before the general one-operand case so that it is parsed without
/// an operand. A token that is not an opcode yields an `Unexpected` error.
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
// an instruction starts with an opcode
Ok(match p.peek(Parsing::Instruction)?.kind() {
Kind::NoEm(_) => Self::NoEm(p.parse()?),
Kind::OneEm(_) => Self::OneEm(p.parse()?),
Kind::Special(Special::Br) => Self::Br(p.parse()?),
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
Kind::OneArg(_) => Self::OneArg(p.parse()?),
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
Kind::Jump(_) => Self::Jump(p.parse()?),
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
})
}
}
impl<'t> Parsable<'t> for NoEm {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
match p.next(Parsing::NoEm)?.kind {
Kind::NoEm(opcode) => Ok(Self { opcode }),
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
}
}
}
impl<'t> Parsable<'t> for OneEm<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneEm)?.kind {
Kind::OneEm(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
},
width: p.parse()?,
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for OneArg<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneArg)?.kind {
Kind::OneArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
},
width: p.parse()?,
src: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for TwoArg<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::TwoArg;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::TwoArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
width: p.parse()?,
src: p.parse()?,
dst: p.assert(Kind::Comma, parsing)?.parse()?,
})
}
}
impl<'t> Parsable<'t> for Jump<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::Jump(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for Reti {
/// Consumes a single `reti` opcode token; `reti` takes no operands
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?;
Ok(Reti)
}
}
impl<'t> Parsable<'t> for Br<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
p.assert(Kind::Special(Special::Br), Parsing::Br)?;
Ok(Self { src: p.parse()? })
}
}
impl Debug for Parser {
impl<'t> Parsable<'t> for Src<'t> {
/// Parses a source operand, selecting the addressing mode from its first token:
/// `#expr` immediate, `&expr` absolute, `@reg` indirect, `@reg+` post-increment,
/// `reg` direct, `expr(reg)` indexed, or a bare expression
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Src;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
Kind::At => {
// skip the `@`, then require a register
let reg = match p.then(parsing)?.next(parsing)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), parsing))?,
};
if let Kind::Plus = p.peek(parsing)?.kind {
p.take();
Src::PostInc(reg)
} else {
Src::Indirect(reg)
}
} // @reg+, @reg
Kind::Reg(_) => Src::Direct(p.parse()?),
_ => {
// anything else must start an expression; a following `(` makes it an index
let expr = p.parse()?;
match p.peek(parsing)?.kind {
Kind::OpenParen => Src::Indexed(expr, {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
_ => Src::BareExpr(expr),
}
}
})
}
}
impl<'t> Parsable<'t> for Dst<'t> {
/// Parses a destination operand: `#0` or `#1` (special), `&expr` absolute, `reg` direct,
/// or `expr(reg)` indexed
///
/// Any other `#number` is rejected with `BadIntForDst`. Unlike a source, a destination
/// expression must be followed by a parenthesized register.
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Dst;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
Kind::Reg(_) => Dst::Direct(p.parse()?),
_ => Dst::Indexed(p.expr()?.into(), {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
})
}
}
impl<'t> Parsable<'t> for JumpDst<'t> {
    /// Parses a jump destination: any run of `+`/`-` signs followed by a label or a number
    ///
    /// Each `-` flips the sign applied to a numeric offset; `+` is accepted and ignored.
    /// Signs have no effect on a label. Any other token yields an `Unexpected` error.
    fn parse(p: &mut Parser<'t>) -> PResult<Self> {
        let parsing = Parsing::Jump;
        let mut neg = false;
        let out = loop {
            let token = p.peek(parsing)?;
            match token.kind {
                Kind::Minus => neg = !neg,
                Kind::Plus => {}
                Kind::Identifier => break Self::Label(token.lexeme),
                Kind::Number(n, _) => {
                    // Reinterpret the 16-bit literal as signed. `wrapping_neg` keeps
                    // `-0x8000` from overflowing (and panicking in debug builds), which
                    // multiplying `i16::MIN` by -1 would do.
                    let offset = n as i16;
                    break Self::Rel(if neg { offset.wrapping_neg() } else { offset });
                }
                ty => Err(p.error(Unexpected(ty), parsing))?,
            }
            // consume the sign token and look at the next one
            p.take();
        };
        // consume the label or number that ended the loop
        p.take();
        Ok(out)
    }
}
impl<'t> Parsable<'t> for Width {
    /// Parses an optional width suffix; defaults to [Width::Word] without consuming anything
    fn parse(p: &mut Parser<'t>) -> PResult<Self> {
        let width = match p.peek(Parsing::Width)?.kind() {
            Kind::Byte => Width::Byte,
            Kind::Word => Width::Word,
            // no suffix present: leave the token for whoever parses the operand
            _ => return Ok(Width::Word),
        };
        p.take();
        Ok(width)
    }
}
impl<'t> Parsable<'t> for Reg {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let out = match p.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), Parsing::Reg))?,
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Expr<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
p.expr()
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Box::new(p.parse()?))
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
/// Parses a `[`-delimited list of items, with no separator between them, up to the closing `]`
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Vec;
p.assert(Kind::OpenBrace, parsing)?;
let mut out = vec![];
while Kind::CloseBrace != p.peek(parsing)?.kind {
out.push(p.parse()?)
}
p.assert(Kind::CloseBrace, parsing)?;
Ok(out)
}
}
/// Context-sensitive parsing rules
impl<'t> Parser<'t> {
pub fn string(&mut self) -> PResult<&'t str> {
let token = *self.peek(Parsing::Directive)?;
match token.kind {
Kind::String => {
self.take();
Ok(&token.lexeme[1..token.lexeme.len() - 1])
}
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
}
}
pub fn label(&mut self) -> PResult<&'t str> {
let p = Parsing::Label;
let token = self.next(p)?;
assert_eq!(Kind::Identifier, token.kind);
self.assert(Kind::Colon, p)?;
Ok(token.lexeme)
}
pub fn reg(&mut self) -> PResult<Reg> {
match self.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => {
self.take();
Ok(r)
}
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
}
}
}
pub mod error {
use super::Kind;
use crate::util::Span;
use std::{fmt::Display, num::TryFromIntError};
pub type PResult<T> = Result<T, Error>;
/// A parse error: what went wrong, what was being parsed, and where
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
/// The grammar rule that was being parsed when the error occurred
pub parsing: Parsing,
/// What went wrong
pub kind: ErrorKind,
/// The source location the error is reported at
pub loc: Span<usize>,
}
/// The reason a parse (or constant evaluation) failed
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
LexError,
/// Returned when [Parsing::Expr] fails without consuming
NotExpr,
DivZero,
/// A token which cannot begin a numeric expression was found
NonNumeric(Kind),
/// A `#` immediate other than 0 or 1 was used as a destination
BadIntForDst(u16),
TryFromIntError(TryFromIntError),
/// A token of this kind was found where something else was required
Unexpected(Kind),
/// The lexer produced no token when one was requested
BufEmpty,
Todo,
}
/// Names the grammar rule being parsed, for use in error messages
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Parsing {
File,
Stmt,
Label,
Directive,
Instruction,
NoEm,
OneEm,
Reti,
Br,
OneArg,
TwoArg,
Jump,
Width,
Src,
Dst,
Reg,
Expr,
Vec,
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing)
}
}
/// Writes a short human-readable description of the failure
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::LexError => write!(f, "lexical error"),
ErrorKind::TryFromIntError(e) => write!(f, "{e}"),
ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"),
ErrorKind::DivZero => write!(f, "Division by zero"),
ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"),
ErrorKind::BufEmpty => write!(f, "Peek buffer empty"),
ErrorKind::Todo => write!(f, "Not yet implemented"),
}
}
}
/// Writes the rule's name as a noun phrase, suitable after "while parsing"
impl Display for Parsing {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let phrase = match self {
            Parsing::File => "a file",
            Parsing::Stmt => "a line",
            Parsing::Label => "a label",
            Parsing::Directive => "a directive",
            Parsing::Instruction => "an instruction",
            Parsing::NoEm => "a no-operand emulated instruction",
            Parsing::OneEm => "a one-operand emulated instruction",
            Parsing::Reti => "a `reti` instruction",
            Parsing::Br => "a `br` instruction",
            Parsing::OneArg => "a one-operand instruction",
            Parsing::TwoArg => "a two-operand instruction",
            Parsing::Jump => "a jump instruction",
            Parsing::Width => "an instruction width",
            Parsing::Src => "a source",
            Parsing::Dst => "a destination",
            Parsing::Reg => "a register",
            Parsing::Expr => "a constant expression",
            Parsing::Vec => "a list",
        };
        // `str`'s Display impl is `Formatter::pad`, so width/alignment flags still apply
        f.pad(phrase)
    }
}
impl std::error::Error for Error {}
}

679
src/parser/ast.rs Normal file
View File

@ -0,0 +1,679 @@
// © 2023-2024 John Breaux
//! Represents MSP430 assembly statements, instructions, and expressions as an abstract syntax tree
use crate::{
lexer::token::{self, Reg, Token},
util::Span,
};
/// A sequence of [Statement]s, in source order
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Statements<'t> {
pub stmts: Vec<Statement<'t>>,
}
/// A single statement: a label, an instruction, a directive, or a comment
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Statement<'t> {
/// A label's name, without the trailing `:`
Label(&'t str),
Insn(Instruction<'t>),
Directive(Directive<'t>),
/// The text of a comment token
Comment(&'t str),
}
/// An assembler directive and its arguments
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Directive<'t> {
/// `.define`: the raw tokens following the directive.
///
/// TODO: Store define as a vec of tokens. This will require help from the
/// [preprocessor](crate::preprocessor)
/// NOTE(review): the payload already is a `Vec<Token>`; this TODO may be stale.
Define(Vec<Token<'t>>),
/// `.org` followed by an expression
Org(Box<Expr<'t>>),
/// `.word` followed by an expression
Word(Box<Expr<'t>>),
/// `.words` followed by a bracketed list of expressions
Words(Vec<Expr<'t>>),
/// `.string` followed by a string literal; holds the text between the delimiters
String(&'t str),
}
/// An instruction, together with the span of source text it was parsed from
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'t> {
pub span: Span<usize>,
pub kind: InstructionKind<'t>,
}
/// The classes of instruction, grouped by the operands each one takes
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InstructionKind<'t> {
/// An emulated instruction with no operands
NoEm(NoEm),
/// An emulated instruction with a width and a destination
OneEm(OneEm<'t>),
/// An instruction with a width and a source
OneArg(OneArg<'t>),
/// An instruction with a width, a source, and a destination
TwoArg(TwoArg<'t>),
/// A jump and its destination
Jump(Jump<'t>),
/// The operand-less `reti` instruction
Reti(Reti),
/// The `br` instruction and its source
Br(Br<'t>),
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct NoEm {
pub opcode: token::NoEm,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneEm<'t> {
pub opcode: token::OneEm,
pub width: Width,
pub dst: Dst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneArg<'t> {
pub opcode: token::OneArg,
pub width: Width,
pub src: Src<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TwoArg<'t> {
pub opcode: token::TwoArg,
pub width: Width,
pub src: Src<'t>,
pub dst: Dst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Jump<'t> {
pub opcode: token::Jump,
pub dst: JumpDst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Reti;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Br<'t> {
pub src: Src<'t>,
}
/// The operand width of an instruction; [Word](Width::Word) unless stated otherwise
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Width {
#[default]
Word,
Byte,
}
/// A source operand, by addressing mode
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Src<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `@reg`
Indirect(Reg),
/// `@reg+`
PostInc(Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#expr`
Immediate(Box<Expr<'t>>),
/// `#` followed by one of the [SrcSpecial] constants
Special(SrcSpecial),
/// An expression with no addressing-mode marker
BareExpr(Box<Expr<'t>>),
}
/// The constants 0, 1, 4, 2, 8, and -1 when used as a source
///
/// NOTE(review): presumably the values the MSP430 constant generator can supply -- confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum SrcSpecial {
Zero,
One,
Four,
Two,
Eight,
NegOne,
}
/// A destination operand, by addressing mode
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Dst<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#0` or `#1`
Special(DstSpecial),
}
/// The constants 0 and 1 when used as a destination
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DstSpecial {
Zero,
One,
}
/// The target of a jump instruction
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum JumpDst<'t> {
/// A relative offset, nominally an even number from -0x400..=0x3fe
Rel(i16),
/// The name of a label to jump to
Label(&'t str),
}
/// A constant expression
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Expr<'t> {
/// A first operand, followed by every `(operator, operand)` pair that was chained onto it
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
/// Prefix operators, in source order, and the operand they apply to
Unary(Vec<UnOp>, Box<Expr<'t>>),
/// A parenthesized expression
Group(Box<Expr<'t>>),
/// A numeric literal
Number(u16),
/// An identifier
Ident(&'t str),
/// `&` followed by an identifier
AddrOf(&'t str),
}
/// A binary operator in an [Expr]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum BinOp {
/// `*`
Mul,
/// `/`
Div,
/// `%`
Rem,
/// `+`
Add,
/// `-`
Sub,
/// `<<`
Lsh,
/// `>>`
Rsh,
/// `&`
And,
/// `^`
Xor,
/// `|`
Or,
}
/// A prefix operator in an [Expr]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum UnOp {
/// `*`
Deref,
/// `!`
Not,
/// `-`
Neg,
}
pub mod conv {
//! Conversions between [ast](super) types, via [From], or via `new` constructor
use super::{InstructionKind as Ik, *};
// Implements `From<$src> for $dst` for each listed source type by applying `$expr`
// (typically an enum variant constructor) to the value
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
impl<'t> From<$src> for $dst {
fn from(value: $src) -> Self {
$expr(value)
}
}
)*}}
// sure am glad macros aren't hygienic over lifetimes
impl_from! { Ik<'t> {
NoEm => Ik::NoEm,
OneEm<'t> => Ik::OneEm,
OneArg<'t> => Ik::OneArg,
TwoArg<'t> => Ik::TwoArg,
Jump<'t> => Ik::Jump,
Reti => Ik::Reti,
Br<'t> => Ik::Br,
}}
impl_from! { Expr<'t> {
u16 => Expr::Number
}}
/// Every destination addressing mode has a source counterpart
impl<'t> From<Dst<'t>> for Src<'t> {
fn from(value: Dst<'t>) -> Self {
match value {
Dst::Special(v) => Src::Special(v.into()),
Dst::Absolute(v) => Src::Absolute(v),
Dst::Indexed(i, r) => Src::Indexed(i, r),
Dst::Direct(r) => Src::Direct(r),
}
}
}
impl From<DstSpecial> for SrcSpecial {
fn from(value: DstSpecial) -> Self {
match value {
DstSpecial::Zero => SrcSpecial::Zero,
DstSpecial::One => SrcSpecial::One,
}
}
}
impl<'t> TwoArg<'t> {
/// Creates a two-operand instruction from its parts
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
Self { opcode, width, src, dst }
}
}
}
pub mod display {
use super::*;
use std::fmt::Display;
/// Writes every statement on its own line, stopping at the first formatting error
impl<'t> Display for Statements<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.stmts.iter().try_for_each(|stmt| writeln!(f, "{stmt}"))
    }
}
/// Writes the statement as source text; a label gets its trailing `:` back
impl<'t> Display for Statement<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Statement::Label(v) => write!(f, "{v}:"),
Statement::Insn(v) => write!(f, "{v}"),
Statement::Directive(v) => write!(f, "{v}"),
Statement::Comment(v) => write!(f, "{v}"),
}
}
}
/// Writes the directive in the same syntax the parser accepts
impl<'t> Display for Directive<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Print the directive's real name (`.define`, as matched by the parser);
            // the defined tokens themselves are not printed.
            Directive::Define(_) => write!(f, ".define"),
            Directive::Org(e) => write!(f, ".org {e}"),
            Directive::Word(w) => write!(f, ".word {w}"),
            Directive::Words(words) => {
                write!(f, ".words [ ")?;
                for word in words {
                    write!(f, "{word} ")?;
                }
                write!(f, "]")
            }
            // the stored text excludes its delimiters, so put the quotes back
            Directive::String(s) => write!(f, ".string \"{s}\""),
        }
    }
}
impl<'t> Display for Instruction<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { span: _, kind } = self;
write!(f, "{kind}")
}
}
impl<'t> Display for InstructionKind<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InstructionKind::NoEm(v) => v.fmt(f),
InstructionKind::OneEm(v) => v.fmt(f),
InstructionKind::OneArg(v) => v.fmt(f),
InstructionKind::TwoArg(v) => v.fmt(f),
InstructionKind::Jump(v) => v.fmt(f),
InstructionKind::Reti(v) => v.fmt(f),
InstructionKind::Br(v) => v.fmt(f),
}
}
}
/// A zero-operand emulated instruction prints as its bare mnemonic.
impl Display for NoEm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.opcode)
    }
}
/// Prints `opcode[width]<TAB>dst`.
impl<'t> Display for OneEm<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.dst)
    }
}
/// Prints `opcode[width]<TAB>src`.
impl<'t> Display for OneArg<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.src)
    }
}
/// Prints `opcode[width]<TAB>src, dst`.
impl<'t> Display for TwoArg<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}, {}", self.opcode, self.width, self.src, self.dst)
    }
}
/// Prints `opcode<TAB>dst`.
impl<'t> Display for Jump<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}\t{}", self.opcode, self.dst)
    }
}
/// Always prints the fixed mnemonic `reti`.
impl Display for Reti {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("reti")
    }
}
/// Prints `br<TAB>src`.
impl<'t> Display for Br<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "br\t{}", self.src)
    }
}
impl<'t> Display for Src<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Src::Direct(r) => write!(f, "{r}"),
Src::Indexed(e, r) => write!(f, "{e}({r})"),
Src::Indirect(r) => write!(f, "@{r}"),
Src::PostInc(r) => write!(f, "@{r}+"),
Src::Absolute(e) => write!(f, "&{e}"),
Src::Immediate(e) => write!(f, "#{e}"),
Src::Special(i) => write!(f, "#{i}"),
Src::BareExpr(id) => write!(f, "{id}"),
}
}
}
impl Display for SrcSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SrcSpecial::Zero => write!(f, "0"),
SrcSpecial::One => write!(f, "1"),
SrcSpecial::Four => write!(f, "4"),
SrcSpecial::Two => write!(f, "2"),
SrcSpecial::Eight => write!(f, "8"),
SrcSpecial::NegOne => write!(f, "-1"),
}
}
}
impl<'t> Display for Dst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Dst::Direct(r) => write!(f, "{r}"),
Dst::Indexed(e, r) => write!(f, "{e}({r})"),
Dst::Absolute(e) => write!(f, "&{e}"),
Dst::Special(i) => write!(f, "#{i}"),
}
}
}
impl Display for DstSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DstSpecial::Zero => write!(f, "0"),
DstSpecial::One => write!(f, "1"),
}
}
}
impl<'t> Display for JumpDst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
JumpDst::Rel(i) => write!(f, "{i}"),
JumpDst::Label(l) => write!(f, "{l}"),
}
}
}
impl<'t> Display for Expr<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expr::Binary(head, tail) => {
write!(f, "{head}")?;
for (op, tail) in tail {
write!(f, "{op}{tail}")?;
}
Ok(())
}
Expr::Unary(ops, tail) => {
for op in ops {
write!(f, "{op}")?
}
write!(f, "{tail}")
}
Expr::Group(e) => write!(f, "({e})"),
Expr::Number(n) => write!(f, "{n:x}"),
Expr::Ident(n) => write!(f, "{n}"),
Expr::AddrOf(n) => write!(f, "&{n}"),
}
}
}
impl Display for BinOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BinOp::Mul => write!(f, "*"),
BinOp::Div => write!(f, "/"),
BinOp::Rem => write!(f, "%"),
BinOp::Add => write!(f, "+"),
BinOp::Sub => write!(f, "-"),
BinOp::Lsh => write!(f, "<<"),
BinOp::Rsh => write!(f, ">>"),
BinOp::And => write!(f, "&"),
BinOp::Xor => write!(f, "^"),
BinOp::Or => write!(f, "|"),
}
}
}
impl Display for UnOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
UnOp::Deref => write!(f, "*"),
UnOp::Not => write!(f, "!"),
UnOp::Neg => write!(f, "-"),
}
}
}
impl Display for Width {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Width::Word => Ok(()),
Width::Byte => write!(f, ".b"),
}
}
}
}
pub mod canonical {
use std::iter;
use super::*;
use token::TwoArg::*;
/// Conversion of an AST node into its "canonical" form.
///
/// Implementors consume the node and return its canonical equivalent, which may be
/// a different type (see [Canonicalize::Output]).
pub trait Canonicalize {
/// The output after canonicalization
type Output;
/// Converts Self into its "canonical" form. "Emulated" instructions are converted
/// into their respective non-emulated forms.
fn to_canonical(self) -> Self::Output;
}
impl<'t> Canonicalize for Statements<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() }
}
}
impl<'t> Canonicalize for Statement<'t> {
    type Output = Self;
    /// Canonicalizes the instruction of an [Statement::Insn]; every other kind of
    /// statement is returned untouched.
    fn to_canonical(self) -> Self::Output {
        if let Statement::Insn(insn) = self {
            Self::Insn(insn.to_canonical())
        } else {
            self
        }
    }
}
impl<'t> Canonicalize for Instruction<'t> {
    type Output = Self;
    /// Canonicalizes the instruction's kind while keeping its span.
    fn to_canonical(self) -> Self::Output {
        let Self { span, kind } = self;
        Self { span, kind: kind.to_canonical() }
    }
}
impl<'t> Canonicalize for InstructionKind<'t> {
    type Output = Self;
    /// Canonicalizes the wrapped instruction form. The emulated forms
    /// ([NoEm], [OneEm], [Br]) all become [InstructionKind::TwoArg].
    fn to_canonical(self) -> Self::Output {
        match self {
            // Emulated forms are lowered to a two-operand instruction
            Self::NoEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::OneEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::Br(insn) => Self::TwoArg(insn.to_canonical()),
            // Native forms keep their kind
            Self::OneArg(insn) => Self::OneArg(insn.to_canonical()),
            Self::TwoArg(insn) => Self::TwoArg(insn.to_canonical()),
            Self::Jump(insn) => Self::Jump(insn.to_canonical()),
            Self::Reti(insn) => Self::Reti(insn.to_canonical()),
        }
    }
}
impl Canonicalize for NoEm {
type Output = TwoArg<'static>;
fn to_canonical(self) -> Self::Output {
let Self { opcode } = self;
use SrcSpecial::*;
use Width::*;
match opcode {
token::NoEm::Nop => {
TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
}
token::NoEm::Ret => {
TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
}
token::NoEm::Clrc => {
TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Clrz => {
TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Clrn => {
TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Setc => {
TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Setz => {
TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Setn => {
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Dint => {
TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
token::NoEm::Eint => {
TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
}
}
}
impl<'t> Canonicalize for OneEm<'t> {
    type Output = TwoArg<'t>;
    /// Lowers a one-operand emulated instruction to the two-operand instruction it
    /// stands for, keeping the requested width.
    ///
    /// The destination is canonicalized before lowering, so the result matches what
    /// canonicalizing the equivalent hand-written two-operand instruction produces.
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        let Self { opcode, width, dst } = self;
        // Fold constant sub-expressions in the operand, exactly as `TwoArg` does
        let dst = dst.to_canonical();
        match opcode {
            token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst),
            // Shifts are emulated by adding the operand to itself
            token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst),
            token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst),
            token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst),
            token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst),
            token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst),
            token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst),
            token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst),
            token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst),
            token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst),
            token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst),
            token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst),
            token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst),
        }
    }
}
impl<'t> Canonicalize for OneArg<'t> {
    type Output = Self;
    /// Canonicalizes the source operand. `call` is always forced to word width;
    /// every other opcode keeps the width it was written with.
    fn to_canonical(self) -> Self::Output {
        let Self { opcode, width, src } = self;
        let width = if matches!(opcode, token::OneArg::Call) { Width::Word } else { width };
        let src = src.to_canonical();
        Self { opcode, width, src }
    }
}
impl<'t> Canonicalize for TwoArg<'t> {
    type Output = Self;
    /// Canonicalizes both operands; the opcode and width are left alone.
    fn to_canonical(self) -> Self::Output {
        Self { src: self.src.to_canonical(), dst: self.dst.to_canonical(), ..self }
    }
}
impl<'t> Canonicalize for Jump<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, dst } = self;
Self {
opcode: match opcode {
token::Jump::Jnz => token::Jump::Jne,
token::Jump::Jz => token::Jump::Jeq,
token::Jump::Jnc => token::Jump::Jlo,
token::Jump::Jc => token::Jump::Jhs,
t => t,
},
dst: dst.to_canonical(),
}
}
}
impl Canonicalize for Reti {
type Output = Self;
/// `reti` is returned unchanged.
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Br<'t> {
    type Output = TwoArg<'t>;
    /// Lowers `br src` to the word-width `mov src, pc`.
    ///
    /// The source is canonicalized as part of the lowering, so the result matches
    /// what canonicalizing a hand-written `mov src, pc` produces.
    fn to_canonical(self) -> Self::Output {
        let Self { src } = self;
        TwoArg::new(Mov, Width::Word, src.to_canonical(), Dst::Direct(Reg::PC))
    }
}
impl<'t> Canonicalize for Src<'t> {
    type Output = Self;
    /// Canonicalizes the expression of indexed, absolute, and immediate operands.
    ///
    /// An immediate that evaluates to 0, 1, 2, 4, 8, or 0xffff becomes the matching
    /// [Src::Special] constant. All other addressing modes are returned unchanged.
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        match self {
            Self::Indexed(e, r) => Self::Indexed(e.to_canonical().into(), r),
            Self::Absolute(e) => Self::Absolute(e.to_canonical().into()),
            Self::Immediate(e) => {
                let special = match e.to_canonical() {
                    Expr::Number(0) => Zero,
                    Expr::Number(1) => One,
                    Expr::Number(2) => Two,
                    Expr::Number(4) => Four,
                    Expr::Number(8) => Eight,
                    Expr::Number(0xffff) => NegOne,
                    // No special constant for this value: stay an immediate
                    expr => return Self::Immediate(expr.into()),
                };
                Self::Special(special)
            }
            unchanged @ (Self::Direct(_)
            | Self::Indirect(_)
            | Self::PostInc(_)
            | Self::Special(_)
            | Self::BareExpr(_)) => unchanged,
        }
    }
}
impl<'t> Canonicalize for Dst<'t> {
    type Output = Self;
    /// Canonicalizes the expression of indexed and absolute operands; direct and
    /// special operands are returned unchanged.
    fn to_canonical(self) -> Self::Output {
        match self {
            Self::Indexed(e, r) => Self::Indexed(e.to_canonical().into(), r),
            Self::Absolute(e) => Self::Absolute(e.to_canonical().into()),
            unchanged @ (Self::Direct(_) | Self::Special(_)) => unchanged,
        }
    }
}
impl<'t> Canonicalize for JumpDst<'t> {
type Output = Self;
/// Jump destinations are returned unchanged.
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Expr<'t> {
    type Output = Self;
    /// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
    /// this returns a single [Expr::Number]. If not, it evaluates until
    /// it runs into an unevaluatable leaf.
    ///
    /// All arithmetic is 16-bit and wrapping. Evaluation also stops, leaving the rest
    /// of the expression in place, at a dereference and at a division or remainder by
    /// zero: neither can be resolved here, and a later stage can report the latter.
    fn to_canonical(self) -> Self::Output {
        match self {
            Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
            Expr::Group(e) => e.to_canonical(),
            Expr::Unary(ops, tail) => {
                let mut value = match tail.to_canonical() {
                    Expr::Number(n) => n,
                    // The operand is not a constant. Keep the operators attached to it:
                    // returning the bare operand would turn `-label` into `label`.
                    other => return Expr::Unary(ops, Box::new(other)),
                };
                // Prefix operators are stored in source order, so the operator nearest
                // the operand (the last one) has to be applied first.
                let mut ops = ops.into_iter();
                while let Some(op) = ops.next_back() {
                    value = match op {
                        // If the tail is dereferenced, canonicalization must halt,
                        // since we have no knowledge of memory layout
                        UnOp::Deref => {
                            return Expr::Unary(
                                ops.chain(iter::once(op)).collect(),
                                Box::new(value.into()),
                            )
                        }
                        UnOp::Not => !value,
                        UnOp::Neg => value.wrapping_neg(),
                    }
                }
                Expr::Number(value)
            }
            Expr::Binary(head, tails) => {
                let mut acc = match head.to_canonical() {
                    Expr::Number(n) => n,
                    head => return Expr::Binary(head.into(), tails),
                };
                let mut tails = tails.into_iter();
                while let Some((op, tail)) = tails.next() {
                    // Ok(folded value), or Err(operand) when folding has to stop here
                    let step = match tail.to_canonical() {
                        Expr::Number(rhs) => match op {
                            BinOp::Mul => Some(acc.wrapping_mul(rhs)),
                            // `None` on a zero divisor, instead of panicking
                            BinOp::Div => acc.checked_div(rhs),
                            BinOp::Rem => acc.checked_rem(rhs),
                            BinOp::Add => Some(acc.wrapping_add(rhs)),
                            BinOp::Sub => Some(acc.wrapping_sub(rhs)),
                            BinOp::Lsh => Some(acc.wrapping_shl(rhs as u32)),
                            BinOp::Rsh => Some(acc.wrapping_shr(rhs as u32)),
                            BinOp::And => Some(acc & rhs),
                            BinOp::Xor => Some(acc ^ rhs),
                            BinOp::Or => Some(acc | rhs),
                        }
                        .ok_or(Expr::Number(rhs)),
                        symbolic => Err(symbolic),
                    };
                    match step {
                        Ok(folded) => acc = folded,
                        // Rebuild from what has been folded so far plus everything
                        // that has not been evaluated yet
                        Err(tail) => {
                            return Expr::Binary(
                                Box::new(acc.into()),
                                iter::once((op, tail)).chain(tails).collect(),
                            )
                        }
                    }
                }
                Expr::Number(acc)
            }
        }
    }
}
}

View File

@ -1,15 +0,0 @@
// © 2023 John Breaux
//! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//`
use super::*;
/// The text of a single comment token.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Comment(pub String);
impl Parsable for Comment {
    /// Consumes one [Type::Comment] token and stores a copy of its lexeme.
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let token = stream.expect(Type::Comment)?;
        Ok(Self(token.lexeme().to_string()))
    }
}
impl Display for Comment {
    /// Prints the stored comment text as-is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(text) = self;
        Display::fmt(text, f)
    }
}

View File

@ -1,90 +0,0 @@
// © 2023 John Breaux
//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and
//! [Linker](crate::Linker)
use std::path::PathBuf;
use super::*;
use crate::lexer::token::OwnedToken;
// TODO: Parse each kind of *postprocessor* directive into an AST node
// - .org 8000: Directive::Org { base: Number }
// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this
// in the AST?
// - .include "<filename>" Directive::Include { Root } ; should this include an entire AST in
// the AST?
// - .word 8000 Directive::Word(Number)
// - .words dead beef Directive::Words(Vec<u16>|Vec<Number>)
// - .byte ff Directive::Byte(Number)
// - .bytes de, ad, be, ef Directive::Bytes(Vec<u8>)
// - .string "string" Directive::String(String)
// - .ascii "string" Directive::Ascii(Vec<u8>)
/// An assembler directive together with its parsed arguments.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Directive {
/// `.org <number>`
Org(Number),
/// `.define <tokens...>`: every token up to the end of the line
Define(Vec<OwnedToken>),
/// `.include <path>`: holds the parsed contents of the included file
Include(Root),
/// `.byte <number>`
Byte(Number),
/// `.bytes <numbers...>`
Bytes(Vec<Number>),
/// `.word <number>`
Word(Number),
/// `.words <numbers...>`
Words(Vec<Number>),
/// `.string <string>`
String(String),
/// `.strings <strings...>`
Strings(Vec<String>),
}
impl Directive {}
impl Parsable for Directive {
    /// Parses a directive token followed by the arguments that directive expects.
    ///
    /// # Errors
    /// Returns [ParseError::UnrecognizedDirective] for a directive that is not listed
    /// below, and forwards any error produced while parsing the arguments.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let d = stream.expect(Type::Directive)?;
        let directive = match d.lexeme() {
            ".org" => Self::Org(Number::parse(p, stream)?),
            ".define" => {
                // Collect every token up to (not including) the end of the line or file
                let mut tokens = vec![];
                while !matches!(stream.peek().variant(), Type::Endl | Type::EndOfFile) {
                    tokens.push(stream.next().unwrap_or_default().into());
                }
                Self::Define(tokens)
            }
            ".include" => {
                // NOTE(review): the included file is parsed with a default `Parser`
                // rather than `p` -- confirm that dropping `p`'s settings is intended.
                let path = PathBuf::parse(p, stream)?;
                Self::Include(Parser::default().parse_file(&path)?)
            }
            ".byte" => Self::Byte(Number::parse(p, stream)?),
            ".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?),
            ".word" => Self::Word(Number::parse(p, stream)?),
            ".words" => Self::Words(Vec::<Number>::parse(p, stream)?),
            ".string" => Self::String(String::parse(p, stream)?),
            ".strings" => Self::Strings(Vec::<String>::parse(p, stream)?),
            unknown => return Err(ParseError::UnrecognizedDirective(unknown.into())),
        };
        Ok(directive)
    }
}
impl Display for Directive {
    /// Prints the directive under its own name, followed by its arguments.
    ///
    /// List arguments (`.bytes`, `.words`, `.strings`) are printed with their
    /// [Debug](std::fmt::Debug) representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Directive::Org(num) => write!(f, ".org {num}"),
            Directive::Define(rep) => {
                write!(f, ".define")?;
                for t in rep {
                    write!(f, " {t}")?;
                }
                Ok(())
            }
            Directive::Include(r) => Display::fmt(r, f),
            // Each directive prints its own mnemonic; several of these used to print
            // another directive's name (`.org`, `.bytes`, `.string`).
            Directive::Byte(num) => write!(f, ".byte {num}"),
            Directive::Bytes(v) => write!(f, ".bytes {v:?}"),
            Directive::Word(num) => write!(f, ".word {num}"),
            Directive::Words(v) => write!(f, ".words {v:?}"),
            Directive::String(s) => write!(f, ".string \"{s}\""),
            // The Debug form of the list already quotes each string
            Directive::Strings(s) => write!(f, ".strings {s:?}"),
        }
    }
}

View File

@ -1,74 +0,0 @@
// © 2023 John Breaux
use super::*;
use crate::lexer::error::LexError;
/// Everything that can go wrong while parsing.
#[derive(Debug)]
pub enum ParseError {
/// Produced by [lexer](crate::lexer)
LexError(LexError),
/// Produced by [std::io]
IoError(std::io::Error),
/// Produced by [Number](Number)[::parse()](Parsable::parse())
/// when the parsed number contains digits too high for the specified radix.
/// Holds the offending lexeme and the radix.
UnexpectedDigits(String, u32),
/// Produced by [Opcode](Opcode)[::parse()](Parsable::parse())
/// when the opcode passed lexing but did not match recognized opcodes.
///
/// This is always a lexer bug.
UnrecognizedOpcode(String),
/// Produced by [Directive](Directive)[::parse()](Parsable::parse())
/// when an unknown or unimplemented directive is used
UnrecognizedDirective(String),
/// Produced by [Register] when attempting to convert from a [str]
/// that isn't a register (pc, sp, sr, cg, or r{number})
NotARegister(String),
/// Produced by [Register] when the r{number} is outside the range 0-15
RegisterTooHigh(u16),
/// Produced by [SecondaryOperand] when the joke "secondary immediate" form
/// is out of range 0..=1
FatSecondaryImmediate(isize),
/// Produced by a [Number] too wide to fit in 16 bits
/// (outside the range `-2^15 ..= 2^16-1`)
NumberTooWide(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
/// when the jump offset is outside the range `-1022..=1024`
JumpedTooFar(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
/// when the jump offset is odd
JumpedOdd(isize),
}
impl From<LexError> for ParseError {
fn from(value: LexError) -> Self { Self::LexError(value) }
}
impl From<std::io::Error> for ParseError {
fn from(value: std::io::Error) -> Self { Self::IoError(value) }
}
impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::LexError(error) => Display::fmt(error, f),
Self::IoError(error) => Display::fmt(error, f),
Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."),
Self::NotARegister(reg) => write!(f, "{reg} is not a register"),
Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"),
Self::JumpedOdd(num) => {
write!(f, "Jump targets only encode even numbers: {num} must not be odd.")
}
}
}
}
impl std::error::Error for ParseError {
    /// Returns the wrapped lexer or I/O error, if this error wraps one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let inner: &(dyn std::error::Error + 'static) = match self {
            Self::LexError(e) => e,
            Self::IoError(e) => e,
            // Every other variant originates in the parser itself
            _ => return None,
        };
        Some(inner)
    }
}

View File

@ -1,26 +0,0 @@
// © 2023 John Breaux
//! An [Identifier] stores the text of an identifier in a shared, reference-counted string
use super::*;
use std::rc::Rc;
/// The name of an identifier, held in a reference-counted string so that clones
/// share one allocation.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier {
    str: Rc<str>,
}
impl Identifier {
    /// Creates an [Identifier] holding a copy of `s`.
    fn str<T: AsRef<str>>(s: T) -> Self {
        // Copy straight into the `Rc`, without going through an intermediate `String`
        Self { str: Rc::from(s.as_ref()) }
    }
}
impl Parsable for Identifier {
    /// Consumes one [Type::Identifier] token and stores its lexeme.
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let token = stream.expect(Type::Identifier)?;
        // `expect` has already checked the variant; anything else is a bug
        if !matches!(token.variant(), Type::Identifier) {
            unreachable!("Expected identifier, got {token:?}");
        }
        Ok(Self::str(token.lexeme()))
    }
}
impl Display for Identifier {
    /// Prints the identifier's name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name: &str = &self.str;
        Display::fmt(name, f)
    }
}

View File

@ -1,52 +0,0 @@
// © 2023 John Breaux
//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430
//! instruction
//!
//!
//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent
//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an
//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on
//! my own. Really, there's about 9 valid classes of instruction, some of which are only used for
//! one or two of the MSP430's instructions.
use super::*;
pub mod encoding;
pub mod opcode;
/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Instruction(Opcode, Encoding);
impl Instruction {
    /// Gets the [Opcode]
    pub fn opcode(&self) -> &Opcode {
        let Self(opcode, _) = self;
        opcode
    }
    /// Gets the [Encoding]
    pub fn encoding(&self) -> &Encoding {
        let Self(_, encoding) = self;
        encoding
    }
    /// Gets the Instruction as a [u16]: the opcode's value OR-ed with the encoding's word
    pub fn word(&self) -> u16 {
        let Self(opcode, encoding) = self;
        (*opcode as u16) | encoding.word()
    }
    /// Gets the extension words, as reported by [Encoding::extwords]
    pub fn ext_words(&self) -> [Option<u16>; 2] {
        self.encoding().extwords()
    }
}
impl Parsable for Instruction {
    /// Parses an opcode, then the operands that the opcode calls for.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: crate::TokenStream<'text>,
    {
        // Resolving the written opcode yields the final opcode plus a parser for
        // the encoding that opcode expects
        let (opcode, encoding_parser) = Opcode::parse(p, stream)?.resolve();
        encoding_parser.parse(p, stream).map(|encoding| Self(opcode, encoding))
    }
}
impl From<Instruction> for u16 {
fn from(value: Instruction) -> Self { value.word() }
}
impl Display for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
}

View File

@ -1,81 +0,0 @@
// © 2023 John Breaux
//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode)
use super::*;
pub mod number;
pub mod register;
pub mod width;
pub mod jump_target;
pub mod primary_operand;
pub mod secondary_operand;
mod builder;
pub mod encoding_parser;
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
use encoding_parser::EncodingParser;
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
///
/// # Examples
/// ```rust
/// use msp430_asm::{preamble::*, parser::preamble::*};
/// // Create a token sequence
/// let asm_file = r".b 8000(r15)";
/// // Create a single-operand encoding parser
/// let single: EncodingParser = Encoding::single().end();
/// // Parse an Encoding from it
/// let encoding: Encoding = single
/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
/// .unwrap();
/// // Print the Encoding
/// println!("{encoding}");
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
/// One operand: a width and a destination
Single { width: Width, dst: PrimaryOperand },
/// A jump: only a target
Jump { target: JumpTarget },
/// Two operands: a width, a source, and a destination
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
}
impl Encoding {
/// Returns a builder for [Encoding::Single]
pub fn single() -> SingleBuilder { Default::default() }
/// Returns a builder for [Encoding::Jump]
pub fn jump() -> JumpBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
pub fn double() -> DoubleBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
///
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
/// dst are the same
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
///
pub fn word(&self) -> u16 {
match self {
Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16,
Encoding::Jump { target } => target.word().unwrap_or_default(),
Encoding::Double { width, src, dst } => {
u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
}
}
}
/// Returns extwords for instruction
pub fn extwords(&self) -> [Option<u16>; 2] {
match self {
Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()],
Encoding::Single { dst, .. } => [dst.ext_word(), None],
Encoding::Jump { .. } => [None, None],
}
}
}
impl Display for Encoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
Encoding::Jump { target } => write!(f, " {target}"),
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
}
}
}

View File

@ -1,76 +0,0 @@
// © 2023 John Breaux
//! Builder API for [`EncodingParser`]
use super::*;
/// Builds an [EncodingParser::Single]. Fields that are never set stay `None`.
#[derive(Debug, Default)]
pub struct SingleBuilder {
    width: Option<Width>,
    dst: Option<PrimaryOperand>,
}
impl SingleBuilder {
    /// Sets the [Width] field, converting it from a `bool`
    pub fn width(self, width: bool) -> Self {
        Self { width: Some(width.into()), ..self }
    }
    /// Sets the [PrimaryOperand] field
    pub fn operand(self, dst: PrimaryOperand) -> Self {
        Self { dst: Some(dst), ..self }
    }
    /// Build
    pub fn end(self) -> EncodingParser {
        let Self { width, dst } = self;
        EncodingParser::Single { width, dst }
    }
}
/// Builds an [EncodingParser::Jump]. An unset target stays `None`.
#[derive(Debug, Default)]
pub struct JumpBuilder {
    target: Option<JumpTarget>,
}
impl JumpBuilder {
    /// Sets the [JumpTarget] field
    pub fn target(self, target: JumpTarget) -> Self {
        let _ = self;
        Self { target: Some(target) }
    }
    /// Build
    pub fn end(self) -> EncodingParser {
        let Self { target } = self;
        EncodingParser::Jump { target }
    }
}
/// Builds an [EncodingParser::Double]. Fields that are never set stay `None`.
#[derive(Debug, Default)]
pub struct DoubleBuilder {
    width: Option<Width>,
    src: Option<PrimaryOperand>,
    dst: Option<SecondaryOperand>,
}
impl DoubleBuilder {
    /// Sets the [Width] field, converting it from a `bool`
    pub fn width(self, width: bool) -> Self {
        Self { width: Some(width.into()), ..self }
    }
    /// Sets the source ([PrimaryOperand]) field
    pub fn src(self, src: PrimaryOperand) -> Self {
        Self { src: Some(src), ..self }
    }
    /// Sets the destination ([SecondaryOperand]) field
    pub fn dst(self, dst: SecondaryOperand) -> Self {
        Self { dst: Some(dst), ..self }
    }
    /// Build
    pub fn end(self) -> EncodingParser {
        let Self { width, src, dst } = self;
        EncodingParser::Double { width, src, dst }
    }
}
/// Builds an [EncodingParser::Reflexive]. Fields that are never set stay `None`.
#[derive(Debug, Default)]
pub struct ReflexiveBuilder {
    width: Option<Width>,
    reg: Option<SecondaryOperand>,
}
impl ReflexiveBuilder {
    /// Sets the [Width] field, converting it from a `bool`
    pub fn width(self, width: bool) -> Self {
        Self { width: Some(width.into()), ..self }
    }
    /// Sets the [SecondaryOperand] used as both source and destination
    pub fn reg(self, reg: SecondaryOperand) -> Self {
        Self { reg: Some(reg), ..self }
    }
    /// Build
    pub fn end(self) -> EncodingParser {
        let Self { width, reg } = self;
        EncodingParser::Reflexive { width, reg }
    }
}

View File

@ -1,37 +0,0 @@
// © 2023 John Breaux
//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`]
use super::*;
#[derive(Clone, Debug)]
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
///
/// Each field that is `None` is a hole which [EncodingParser::parse] fills from the
/// token stream; a field that is `Some` is used as given.
pub enum EncodingParser {
/// Produces an [Encoding::Single]
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
/// Produces an [Encoding::Jump]
Jump { target: Option<JumpTarget> },
/// Produces an [Encoding::Double]
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
/// Produces an [Encoding::Double] whose source and destination are the same operand
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
}
impl EncodingParser {
    /// Constructs an [Encoding] from this [EncodingParser], filling holes
    /// with the tokenstream
    ///
    /// Fields that were preset on the parser are used as-is and consume no tokens.
    ///
    /// # Errors
    /// Forwards any error produced while parsing a missing operand or jump target.
    pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError>
    where T: crate::TokenStream<'text> {
        Ok(match self {
            Self::Single { width, dst } => Encoding::Single {
                width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
                dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? },
            },
            Self::Jump { target } => Encoding::Jump {
                // Only touch the stream when no target was preset: parsing eagerly
                // would consume tokens (or fail) even though the result is discarded
                target: match target {
                    Some(target) => target,
                    None => JumpTarget::parse(p, stream)?,
                },
            },
            Self::Double { width, src, dst } => Encoding::Double {
                width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
                src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? },
                dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? },
            },
            Self::Reflexive { width, reg } => {
                let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
                let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? };
                // The same operand is used as both source and destination
                Encoding::Double { width, src: reg.clone().into(), dst: reg }
            }
        })
    }
}

View File

@ -1,58 +0,0 @@
// © 2023 John Breaux
//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier)
//! for a [Jump](Encoding::Jump) [instruction]
use super::*;
/// Contains the [pc-relative offset](Number) or [label](Identifier)
/// for a [Jump](Encoding::Jump) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JumpTarget {
/// A numeric offset, stored in its encoded ("squished") form
Number(Number),
/// A label; it has no instruction word of its own
Identifier(Identifier),
}
impl JumpTarget {
    /// Returns the 10-bit offset field of the instruction word, or [None] for a label
    pub fn word(&self) -> Option<u16> {
        match self {
            JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff),
            JumpTarget::Identifier(_) => None,
        }
    }
    /// Encodes a byte offset as a 10-bit field: `(value / 2) - 1`, in two's complement.
    ///
    /// # Errors
    /// - [ParseError::JumpedOdd] when `value` is odd
    /// - [ParseError::JumpedTooFar] when `value` is outside `-1022..=1024`
    pub fn squish(value: isize) -> Result<u16, ParseError> {
        match value {
            i if i % 2 != 0 => Err(ParseError::JumpedOdd(i)),
            i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff),
            i => Err(ParseError::JumpedTooFar(i)),
        }
    }
    /// Decodes a 10-bit field produced by [JumpTarget::squish] back into a byte offset.
    ///
    /// Only the low 10 bits of `value` are used, and they are read as a two's
    /// complement number, so that backward (and zero) offsets round-trip.
    pub fn unsquish(value: u16) -> isize {
        let field = (value & 0x3ff) as isize;
        // Sign-extend: bit 9 is the sign bit of the 10-bit field
        let words = if field & 0x200 != 0 { field - 0x400 } else { field };
        (words + 1) << 1
    }
}
impl Parsable for JumpTarget {
    /// Parses either of:
    /// - Number (validated and encoded through [TryFrom])
    /// - Identifier
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: crate::TokenStream<'text> {
        match Number::try_parse(p, stream)? {
            Some(num) => Self::try_from(num),
            // Not a number: the target has to be a label
            None => Identifier::parse(p, stream).map(Self::Identifier),
        }
    }
}
impl TryFrom<Number> for JumpTarget {
type Error = ParseError;
fn try_from(value: Number) -> Result<Self, Self::Error> { Ok(Self::Number(Self::squish(value.into())?.into())) }
}
impl Display for JumpTarget {
    /// Prints a label by name, and a numeric target as its decoded byte offset in
    /// hexadecimal, with a leading `-` for backward jumps.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Number(num) => {
                let offset = Self::unsquish(u16::from(*num));
                // Valid offsets lie in -1022..=1024. A larger value is a backward jump
                // whose 10-bit field was decoded without sign extension; fold it back.
                let offset = if offset > 1024 { offset - 2048 } else { offset };
                if offset < 0 {
                    write!(f, "-{:x}", offset.unsigned_abs())
                } else {
                    write!(f, "{offset:x}")
                }
            }
            Self::Identifier(id) => write!(f, "{id}"),
        }
    }
}

View File

@ -1,81 +0,0 @@
// © 2023 John Breaux
//! A [`Number`] represents a 16-bit signed or unsigned word
use super::*;
/// A numeric literal: its value, and the radix it is printed in.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Number(isize, u32); // (value, radix)
impl Parsable for Number {
    /// Parses a number of the form
    /// `[Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number`.
    ///
    /// Without a radix marker, the parser's radix is used.
    ///
    /// # Errors
    /// - [ParseError::UnexpectedDigits] when the digits are not valid in the radix
    /// - [ParseError::NumberTooWide] when the value is outside `-0x8000..=0xffff`
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        use Type as Ty;
        // The number is negative when it begins with a Minus, but Plus is also acceptable.
        let negative = match stream.expect_any_of([Ty::Minus, Ty::Plus]) {
            Ok(sign) => sign.is_variant(Ty::Minus),
            Err(_) => false,
        };
        let marker = stream
            .expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin])
            .ok()
            .map(|t| t.variant());
        let radix = match marker {
            Some(Ty::RadixMarkerHex) => 16,
            Some(Ty::RadixMarkerDec) => 10,
            Some(Ty::RadixMarkerOct) => 8,
            Some(Ty::RadixMarkerBin) => 2,
            _ => p.radix,
        };
        let digits = stream.expect(Ty::Number)?;
        // TODO: Reintroduce error context
        let magnitude = isize::from_str_radix(digits.lexeme(), radix)
            .map_err(|_| ParseError::UnexpectedDigits(digits.lexeme().into(), radix))?;
        let sign = if negative { -1 } else { 1 };
        let value = magnitude * sign;
        // Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
        if !(-0x8000..0x10000).contains(&value) {
            return Err(ParseError::NumberTooWide(value));
        }
        Ok(Self(value, radix))
    }
}
impl From<isize> for Number {
fn from(value: isize) -> Self { Self(value, 16) }
}
impl From<Number> for isize {
fn from(value: Number) -> Self { value.0 as Self }
}
impl From<u16> for Number {
fn from(value: u16) -> Self { Self(value as isize, 16) }
}
impl From<Number> for u16 {
fn from(value: Number) -> Self { value.0 as Self }
}
impl std::ops::Sub<isize> for Number {
type Output = Self;
fn sub(mut self, rhs: isize) -> Self::Output {
self.0 -= rhs;
self
}
}
impl std::ops::Shr<usize> for Number {
type Output = Self;
fn shr(mut self, rhs: usize) -> Self::Output {
self.0 >>= rhs;
self
}
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.1 {
2 => std::fmt::Binary::fmt(&self.0, f),
8 => std::fmt::Octal::fmt(&self.0, f),
16 => std::fmt::LowerHex::fmt(&self.0, f),
_ => std::fmt::Display::fmt(&self.0, f),
}
}
}

View File

@ -1,146 +0,0 @@
// © 2023 John Breaux
//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension
//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`]
use super::*;
/// Contains the first [Register], addressing mode, and Extension Word for a
/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrimaryOperand {
/// Register direct: `r`
Direct(Register),
/// Register indirect: `@r`
Indirect(Register),
/// Register indirect with post-increment: `@r+`
PostInc(Register),
/// Indexed: `idx(r)`
Indexed(Register, Number),
/// A bare identifier (label)
Relative(Identifier),
/// Absolute address: `&n`
Absolute(Number),
/// Immediate value: `#n`
Immediate(Number),
/// The constant `#4`, which needs no extension word
Four,
/// The constant `#8`, which needs no extension word
Eight,
/// The constant `#0`, which needs no extension word
Zero,
/// The constant `#1`, which needs no extension word
One,
/// The constant `#2`, which needs no extension word
Two,
/// The constant `#-1`, which needs no extension word
MinusOne,
}
impl PrimaryOperand {
/// Returns the mode bits
pub fn mode(&self) -> u16 {
use PrimaryOperand::*;
match self {
Direct(_) | Zero => 0,
Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4,
Indirect(_) | Two | Four => 2 << 4,
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
}
}
/// Gets the register
pub fn register(&self) -> Register {
use PrimaryOperand::*;
match self {
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
Immediate(_) | Relative(_) => Register::pc,
Absolute(_) | Four | Eight => Register::sr,
Zero | One | Two | MinusOne => Register::cg,
}
}
/// Gets the extension word, if present
pub fn ext_word(&self) -> Option<u16> {
use PrimaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for PrimaryOperand {
/// Parses a primary operand by trying, in order: a register (direct), a number
/// followed by `(register)` (indexed), an identifier (relative), and finally one
/// of the prefix markers for the indirect, absolute, and immediate forms.
///
/// Immediates equal to -1 (or 0xffff), 0, 1, 2, 4, and 8 become the dedicated
/// constant variants instead of [PrimaryOperand::Immediate].
///
/// # Errors
/// Forwards the error of whichever token expectation or sub-parser fails.
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
// Try parsing as Register (Direct)
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number (Indexed)
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Try parsing as Identifier (Relative, label mode)
if let Some(id) = Identifier::try_parse(p, stream)? {
return Ok(Self::Relative(id));
}
// Or directly match any of the valid prefix markers
// Register, Number, and Identifier are included here to make error messages clearer.
// their inclusion will cause a negligible slowdown when the next token is not a prefix marker
// (a failure condition)
let token = stream.expect_any_of([
Type::Indirect,
Type::Absolute,
Type::Immediate,
Type::Register,
Type::Number,
Type::Identifier,
])?;
Ok(match token.variant() {
Type::Indirect => {
let reg = Register::parse(p, stream)?;
// A trailing `+` selects post-increment; without it, plain indirect
match stream.expect(Type::Plus) {
Ok(_) => Self::PostInc(reg),
Err(_) => Self::Indirect(reg),
}
}
Type::Absolute => Self::Absolute(Number::parse(p, stream)?),
Type::Immediate => {
let number = Number::parse(p, stream)?;
match number.into() {
// There are two representations for the all-ones constant, since Number preserves
// signedness.
-1_isize | 0xffff => Self::MinusOne,
0 => Self::Zero,
1 => Self::One,
2 => Self::Two,
4 => Self::Four,
8 => Self::Eight,
_ => Self::Immediate(number),
}
}
// NOTE(review): this arm is also reached if the expectation above ever accepts a
// Register, Number, or Identifier token; it presumably relies on the three
// attempts above having already claimed those -- confirm.
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl From<SecondaryOperand> for PrimaryOperand {
fn from(value: SecondaryOperand) -> Self {
match value {
SecondaryOperand::Direct(r) => Self::Direct(r),
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
SecondaryOperand::Absolute(n) => Self::Absolute(n),
SecondaryOperand::Relative(id) => Self::Relative(id),
SecondaryOperand::Zero => Self::Zero,
SecondaryOperand::One => Self::One,
}
}
}
impl Display for PrimaryOperand {
    /// Writes the operand using the syntax that parses back into the same variant
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Operands which carry data are written immediately; the fixed-spelling immediates
        // fall through as a string.
        let constant = match self {
            Self::Direct(r) => return Display::fmt(r, f),
            Self::Relative(id) => return Display::fmt(id, f),
            Self::Indirect(r) => return write!(f, "@{r}"),
            Self::PostInc(r) => return write!(f, "@{r}+"),
            Self::Indexed(r, idx) => return write!(f, "{idx}({r})"),
            Self::Absolute(n) => return write!(f, "&{n}"),
            Self::Immediate(n) => return write!(f, "#{n}"),
            Self::Four => "#4",
            Self::Eight => "#8",
            Self::Zero => "#0",
            Self::One => "#1",
            Self::Two => "#2",
            Self::MinusOne => "#-1",
        };
        // `pad` is exactly what `Display for str` does, so width/alignment flags still apply
        f.pad(constant)
    }
}

View File

@ -1,112 +0,0 @@
// © 2023 John Breaux
//! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
use super::*;
use std::str::FromStr;
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
///
/// Variants are declared in register-number order with implicit discriminants, so `pc` is 0 and
/// `r15` is 15. The `From<Register> for u16` conversion relies on this ordering.
// Variant names are lowercase so that they are spelled exactly like the strings accepted by
// `FromStr` and produced by `From<Register> for &str`.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Register {
/// Program Counter
pc,
/// Stack Pointer
sp,
/// Status Register
sr,
/// Constant Generator
cg,
// Registers 4 through 15 are only known by their number
r4,
r5,
r6,
r7,
r8,
r9,
r10,
r11,
r12,
r13,
r14,
r15,
}
impl Parsable for Register {
    /// Expects a register token and converts its lexeme with [`FromStr`]
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: crate::TokenStream<'text> {
        let token = stream.expect(Type::Register)?;
        token.lexeme().parse()
    }
}
impl From<Register> for u16 {
    /// Returns the register number, which is the variant's discriminant
    fn from(value: Register) -> Self {
        value as Self
    }
}
impl TryFrom<u16> for Register {
    type Error = ParseError;

    /// Converts a register number in `0..=15` into the matching [Register]
    ///
    /// # Errors
    /// Returns [`ParseError::RegisterTooHigh`] carrying `value` when it is greater than 15.
    fn try_from(value: u16) -> Result<Self, Self::Error> {
        use Register::*;
        // Indexed by register number
        const BY_NUMBER: [Register; 16] =
            [pc, sp, sr, cg, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15];
        match BY_NUMBER.get(usize::from(value)) {
            Some(&register) => Ok(register),
            None => Err(ParseError::RegisterTooHigh(value)),
        }
    }
}
impl FromStr for Register {
    type Err = ParseError;

    /// Parses a register name
    ///
    /// Accepts the special names `pc`, `sp`, `sr`, and `cg`. Any other string has its first
    /// character skipped, and the remainder is parsed as a register number.
    ///
    /// # Errors
    /// - [`ParseError::NotARegister`] when the string is empty, or when what follows the first
    ///   character is not a valid `u16`
    /// - [`ParseError::RegisterTooHigh`] when the number is greater than 15
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use Register::*;
        match s {
            "pc" => Ok(pc),
            "sp" => Ok(sp),
            "sr" => Ok(sr),
            "cg" => Ok(cg),
            _ => {
                // `get` rather than `s[1..]`: slicing panics on an empty string, and on a string
                // whose first character is wider than one byte. Both are simply not registers.
                let number = s
                    .get(1..)
                    .and_then(|digits| digits.parse::<u16>().ok())
                    .ok_or_else(|| ParseError::NotARegister(s.into()))?;
                number.try_into()
            }
        }
    }
}
impl From<Register> for &str {
fn from(value: Register) -> Self {
use Register::*;
match value {
pc => "pc",
sp => "sp",
sr => "sr",
cg => "cg",
r4 => "r4",
r5 => "r5",
r6 => "r6",
r7 => "r7",
r8 => "r8",
r9 => "r9",
r10 => "r10",
r11 => "r11",
r12 => "r12",
r13 => "r13",
r14 => "r14",
r15 => "r15",
}
}
}
impl std::fmt::Display for Register {
    /// Writes the register's assembly name; formatter width and alignment are not applied
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name: &str = (*self).into();
        f.write_str(name)
    }
}

View File

@ -1,105 +0,0 @@
// © 2023 John Breaux
//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension
//! Word for a [two-operand](Encoding::Double) [instruction]
use super::*;
/// The destination of a [Double](Encoding::Double)
///
/// Each variant is annotated with the assembly syntax its `Display` implementation emits.
/// Variant order matters: the derived `PartialOrd`/`Ord` follow declaration order.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SecondaryOperand {
/// Register direct: `rN`
Direct(Register),
/// Indexed: `index(rN)`; the index is the extension word
Indexed(Register, Number),
/// Relative (label) mode: a bare identifier, encoded against `pc`
Relative(Identifier),
/// Absolute: `&number`, encoded against `sr`; the number is the extension word
Absolute(Number),
// Joke encodings?
/// The immediate `#0`, encoded against `cg`
Zero,
/// The immediate `#1`, encoded against `cg`
One,
}
use SecondaryOperand as So;
impl SecondaryOperand {
pub fn mode(&self) -> u16 {
match self {
So::Direct(_) | So::Zero => 0,
So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7,
}
}
pub fn register(&self) -> Register {
use SecondaryOperand::*;
match self {
Direct(r) | Indexed(r, _) => *r,
Relative(_) => Register::pc,
Absolute(_) => Register::sr,
Zero | One => Register::cg,
}
}
/// This is the only way to have an extension word
pub fn ext_word(&self) -> Option<u16> {
use SecondaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for SecondaryOperand {
// Separator
// - Register => Direct
// - Number => Indexed
// - OpenIdx
// - Register
// - CloseIdx
// - Absolute
// - Number
// - Immediate
// - Number == 0, 1
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
use SecondaryOperand::*;
stream.allow(Type::Separator);
// Try parsing as Register (Direct)
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number (Indexed)
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Try parsing as Identifier (Relative, label mode)
if let Some(id) = Identifier::try_parse(p, stream)? {
return Ok(Self::Relative(id));
}
// Register, Number, and Identifier are included here to make error messages clearer.
// their inclusion will cause a negligible slowdown when the next token is not a prefix marker
// (a failure condition) but should not match a token
let token =
stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?;
Ok(match token.variant() {
Type::Absolute => Absolute(Number::parse(p, stream)?),
// TODO: Reintroduce error context
Type::Immediate => match Number::parse(p, stream)?.into() {
0 => Zero,
1 => One,
n => Err(ParseError::FatSecondaryImmediate(n))?,
},
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl Display for SecondaryOperand {
    /// Writes the operand using the same syntax the parser accepts
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Operands which carry data are written immediately; the fixed-spelling immediates
        // fall through as a string.
        let constant = match self {
            Self::Direct(r) => return Display::fmt(r, f),
            Self::Relative(id) => return Display::fmt(id, f),
            Self::Indexed(r, idx) => return write!(f, "{idx}({r})"),
            Self::Absolute(n) => return write!(f, "&{n}"),
            Self::Zero => "#0",
            Self::One => "#1",
        };
        // `pad` is exactly what `Display for str` does, so width/alignment flags still apply
        f.pad(constant)
    }
}

View File

@ -1,32 +0,0 @@
// © 2023 John Breaux
//! A [`Width`] represents whether an instruction operates on whole words or bytes
use super::*;
/// Represents an instruction's operand width.
///
/// Evaluates to false when instruction takes word-sized operands, or true when
/// instruction takes byte-sized operands
///
/// Converting to `u16` places the flag in bit 6. The `Default` is word-sized (`false`).
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Width(bool);
impl Parsable for Width {
    /// Parses an optional width suffix
    ///
    /// Never fails: when neither width token is found, the result is word-sized.
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let is_byte = match stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) {
            Ok(token) => token.is_variant(Type::ByteWidth),
            Err(_) => false,
        };
        Ok(Self(is_byte))
    }
}
impl From<Width> for u16 {
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
}
impl From<Width> for bool {
fn from(value: Width) -> Self { value.0 }
}
impl From<bool> for Width {
fn from(value: bool) -> Self { Width(value) }
}
impl std::fmt::Display for Width {
    /// Writes the `.b` suffix for byte-sized operands, and nothing for word-sized operands
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let suffix = match self.0 {
            true => ".b",
            false => "",
        };
        f.write_str(suffix)
    }
}

View File

@ -1,261 +0,0 @@
// © 2023 John Breaux
//! An [`Opcode`] encodes an msp430 operation
use super::*;
use std::str::FromStr;
/// Opcode from the [MSPGCC Manual][1]
///
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
/// extract from a [TokenStream] only the required arguments for that call.
///
/// Variants with an explicit discriminant carry their instruction-word bits as that value.
///
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
// The `0 << n` terms in the first Single and Jump discriminants are written out so that each
// group reads as a uniform table; clippy would otherwise flag them as identity operations.
#[allow(clippy::identity_op)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
// "Emulated" opcodes
// These have no encoding of their own (their implicit discriminants simply count up from 0);
// `resolve()` rewrites each of them into one of the Double opcodes below.
Nop,
Pop,
Br,
Ret,
Clrc,
Setc,
Clrz,
Setz,
Clrn,
Setn,
Dint,
Eint,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
// Single
// 0x1000, with the operation number in bits 7 and up
Rrc = 0x1000 | 0 << 7,
Swpb = 0x1000 | 1 << 7,
Rra = 0x1000 | 2 << 7,
Sxt = 0x1000 | 3 << 7,
Push = 0x1000 | 4 << 7,
Call = 0x1000 | 5 << 7,
Reti = 0x1000 | 6 << 7,
// Jump
// 0x2000, with the condition number in bits 10 and up
Jnz = 0x2000 | 0 << 10,
Jz = 0x2000 | 1 << 10,
Jnc = 0x2000 | 2 << 10,
Jc = 0x2000 | 3 << 10,
Jn = 0x2000 | 4 << 10,
Jge = 0x2000 | 5 << 10,
Jl = 0x2000 | 6 << 10,
Jmp = 0x2000 | 7 << 10,
// Double
// The operation number occupies the top four bits
Mov = 0x4000,
Add = 0x5000,
Addc = 0x6000,
Subc = 0x7000,
Sub = 0x8000,
Cmp = 0x9000,
Dadd = 0xa000,
Bit = 0xb000,
Bic = 0xc000,
Bis = 0xd000,
Xor = 0xe000,
And = 0xf000,
}
impl Opcode {
    /// Resolve an Opcode into an [Opcode] and an [EncodingParser]
    ///
    /// Opcodes with their own encoding are returned unchanged. Emulated opcodes are replaced by
    /// the Double opcode which implements them, with the implied operands already filled in.
    pub fn resolve(self) -> (Opcode, EncodingParser) {
        use super::Encoding as Enc;
        use Register as Reg;
        use {PrimaryOperand as Src, SecondaryOperand as Dst};

        // Emulated status-flag instructions: a fixed source applied to `sr`
        let status_flag = |src: Src| Enc::double().src(src).dst(Dst::Direct(Reg::sr)).end();
        // Emulated instructions which fix only the source; the destination is still parsed
        let fixed_src = |src: Src| Enc::double().src(src).end();

        match self {
            // Single
            Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()),
            // these instructions do not take a width specifier (though they may still behave
            // properly)
            Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()),
            // `reti` does not take any operands.
            Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()),

            // Jump
            Self::Jnz
            | Self::Jz
            | Self::Jnc
            | Self::Jc
            | Self::Jn
            | Self::Jge
            | Self::Jl
            | Self::Jmp => (self, Enc::jump().end()),

            // Double
            Self::Mov
            | Self::Add
            | Self::Addc
            | Self::Subc
            | Self::Sub
            | Self::Cmp
            | Self::Dadd
            | Self::Bit
            | Self::Bic
            | Self::Bis
            | Self::Xor
            | Self::And => (self, Enc::double().end()),

            // Emulated: control flow and stack
            Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
            Self::Pop => (Self::Mov, fixed_src(Src::PostInc(Reg::sp))),
            Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()),
            Self::Ret => (
                Self::Mov,
                Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end(),
            ),

            // Emulated: status register flags
            Self::Clrc => (Self::Bic, status_flag(Src::One)),
            Self::Setc => (Self::Bis, status_flag(Src::One)),
            Self::Clrz => (Self::Bic, status_flag(Src::Two)),
            Self::Setz => (Self::Bis, status_flag(Src::Two)),
            Self::Clrn => (Self::Bic, status_flag(Src::Four)),
            Self::Setn => (Self::Bis, status_flag(Src::Four)),
            Self::Dint => (Self::Bic, status_flag(Src::Eight)),
            Self::Eint => (Self::Bis, status_flag(Src::Eight)),

            // Emulated: the single operand is used as both source and destination
            Self::Rla => (Self::Add, Enc::reflexive().end()),
            Self::Rlc => (Self::Addc, Enc::reflexive().end()),

            // Emulated: fixed source constant
            Self::Inv => (Self::Xor, fixed_src(Src::MinusOne)),
            Self::Clr => (Self::Mov, fixed_src(Src::Zero)),
            Self::Tst => (Self::Cmp, fixed_src(Src::Zero)),
            Self::Dec => (Self::Sub, fixed_src(Src::One)),
            Self::Decd => (Self::Sub, fixed_src(Src::Two)),
            Self::Inc => (Self::Add, fixed_src(Src::One)),
            Self::Incd => (Self::Add, fixed_src(Src::Two)),
            Self::Adc => (Self::Addc, fixed_src(Src::Zero)),
            Self::Dadc => (Self::Dadd, fixed_src(Src::Zero)),
            Self::Sbc => (Self::Subc, fixed_src(Src::Zero)),
        }
    }
}
impl Parsable for Opcode {
    /// Expects an instruction token and parses it into an [Opcode]
    fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // TODO: Reintroduce error context
        let token = stream.expect(Type::Insn)?;
        token.parse()
    }
}
impl FromStr for Opcode {
    type Err = ParseError;

    /// Parses a mnemonic, ignoring ASCII case
    ///
    /// # Errors
    /// Returns [`ParseError::UnrecognizedOpcode`] carrying the lowercased input when it is not
    /// a known mnemonic.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        //TODO: Reduce allocations here?
        let mnemonic = s.to_ascii_lowercase();
        let opcode = match mnemonic.as_str() {
            // Single
            "rrc" => Self::Rrc,
            "swpb" => Self::Swpb,
            "rra" => Self::Rra,
            "sxt" => Self::Sxt,
            "push" => Self::Push,
            "call" => Self::Call,
            "reti" => Self::Reti,
            // Jump; several conditions have two spellings
            "jne" | "jnz" => Self::Jnz,
            "jeq" | "jz" => Self::Jz,
            "jnc" | "jlo" => Self::Jnc,
            "jc" | "jhs" => Self::Jc,
            "jn" => Self::Jn,
            "jge" => Self::Jge,
            "jl" => Self::Jl,
            "jmp" => Self::Jmp,
            // Double
            "mov" => Self::Mov,
            "add" => Self::Add,
            "addc" => Self::Addc,
            "subc" => Self::Subc,
            "sub" => Self::Sub,
            "cmp" => Self::Cmp,
            "dadd" => Self::Dadd,
            "bit" => Self::Bit,
            "bic" => Self::Bic,
            "bis" => Self::Bis,
            "xor" => Self::Xor,
            "and" => Self::And,
            // Emulated
            "nop" => Self::Nop,
            "pop" => Self::Pop,
            "br" => Self::Br,
            "ret" => Self::Ret,
            "clrc" => Self::Clrc,
            "setc" => Self::Setc,
            "clrz" => Self::Clrz,
            "setz" => Self::Setz,
            "clrn" => Self::Clrn,
            "setn" => Self::Setn,
            "dint" => Self::Dint,
            "eint" => Self::Eint,
            "rla" => Self::Rla,
            "rlc" => Self::Rlc,
            "inv" => Self::Inv,
            "clr" => Self::Clr,
            "tst" => Self::Tst,
            "dec" => Self::Dec,
            "decd" => Self::Decd,
            "inc" => Self::Inc,
            "incd" => Self::Incd,
            "adc" => Self::Adc,
            "dadc" => Self::Dadc,
            "sbc" => Self::Sbc,
            _ => return Err(ParseError::UnrecognizedOpcode(mnemonic)),
        };
        Ok(opcode)
    }
}
impl Display for Opcode {
    /// Writes the lowercase mnemonic; formatter width and alignment are not applied
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            // Emulated
            Self::Nop => "nop",
            Self::Pop => "pop",
            Self::Br => "br",
            Self::Ret => "ret",
            Self::Clrc => "clrc",
            Self::Setc => "setc",
            Self::Clrz => "clrz",
            Self::Setz => "setz",
            Self::Clrn => "clrn",
            Self::Setn => "setn",
            Self::Dint => "dint",
            Self::Eint => "eint",
            Self::Rla => "rla",
            Self::Rlc => "rlc",
            Self::Inv => "inv",
            Self::Clr => "clr",
            Self::Tst => "tst",
            Self::Dec => "dec",
            Self::Decd => "decd",
            Self::Inc => "inc",
            Self::Incd => "incd",
            Self::Adc => "adc",
            Self::Dadc => "dadc",
            Self::Sbc => "sbc",
            // Single
            Self::Rrc => "rrc",
            Self::Swpb => "swpb",
            Self::Rra => "rra",
            Self::Sxt => "sxt",
            Self::Push => "push",
            Self::Call => "call",
            Self::Reti => "reti",
            // Jump
            Self::Jnz => "jnz",
            Self::Jz => "jz",
            Self::Jnc => "jnc",
            Self::Jc => "jc",
            Self::Jn => "jn",
            Self::Jge => "jge",
            Self::Jl => "jl",
            Self::Jmp => "jmp",
            // Double
            Self::Mov => "mov",
            Self::Add => "add",
            Self::Addc => "addc",
            Self::Subc => "subc",
            Self::Sub => "sub",
            Self::Cmp => "cmp",
            Self::Dadd => "dadd",
            Self::Bit => "bit",
            Self::Bic => "bic",
            Self::Bis => "bis",
            Self::Xor => "xor",
            Self::And => "and",
        };
        f.write_str(mnemonic)
    }
}

View File

@ -1,21 +0,0 @@
// © 2023 John Breaux
//! The definition of a label
use super::*;
/// The definition of a label
///
/// Wraps the label's [Identifier]. Displayed as the identifier followed by a `:`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label(pub Identifier);
impl Parsable for Label {
    /// Parses an [Identifier] which must be followed by a label token
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let name = Identifier::parse(p, stream)?;
        stream.require(Type::Label)?;
        Ok(Self(name))
    }
}
impl Display for Label {
    /// Writes the identifier followed by a colon
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{}:", self.0))
    }
}

View File

@ -1,72 +0,0 @@
// © 2023 John Breaux
//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same
//! document line will be treated as if they took up multiple [`Line`s](Line).
//!
//! A line contains one of:
//! - [`Label`]
//! - [`Instruction`]
//! - [`Directive`]
//! - [`Comment`]
//! - [Nothing](Line::Empty)
use super::*;
/// A line contains any one of:
/// - [`Label`] (definition)
/// - [`Instruction`]
/// - [`Directive`]
/// - [`Comment`]
/// - Nothing at all
///
/// Parsing also produces [`Line::EndOfFile`] once the end-of-file token is reached.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Line {
/// A line holding only a line ending
Empty,
/// An instruction
Insn(Instruction),
/// A comment
Comment(Comment),
/// A directive
Directive(Directive),
/// A label definition
Label(Label),
EndOfFile, // Expected end of file
}
impl Parsable for Line {
    /// Parses one [Line], choosing the sub-parser from the kind of the next token
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // Only peek: each sub-parser consumes its own leading token
        let leading = stream
            .peek_expect_any_of([
                Type::Endl,
                Type::Insn,
                Type::Comment,
                Type::Directive,
                Type::Identifier,
                Type::EndOfFile,
            ])?
            .variant();
        let line = match leading {
            Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
            Type::Comment => Self::Comment(Comment::parse(p, stream)?),
            Type::Directive => Self::Directive(Directive::parse(p, stream)?),
            // An identifier at the start of a line can only begin a label definition
            Type::Identifier => Self::Label(Label::parse(p, stream)?),
            // The remaining two kinds have no sub-parser, so the token is consumed here
            Type::Endl => {
                stream.next();
                Self::Empty
            }
            Type::EndOfFile => {
                stream.next();
                Self::EndOfFile
            }
            _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
        };
        Ok(line)
    }
}
impl Display for Line {
    /// Writes the contained node; an empty line and the end of file have fixed text
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Insn(insn) => Display::fmt(insn, f),
            Self::Label(label) => Display::fmt(label, f),
            Self::Comment(comment) => Display::fmt(comment, f),
            Self::Directive(directive) => Display::fmt(directive, f),
            // NOTE(review): this emits two newlines (the literal plus the one `writeln!` adds)
            Self::Empty => writeln!(f, "\n"),
            Self::EndOfFile => f.write_str("; End of file."),
        }
    }
}

View File

@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
use super::*;
/// An AST node which can be built by consuming tokens from a [stream](TokenStream)
pub trait Parsable {
    /// Parses tokens from the [TokenStream] into a `Self` node
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>;

    /// Attempts to parse tokens from the [stream](TokenStream) into a `Self` node.
    ///
    /// Masks failed expectations: a [`ParseError::LexError`] becomes `Ok(None)`, while every
    /// other error is passed through.
    fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        match Self::parse(p, stream) {
            Err(ParseError::LexError(_)) => Ok(None),
            outcome => outcome.map(Some),
        }
    }

    /// Parses a `Self` node, then calls `f` with the same parser and stream.
    ///
    /// Returns both results as a pair. `f` is not called when parsing `Self` fails.
    fn parse_and<'text, T, R>(
        p: &Parser,
        stream: &mut T,
        f: fn(p: &Parser, &mut T) -> R,
    ) -> Result<(Self, R), ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        let node = Self::parse(p, stream)?;
        let extra = f(p, stream);
        Ok((node, extra))
    }

    /// Attempts to parse tokens from the [stream](TokenStream) into a `Self` node.
    ///
    /// Returns [`Self::default()`](Default::default()) on any error
    fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
    where
        Self: Sized + Default,
        T: TokenStream<'text>,
    {
        match Self::parse(p, stream) {
            Ok(node) => node,
            Err(_) => Self::default(),
        }
    }
}
/// Implements [Parsable] for each listed type by expecting a string token, trimming the
/// surrounding `"` characters from its lexeme, and converting the remainder with `into()`
macro_rules! parsable_str_types {
    ($($target:ty),* $(,)?) => {
        $(
            impl Parsable for $target {
                fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
                where T: TokenStream<'text> {
                    let token = stream.expect(Type::String)?;
                    Ok(token.lexeme().trim_matches('"').into())
                }
            }
        )*
    };
}
use std::{path::PathBuf, rc::Rc};
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];
/// A bracketed list of any [Parsable] parses into a [Vec]
impl<P: Parsable> Parsable for Vec<P> {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // Accepted shapes:
        //   [dead beef]
        //   [A, B,]
        //   [c d e f]
        //   [ something
        //     else ]
        stream.require(Type::LBracket)?;
        stream.allow(Type::Endl);
        let mut items = Vec::new();
        loop {
            // The list ends at the first token which does not begin another element
            let Some(item) = P::try_parse(p, stream)? else {
                break;
            };
            items.push(item);
            // Elements may be followed by a separator and/or a line ending
            stream.allow(Type::Separator);
            stream.allow(Type::Endl);
        }
        stream.require(Type::RBracket)?;
        Ok(items)
    }
}

View File

@ -1,51 +0,0 @@
use std::path::{Path, PathBuf};
// © 2023 John Breaux
use super::*;
/// Contains the entire AST
///
/// Field 0 is the path of the source file, when one has been set. Field 1 holds every parsed
/// [Line] paired with the line number reported by the token stream when that line was parsed.
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Root(Option<PathBuf>, Vec<(usize, Line)>);
// pub struct Root { pub path: PathBuf, pub lines: Vec<Line> }
impl Root {
    /// Returns the path of the source file, if one has been set
    pub fn file(&self) -> Option<&Path> {
        self.0.as_ref().map(|path| path.as_path())
    }

    /// Records `path` as the source file, replacing any previous path
    pub(crate) fn set_file(mut self, path: PathBuf) -> Self {
        self.0 = Some(path);
        self
    }

    /// Returns every parsed line, paired with its line number
    pub fn lines(&self) -> &[(usize, Line)] {
        self.1.as_slice()
    }
}
impl Parsable for Root {
    /// Parses [Line]s until the end of file is reached
    ///
    /// The resulting [Root] has no file path set.
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let mut lines = Vec::new();
        loop {
            // Sample the line number before parsing moves the stream forward
            let number = stream.context().line();
            let line = Line::parse(p, stream)?;
            if matches!(line, Line::EndOfFile) {
                break;
            }
            lines.push((number, line));
        }
        Ok(Root(None, lines))
    }
}
impl Display for Root {
    /// Writes every line prefixed by its line number, padded by the formatter
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.1.iter().try_for_each(|(num, line)| {
            let rendered = format!("{num:3}: {line} ");
            f.pad(&rendered)
        })
    }
}
impl Debug for Root {
    /// Writes the `Debug` form of the source path, when one is set, followed by the `Debug`
    /// form of every `(line number, Line)` entry. Nothing is written between the items.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Field 0 is only the optional path
        if let Some(path) = &self.0 {
            Debug::fmt(path, f)?;
        }
        // Field 1 holds the lines themselves; these must be iterated to show the AST
        for line in &self.1 {
            Debug::fmt(line, f)?;
        }
        Ok(())
    }
}

87
src/preprocessor.rs Normal file
View File

@ -0,0 +1,87 @@
// © 2023-2024 John Breaux
use crate::{
lexer::{
token::{Token, TokenKind as Kind},
Lexer,
},
util::Span,
};
use std::collections::{HashMap, VecDeque};
/// Sits between the [Lexer] and its consumer, recording `.define` directives and replacing
/// tokens whose lexeme has been defined
#[derive(Clone, Debug)]
pub struct Preprocessor<'t> {
/// Source of fresh tokens
lexer: Lexer<'t>,
/// Tokens queued for `scan` to return before the lexer is consulted again
buf: VecDeque<Token<'t>>,
/// Replacement tokens recorded by `.define`, keyed by the lexeme they replace
defn: HashMap<&'t str, Vec<Token<'t>>>,
/// Location for injected tokens
pos: Span<usize>,
}
impl<'t> Preprocessor<'t> {
pub fn new(text: &'t str) -> Self {
Self {
lexer: Lexer::new(text),
buf: Default::default(),
defn: Default::default(),
pos: Default::default(),
}
}
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() }
}
pub fn scan(&mut self) -> Option<Token<'t>> {
self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos)
}
pub fn start(&self) -> usize {
self.lexer.location()
}
/// Grabs a token from the lexer, and attempts to match its lexeme
fn next(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
if let Some(tokens) = self.defn.get(token.lexeme) {
self.buf.extend(tokens.iter().copied().map(|mut t| {
t.pos = self.pos;
t
}));
return self.scan();
} else {
match token.kind {
Kind::Directive => self.directive(token),
Kind::Newline => return self.scan(),
_ => {}
}
Some(token)
}
}
/// Passes a token through while parsing a directive
fn tee(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
self.buf.push_back(token);
// self.buf.push_back(token);
Some(token)
}
/// Parses and executes a directive
pub fn directive(&mut self, token: Token<'t>) {
if ".define" == token.lexeme {
self.define()
}
}
pub fn define(&mut self) {
let Some(key) = self.tee() else {
return;
};
let mut value = vec![];
while let Some(token) = self.tee() {
match token.kind {
Kind::Comment => {
self.buf.push_back(token);
break;
}
Kind::Newline => break,
_ => value.push(token),
}
}
self.defn.insert(key.lexeme, value);
}
}