v0.3.0: Total overhaul
- Everything has been rewritten
- Modularity is improved somewhat
- No dependency injection in preprocessor/parser, though
- There are now early and late constant evaluation engines
- This engine allows for by-value access to already-assembled code
- Performs basic math operations, remainder, bitwise logic, bit shifts, negation, and bit inversion
- Also allows for indexing into already-generated code using pointer-arithmetic syntax: `*(&main + 10)` (see the sketch below). This is subject to change? It's clunky, and only allows word-aligned access. However, this rewrite is taking far too long, so I'll call the bikeshedding here.
- Pretty sure this constant evaluation is computationally equivalent to Deadfish?
parent e4a1b889c2
commit fc8f8b9622
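
For illustration, a minimal sketch of how the pointer-arithmetic syntax and the new pipeline fit together, assuming the libmsp430 API introduced further down in this diff (Parser::new, parse::<Statements>, to_canonical, assemble); the assembly source here is invented for the example, not taken from the test suite:

// Hedged sketch: drives the parse -> canonicalize -> assemble pipeline the
// same way the frontend in this diff does; the asm source below is made up,
// and the expression syntax is taken from the changelog entry above.
use libmsp430::{
    assembler::Assemble,
    parser::ast::{canonical::Canonicalize, Statements},
    parser::Parser,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let src = "
main:
    mov #0xbeef, r15
    ret
; Late constant evaluation: dereference already-assembled code by address.
; Offsets are in bytes and must be word-aligned; &main + 2 is the word
; holding the #0xbeef extension word emitted by the mov above.
    .word *(&main + 2)
";
    let words: Vec<u16> = Parser::new(src).parse::<Statements>()?.to_canonical().assemble()?;
    for word in words {
        println!("{:04x}", word);
    }
    Ok(())
}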
@ -1,12 +1,12 @@
|
||||
unstable_features = true
|
||||
max_width = 120
|
||||
max_width = 100
|
||||
wrap_comments = true
|
||||
comment_width = 100
|
||||
|
||||
# Allow structs to fill an entire line
|
||||
use_small_heuristics = "Max"
|
||||
# Allow small functions on single line
|
||||
fn_single_line = true
|
||||
# fn_single_line = true
|
||||
|
||||
# Alignment
|
||||
enum_discrim_align_threshold = 12
|
||||
|
33 Cargo.toml
@ -1,23 +1,24 @@
|
||||
[package]
|
||||
name = "msp430-asm"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
rust-version = "1.70"
|
||||
[workspace]
|
||||
members = ["msp430-asm"]
|
||||
# default-members = ["msp430-asm"]
|
||||
|
||||
[workspace.package]
|
||||
authors = ["John Breaux <j@soft.fish>"]
|
||||
version = "0.3.0"
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
publish = false
|
||||
|
||||
[features]
|
||||
default = []
|
||||
[package]
|
||||
name = "libmsp430"
|
||||
authors.workspace = true
|
||||
version.workspace = true
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
publish.workspace = true
|
||||
|
||||
[[example]]
|
||||
name = "msp430-asm"
|
||||
path = "examples/msp430-asm/main.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
regex = "1.9.3"
|
||||
# TODO: Remove dependency on regex
|
||||
|
||||
[dev-dependencies]
|
||||
anes = { version = "0.1.6" }
|
||||
argp = { version = "0.3.0" }
|
||||
# Provides very quick boolean tests for XID_START and XID_CONTINUE
|
||||
unicode-ident = "1.0.12"
|
||||
|
9 LICENSE.md Normal file
@ -0,0 +1,9 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright © 2023-2024 John Breaux
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@ -1,222 +1,224 @@
|
||||
//! TODO: rewrite for v0.3.0
|
||||
use super::*;
|
||||
|
||||
/// Creates a [Parsable] implementation for an enum whose variants
|
||||
/// are named after other [Parsable] items
|
||||
macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
|
||||
$( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
|
||||
impl ::msp430_asm::parser::parsable::Parsable for $id {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
$(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
|
||||
{ Err(ParseError::UnrecognizedDirective("".into())) }
|
||||
}
|
||||
}
|
||||
impl TryFrom<&str> for $id {
|
||||
type Error = ParseError;
|
||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||
Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
make_parsable! {
|
||||
#[derive(Debug)]
|
||||
pub enum SyntaxFragment {
|
||||
Opcode,
|
||||
PrimaryOperand,
|
||||
Number,
|
||||
}
|
||||
}
|
||||
|
||||
impl SyntaxFragment {
|
||||
pub fn info(&self) {
|
||||
match self {
|
||||
SyntaxFragment::Opcode(o) => Self::opcode_info(o),
|
||||
SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o),
|
||||
SyntaxFragment::Number(n) => println!("The number {n}"),
|
||||
}
|
||||
}
|
||||
fn opcode_info(o: &Opcode) {
|
||||
let (desc, as_rust) = usage(o);
|
||||
println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
|
||||
footer!("https://mspgcc.sourceforge.net/manual/x223.html");
|
||||
}
|
||||
// TODO: re-enable full instruction decoding
|
||||
// fn encoding_info(e: &Encoding) {
|
||||
// match e {
|
||||
// Encoding::Single { dst, .. } => Self::operand_info(dst),
|
||||
// Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
|
||||
// Encoding::Double { src, dst, .. } => {
|
||||
// Self::operand_info(src);
|
||||
// Self::operand_info(&dst.clone().into())
|
||||
// /// Creates a [Parsable] implementation for an enum whose variants
|
||||
// /// are named after other [Parsable] items
|
||||
// macro make_parsable($(#[$meta:meta])* $vis:vis enum $id:ident {$($(#[$vmeta:meta])*$v:ident),*$(,)?}) {
|
||||
// $( #[$meta] )* $vis enum $id {$($(#[$vmeta])*$v($v),)* }
|
||||
// impl ::msp430_asm::parser::parsable::Parsable for $id {
|
||||
// fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
// where T: TokenStream<'text> {
|
||||
// $(if let Some(v) = Parsable::try_parse(p, stream)? { Ok(Self::$v(v)) } else )*
|
||||
// { Err(ParseError::UnrecognizedDirective("".into())) }
|
||||
// }
|
||||
// }
|
||||
// impl TryFrom<&str> for $id {
|
||||
// type Error = ParseError;
|
||||
// fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||
// Parsable::parse(&Parser::default(), &mut Tokenizer::new(value).ignore(Type::Space).preprocessed())
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
fn operand_info(o: &PrimaryOperand) {
|
||||
match o {
|
||||
PrimaryOperand::Direct(r) => Self::register_info(r),
|
||||
PrimaryOperand::Indirect(r) => {
|
||||
Self::register_info(r);
|
||||
println!("Indirect addressing mode: use data pointed to by {r}");
|
||||
}
|
||||
PrimaryOperand::PostInc(r) => {
|
||||
Self::register_info(r);
|
||||
println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
|
||||
}
|
||||
PrimaryOperand::Indexed(r, n) => {
|
||||
Self::register_info(r);
|
||||
println!("Indexed mode: use the data at {r}[{n}]");
|
||||
}
|
||||
PrimaryOperand::Relative(_) => return,
|
||||
PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
|
||||
PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
|
||||
PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
|
||||
PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
|
||||
PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
|
||||
PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
|
||||
PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
|
||||
PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
|
||||
}
|
||||
footer!("https://mspgcc.sourceforge.net/manual/x82.html");
|
||||
}
|
||||
fn register_info(r: &Register) {
|
||||
use Register as Re;
|
||||
match r {
|
||||
Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
|
||||
Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
|
||||
Re::sr => println!(
|
||||
"sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
|
||||
),
|
||||
Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
|
||||
Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
|
||||
println!("{r} is a callee-saved general purpose register.")
|
||||
}
|
||||
Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
|
||||
println!("{r} is a caller-saved general purpose register, allowed for return values.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Gets parameter usage information from the opcode's EncodingParser
|
||||
pub fn params(opcode: &Opcode) -> &'static str {
|
||||
match opcode.resolve().1 {
|
||||
EncodingParser::Jump { target: None } => " target (relative address or label)",
|
||||
EncodingParser::Single { width: None, dst: None } => "[.b] dst",
|
||||
EncodingParser::Single { dst: None, .. } => " dst",
|
||||
EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst",
|
||||
EncodingParser::Double { src: None, .. } => "[.b] src",
|
||||
EncodingParser::Double { dst: None, .. } => "[.b] dst",
|
||||
EncodingParser::Double { .. } => "[.b]",
|
||||
EncodingParser::Reflexive { reg: None, .. } => "[.b] dst",
|
||||
_ => "",
|
||||
}
|
||||
}
|
||||
// make_parsable! {
|
||||
// #[derive(Debug)]
|
||||
// pub enum SyntaxFragment {
|
||||
// Opcode,
|
||||
// PrimaryOperand,
|
||||
// Number,
|
||||
// }
|
||||
// }
|
||||
|
||||
pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) {
|
||||
match opcode {
|
||||
// Single
|
||||
Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"),
|
||||
Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"),
|
||||
Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"),
|
||||
Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"),
|
||||
Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"),
|
||||
Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"),
|
||||
Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"),
|
||||
// Jump
|
||||
Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"),
|
||||
Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"),
|
||||
Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"),
|
||||
Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
|
||||
Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
|
||||
Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"),
|
||||
Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"),
|
||||
Opcode::Jmp => ("Jump unconditionally", "pc += target"),
|
||||
// Double
|
||||
Opcode::Mov => ("Copy src into dst", "dst = src"),
|
||||
Opcode::Add => ("Add src to dst", "dst += src"),
|
||||
Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
|
||||
Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"),
|
||||
Opcode::Sub => ("Subtract src from dst", "dst -= src"),
|
||||
Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
|
||||
Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
|
||||
Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
|
||||
Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
|
||||
Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
|
||||
Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
|
||||
Opcode::And => ("Bitwise And src into dst", "dst &= src"),
|
||||
// Emulated
|
||||
Opcode::Nop => ("Does nothing", "{}"),
|
||||
Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
|
||||
Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
|
||||
Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
|
||||
Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
|
||||
Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
|
||||
Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
|
||||
Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
|
||||
Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
|
||||
Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
|
||||
Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
|
||||
Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
|
||||
Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
|
||||
Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
|
||||
Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
|
||||
Opcode::Clr => ("Sets dst to 0", "dst = 0"),
|
||||
Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
|
||||
Opcode::Dec => ("Decrements dst", "dst -= 1"),
|
||||
Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
|
||||
Opcode::Inc => ("Increments dst", "dst += 1"),
|
||||
Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
|
||||
Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
|
||||
Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"),
|
||||
Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"),
|
||||
}
|
||||
}
|
||||
// impl SyntaxFragment {
|
||||
// pub fn info(&self) {
|
||||
// match self {
|
||||
// SyntaxFragment::Opcode(o) => Self::opcode_info(o),
|
||||
// SyntaxFragment::PrimaryOperand(o) => Self::operand_info(o),
|
||||
// SyntaxFragment::Number(n) => println!("The number {n}"),
|
||||
// }
|
||||
// }
|
||||
// fn opcode_info(o: &Opcode) {
|
||||
// let (desc, as_rust) = usage(o);
|
||||
// println!("Usage: {o}{}\n{desc} ( {as_rust} )", params(o));
|
||||
// footer!("https://mspgcc.sourceforge.net/manual/x223.html");
|
||||
// }
|
||||
// // TODO: re-enable full instruction decoding
|
||||
// // fn encoding_info(e: &Encoding) {
|
||||
// // match e {
|
||||
// // Encoding::Single { dst, .. } => Self::operand_info(dst),
|
||||
// // Encoding::Jump { target } => println!("Jumps to (pc + {target})"),
|
||||
// // Encoding::Double { src, dst, .. } => {
|
||||
// // Self::operand_info(src);
|
||||
// // Self::operand_info(&dst.clone().into())
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
// fn operand_info(o: &PrimaryOperand) {
|
||||
// match o {
|
||||
// PrimaryOperand::Direct(r) => Self::register_info(r),
|
||||
// PrimaryOperand::Indirect(r) => {
|
||||
// Self::register_info(r);
|
||||
// println!("Indirect addressing mode: use data pointed to by {r}");
|
||||
// }
|
||||
// PrimaryOperand::PostInc(r) => {
|
||||
// Self::register_info(r);
|
||||
// println!("Indirect post-increment mode: use data pointed to by {r}, then increment {r}");
|
||||
// }
|
||||
// PrimaryOperand::Indexed(r, n) => {
|
||||
// Self::register_info(r);
|
||||
// println!("Indexed mode: use the data at {r}[{n}]");
|
||||
// }
|
||||
// PrimaryOperand::Relative(_) => return,
|
||||
// PrimaryOperand::Absolute(n) => println!("Absolute mode: use the data at absolute address {n}"),
|
||||
// PrimaryOperand::Immediate(n) => println!("Immediate mode: the constant {n}"),
|
||||
// PrimaryOperand::Four => println!("#4 mode: Immediate 4 is encoded @sr"),
|
||||
// PrimaryOperand::Eight => println!("#8 mode: Immediate 8 is encoded @sr+"),
|
||||
// PrimaryOperand::Zero => println!("#0 mode: Immediate 0 is encoded cg (r3)"),
|
||||
// PrimaryOperand::One => println!("#1 mode: Immediate 1 is encoded _(cg), where _ is a nonexistent ext-word"),
|
||||
// PrimaryOperand::Two => println!("#2 mode: Immediate 2 is encoded @cg"),
|
||||
// PrimaryOperand::MinusOne => println!("#-1 mode: the all-ones constant, is encoded @cg+"),
|
||||
// }
|
||||
// footer!("https://mspgcc.sourceforge.net/manual/x82.html");
|
||||
// }
|
||||
// fn register_info(r: &Register) {
|
||||
// use Register as Re;
|
||||
// match r {
|
||||
// Re::pc => println!("pc (r0) is the Program Counter. Post-increment addressing will increase it by 2."),
|
||||
// Re::sp => println!("sp (r1) is the Stack Pointer. Post-increment addressing will increase it by 2."),
|
||||
// Re::sr => println!(
|
||||
// "sr (r2) is the Status Register. It has arithmetic flags: oVerflow, Negative, Zero, and Carry;\nInterrupt Enable; and toggles for various clock/sleep functions.\n8\t7\t6\t5\t4\t3\t2\t1\t0\nV\tSCG1\tSCG1\tOSCOFF\tCPUOFF\tGIE\tN\tZ\tC",
|
||||
// ),
|
||||
// Re::cg => println!("cg (r3) is the Constant Generator. It's hard-wired to zero."),
|
||||
// Re::r4 | Re::r5 | Re::r6 | Re::r7 | Re::r8 | Re::r9 | Re::r10 | Re::r11 => {
|
||||
// println!("{r} is a callee-saved general purpose register.")
|
||||
// }
|
||||
// Re::r12 | Re::r13 | Re::r14 | Re::r15 => {
|
||||
// println!("{r} is a caller-saved general purpose register, allowed for return values.")
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
const SINGLE: [Opcode; 7] =
|
||||
[Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti];
|
||||
// // Gets parameter usage information from the opcode's EncodingParser
|
||||
// pub fn params(opcode: &Opcode) -> &'static str {
|
||||
// match opcode.resolve().1 {
|
||||
// EncodingParser::Jump { target: None } => " target (relative address or label)",
|
||||
// EncodingParser::Single { width: None, dst: None } => "[.b] dst",
|
||||
// EncodingParser::Single { dst: None, .. } => " dst",
|
||||
// EncodingParser::Double { src: None, dst: None, .. } => "[.b] src, dst",
|
||||
// EncodingParser::Double { src: None, .. } => "[.b] src",
|
||||
// EncodingParser::Double { dst: None, .. } => "[.b] dst",
|
||||
// EncodingParser::Double { .. } => "[.b]",
|
||||
// EncodingParser::Reflexive { reg: None, .. } => "[.b] dst",
|
||||
// _ => "",
|
||||
// }
|
||||
// }
|
||||
|
||||
const JUMP: [Opcode; 8] =
|
||||
[Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp];
|
||||
// pub fn usage(opcode: &Opcode) -> (&'static str, &'static str) {
|
||||
// match opcode {
|
||||
// // Single
|
||||
// Opcode::Rrc => ("Rotates dst right, through carry flag", "dst = (dst >> 1) | (sr[C] << 15)"),
|
||||
// Opcode::Swpb => ("Swaps the high and low byte of dst", "dst.swap_bytes()"),
|
||||
// Opcode::Rra => ("Shifts dst right, sign-extending the result", "dst >>= 1"),
|
||||
// Opcode::Sxt => ("Sign-extends the 8-bit dst to 16-bits", "dst as i16 << 8 >> 8"),
|
||||
// Opcode::Push => ("Pushes dst to the stack", "stack.push(dst)"),
|
||||
// Opcode::Call => ("Calls a subroutine at an absolute address", "dst()"),
|
||||
// Opcode::Reti => ("Return from interrupt handler", "{ sr = stack.pop(); pc = stack.pop() }"),
|
||||
// // Jump
|
||||
// Opcode::Jnz => ("Jump if the last result was not zero", "if !Z { pc += target }"),
|
||||
// Opcode::Jz => ("Jump if the last result was zero", "if Z { pc += target }"),
|
||||
// Opcode::Jnc => ("Jump if the last operation did not carry", "if !C { pc += target }"),
|
||||
// Opcode::Jc => ("Jump if the last operation produced a carry bit", "if C { pc += target }"),
|
||||
// Opcode::Jn => ("Jump if the last result was negative", "if N { pc += target }"),
|
||||
// Opcode::Jge => ("Jump if the flags indicate src >= dst", "if sr[C] == sr[V] { pc += target }"),
|
||||
// Opcode::Jl => ("Jump if the flags indicate src < dst", "if sr[C] != sr[V] { pc += target }"),
|
||||
// Opcode::Jmp => ("Jump unconditionally", "pc += target"),
|
||||
// // Double
|
||||
// Opcode::Mov => ("Copy src into dst", "dst = src"),
|
||||
// Opcode::Add => ("Add src to dst", "dst += src"),
|
||||
// Opcode::Addc => ("Add src to dst with carry", "dst += src + sr[C]"),
|
||||
// Opcode::Subc => ("Subtract src from dst with carry", "dst -= src - sr[C]"),
|
||||
// Opcode::Sub => ("Subtract src from dst", "dst -= src"),
|
||||
// Opcode::Cmp => ("Subtract src from dst, but discard the result, keeping the flags", "dst - src"),
|
||||
// Opcode::Dadd => ("Add src to dst in Binary Coded Decimal", "dst = dst as BCD + src as BCD"),
|
||||
// Opcode::Bit => ("Test if bits in src are set in dst", "(src & dst).cmp(0)"),
|
||||
// Opcode::Bic => ("Clear bits in dst that are set in src, without changing flags", "dst &= !src"),
|
||||
// Opcode::Bis => ("Set bits in dst that are set in src, without changing flags", "dst |= src"),
|
||||
// Opcode::Xor => ("Bitwise Xor src into dst", "dst ^= src"),
|
||||
// Opcode::And => ("Bitwise And src into dst", "dst &= src"),
|
||||
// // Emulated
|
||||
// Opcode::Nop => ("Does nothing", "{}"),
|
||||
// Opcode::Pop => ("Pops a value from the stack", "dst = stack.pop()"),
|
||||
// Opcode::Br => ("Branches to the absolute address in src", "pc = src"),
|
||||
// Opcode::Ret => ("Returns from subroutine", "pc = stack.pop()"),
|
||||
// Opcode::Clrc => ("Clears the carry flag", "sr[C] = 0"),
|
||||
// Opcode::Setc => ("Sets the carry flag", "sr[C] = 1"),
|
||||
// Opcode::Clrz => ("Clears the zero flag", "sr[Z] = 0"),
|
||||
// Opcode::Setz => ("Sets the zero flag", "sr[Z] = 1"),
|
||||
// Opcode::Clrn => ("Clears the negative flag", "sr[N] = 0"),
|
||||
// Opcode::Setn => ("Sets the negative flag", "sr[N] = 1"),
|
||||
// Opcode::Dint => ("Disables interrupts", "sr[GIE] = 0"),
|
||||
// Opcode::Eint => ("Enables interrupts", "sr[GIE] = 1"),
|
||||
// Opcode::Rla => ("Shifts dst to the left, padding with zeros", "dst <<= 1"),
|
||||
// Opcode::Rlc => ("Rotates dst to the left, through carry flag", "dst = (dst << 1) + sr[C]"),
|
||||
// Opcode::Inv => ("Inverts the bits in dst", "dst = !dst"),
|
||||
// Opcode::Clr => ("Sets dst to 0", "dst = 0"),
|
||||
// Opcode::Tst => ("Sets the status register flags (CNZV) using dst", ""),
|
||||
// Opcode::Dec => ("Decrements dst", "dst -= 1"),
|
||||
// Opcode::Decd => ("Decrements dst by 2 (one processor word)", "dst -= 2"),
|
||||
// Opcode::Inc => ("Increments dst", "dst += 1"),
|
||||
// Opcode::Incd => ("Increments dst by 2 (one processor word)", "dst += 2"),
|
||||
// Opcode::Adc => ("Adds the carry bit to dst", "dst += sr[C]"),
|
||||
// Opcode::Dadc => ("Adds the carry bit to dst, in Binary Coded Decimal", "dst as BCD = sr[C]"),
|
||||
// Opcode::Sbc => ("Subtracts the carry bit from dst", "dst -= sr[C]"),
|
||||
// }
|
||||
// }
|
||||
|
||||
#[rustfmt::skip]
|
||||
const DOUBLE: [Opcode; 12] = [
|
||||
Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
|
||||
Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
const SIMULATED: [Opcode; 24] = [
|
||||
Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
|
||||
Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
|
||||
Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
|
||||
Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
|
||||
];
|
||||
// const SINGLE: [Opcode; 7] =
|
||||
// [Opcode::Rrc, Opcode::Swpb, Opcode::Rra, Opcode::Sxt, Opcode::Push, Opcode::Call, Opcode::Reti];
|
||||
|
||||
pub fn list_opcodes() {
|
||||
let mut stdout = std::io::stdout().lock();
|
||||
header!(stdout, "Single-operand instructions:");
|
||||
let _ = write_opcode_list(&mut stdout, &SINGLE);
|
||||
header!(stdout, "Relative Jump instructions:");
|
||||
let _ = write_opcode_list(&mut stdout, &JUMP);
|
||||
header!(stdout, "Double-operand instructions:");
|
||||
let _ = write_opcode_list(&mut stdout, &DOUBLE);
|
||||
header!(stdout, "Simulated instructions:");
|
||||
let _ = write_opcode_list(&mut stdout, &SIMULATED);
|
||||
}
|
||||
// const JUMP: [Opcode; 8] =
|
||||
// [Opcode::Jnz, Opcode::Jz, Opcode::Jnc, Opcode::Jc, Opcode::Jn, Opcode::Jge, Opcode::Jl, Opcode::Jmp];
|
||||
|
||||
fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> {
|
||||
for (idx, opcode) in list.iter().enumerate() {
|
||||
write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?;
|
||||
}
|
||||
if list.len() % 6 != 0 {
|
||||
writeln!(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// #[rustfmt::skip]
|
||||
// const DOUBLE: [Opcode; 12] = [
|
||||
// Opcode::Mov, Opcode::Add, Opcode::Addc, Opcode::Subc, Opcode::Sub, Opcode::Cmp,
|
||||
// Opcode::Dadd, Opcode::Bit, Opcode::Bic, Opcode::Bis, Opcode::Xor, Opcode::And,
|
||||
// ];
|
||||
// #[rustfmt::skip]
|
||||
// const SIMULATED: [Opcode; 24] = [
|
||||
// Opcode::Nop, Opcode::Pop, Opcode::Br, Opcode::Ret, Opcode::Clrc, Opcode::Setc,
|
||||
// Opcode::Clrz, Opcode::Setz, Opcode::Clrn, Opcode::Setn, Opcode::Dint, Opcode::Eint,
|
||||
// Opcode::Rla, Opcode::Rlc, Opcode::Inv, Opcode::Clr, Opcode::Tst, Opcode::Dec,
|
||||
// Opcode::Decd, Opcode::Inc, Opcode::Incd, Opcode::Adc, Opcode::Dadc, Opcode::Sbc,
|
||||
// ];
|
||||
|
||||
macro header ($f:ident, $($x: expr),+) {
|
||||
{write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();}
|
||||
}
|
||||
macro footer ($($x: expr),+) {
|
||||
{print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);}
|
||||
}
|
||||
// pub fn list_opcodes() {
|
||||
// let mut stdout = std::io::stdout().lock();
|
||||
// header!(stdout, "Single-operand instructions:");
|
||||
// let _ = write_opcode_list(&mut stdout, &SINGLE);
|
||||
// header!(stdout, "Relative Jump instructions:");
|
||||
// let _ = write_opcode_list(&mut stdout, &JUMP);
|
||||
// header!(stdout, "Double-operand instructions:");
|
||||
// let _ = write_opcode_list(&mut stdout, &DOUBLE);
|
||||
// header!(stdout, "Simulated instructions:");
|
||||
// let _ = write_opcode_list(&mut stdout, &SIMULATED);
|
||||
// }
|
||||
|
||||
// fn write_opcode_list(mut f: impl std::io::Write, list: &[Opcode]) -> std::io::Result<()> {
|
||||
// for (idx, opcode) in list.iter().enumerate() {
|
||||
// write!(f, "{opcode}{}", if idx % 6 == 5 { "\n" } else { "\t" })?;
|
||||
// }
|
||||
// if list.len() % 6 != 0 {
|
||||
// writeln!(f)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// macro header ($f:ident, $($x: expr),+) {
|
||||
// {write!($f, "{}",SetForegroundColor(Color::Cyan)).ok();write!($f, $($x),+).ok();writeln!($f, "{}",ResetAttributes).ok();}
|
||||
// }
|
||||
// macro footer ($($x: expr),+) {
|
||||
// {print!("{}",SetForegroundColor(Color::DarkGray));print!($($x),+);println!("{}",ResetAttributes);}
|
||||
// }
|
||||
|
@ -3,59 +3,63 @@
|
||||
// https://mspgcc.sourceforge.net/manual/ln16.html
|
||||
#![feature(decl_macro)]
|
||||
|
||||
use anes::{Color, ResetAttributes, SetForegroundColor};
|
||||
use msp430_asm::parser::preamble::*;
|
||||
use msp430_asm::preamble::*;
|
||||
use std::{
|
||||
error::Error,
|
||||
io::{stdin, IsTerminal, Write},
|
||||
};
|
||||
|
||||
type AsmResult<T> = Result<T, Box<dyn Error>>;
|
||||
|
||||
mod data;
|
||||
|
||||
fn main() -> AsmResult<()> {
|
||||
if stdin().is_terminal() {
|
||||
hello();
|
||||
}
|
||||
repl()
|
||||
fn main() {
|
||||
println!("Hello, world!")
|
||||
}
|
||||
|
||||
fn hello() {
|
||||
println!(
|
||||
"{}{} v{}
|
||||
This software contains instruction and register descriptions adapted from
|
||||
the mspgcc project's fantastic documentation, which is licensed under the GPL.
|
||||
https://mspgcc.sourceforge.net/manual/book1.html{}\n",
|
||||
SetForegroundColor(Color::DarkGray),
|
||||
env!("CARGO_BIN_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
ResetAttributes
|
||||
);
|
||||
}
|
||||
// use anes::{Color, ResetAttributes, SetForegroundColor};
|
||||
// use msp430_asm::parser::preamble::*;
|
||||
// use msp430_asm::preamble::*;
|
||||
// use std::{
|
||||
// error::Error,
|
||||
// io::{stdin, IsTerminal, Write},
|
||||
// };
|
||||
|
||||
fn repl() -> AsmResult<()> {
|
||||
printflush!("> ");
|
||||
let mut line = String::new();
|
||||
while let Ok(len) = stdin().read_line(&mut line) {
|
||||
match len {
|
||||
0 => break, // No newline (reached EOF)
|
||||
1 => (), // Line is empty
|
||||
_ => {
|
||||
if line.starts_with('?') || line.starts_with("help") {
|
||||
data::list_opcodes()
|
||||
} else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) {
|
||||
sf.info();
|
||||
}
|
||||
}
|
||||
}
|
||||
printflush!("> ");
|
||||
line.clear();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// type AsmResult<T> = Result<T, Box<dyn Error>>;
|
||||
|
||||
macro printflush ($($x: expr),+) {
|
||||
{print!($($x),+); let _ = ::std::io::stdout().flush();}
|
||||
}
|
||||
// mod data;
|
||||
|
||||
// fn main() -> AsmResult<()> {
|
||||
// if stdin().is_terminal() {
|
||||
// hello();
|
||||
// }
|
||||
// repl()
|
||||
// }
|
||||
|
||||
// fn hello() {
|
||||
// println!(
|
||||
// "{}{} v{}
|
||||
// This software contains instruction and register descriptions adapted from
|
||||
// the mspgcc project's fantastic documentation, which is licensed under the GPL.
|
||||
// https://mspgcc.sourceforge.net/manual/book1.html{}\n",
|
||||
// SetForegroundColor(Color::DarkGray),
|
||||
// env!("CARGO_BIN_NAME"),
|
||||
// env!("CARGO_PKG_VERSION"),
|
||||
// ResetAttributes
|
||||
// );
|
||||
// }
|
||||
|
||||
// fn repl() -> AsmResult<()> {
|
||||
// printflush!("> ");
|
||||
// let mut line = String::new();
|
||||
// while let Ok(len) = stdin().read_line(&mut line) {
|
||||
// match len {
|
||||
// 0 => break, // No newline (reached EOF)
|
||||
// 1 => (), // Line is empty
|
||||
// _ => {
|
||||
// if line.starts_with('?') || line.starts_with("help") {
|
||||
// data::list_opcodes()
|
||||
// } else if let Ok(sf) = data::SyntaxFragment::try_from(line.as_str()) {
|
||||
// sf.info();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// printflush!("> ");
|
||||
// line.clear();
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// macro printflush ($($x: expr),+) {
|
||||
// {print!($($x),+); let _ = ::std::io::stdout().flush();}
|
||||
// }
|
||||
|
14 msp430-asm/Cargo.toml Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "msp430-asm"
|
||||
authors.workspace = true
|
||||
version.workspace = true
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
publish.workspace = true
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
libmsp430 = { path = ".." }
|
||||
anes = { version = "0.2.0" }
|
||||
argp = { version = "0.3.0" }
|
116 msp430-asm/src/lib.rs Normal file
@ -0,0 +1,116 @@
|
||||
//! Helper library for msp430-asm
|
||||
#![feature(decl_macro)]
|
||||
pub mod split_twice {
|
||||
/// Slices a collection into a beginning, middle, and end, based on two unordered indices
|
||||
pub trait SplitTwice<'t> {
|
||||
type Slice;
|
||||
type Idx;
|
||||
/// Splits a collection into a beginning, middle, and end slice,
|
||||
/// based on two unordered indices
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// # use msp430_asm::split_twice::SplitTwice;
|
||||
/// let string = "foo,bar,baz";
|
||||
/// let (foo, bar, baz) = string.split_twice(4, 8);
|
||||
/// assert_eq!(foo, "foo,");
|
||||
/// assert_eq!(bar, "bar,");
|
||||
/// assert_eq!(baz, "baz");
|
||||
/// ```
|
||||
fn split_twice(
|
||||
&'t self,
|
||||
a: Self::Idx,
|
||||
b: Self::Idx,
|
||||
) -> (Self::Slice, Self::Slice, Self::Slice);
|
||||
}
|
||||
|
||||
impl<'t, T: 't> SplitTwice<'t> for [T] {
|
||||
type Slice = &'t [T];
|
||||
type Idx = usize;
|
||||
fn split_twice(
|
||||
&'t self,
|
||||
a: Self::Idx,
|
||||
b: Self::Idx,
|
||||
) -> (Self::Slice, Self::Slice, Self::Slice) {
|
||||
let (a, b) = if a < b { (a, b) } else { (b, a) };
|
||||
let (mid, end) =
|
||||
if b < self.len() { self.split_at(b) } else { (self, Default::default()) };
|
||||
let (start, mid) =
|
||||
if a < mid.len() { mid.split_at(a) } else { (self, Default::default()) };
|
||||
(start, mid, end)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> SplitTwice<'t> for str {
|
||||
type Slice = &'t str;
|
||||
type Idx = usize;
|
||||
fn split_twice(
|
||||
&'t self,
|
||||
a: Self::Idx,
|
||||
b: Self::Idx,
|
||||
) -> (Self::Slice, Self::Slice, Self::Slice) {
|
||||
let (a, b) = if a < b { (a, b) } else { (b, a) };
|
||||
let (mid, end) =
|
||||
if b < self.len() { self.split_at(b) } else { (self, Default::default()) };
|
||||
let (start, mid) =
|
||||
if a < mid.len() { mid.split_at(a) } else { (self, Default::default()) };
|
||||
(start, mid, end)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod cursor {
|
||||
use std::fmt::{Arguments, Display};
|
||||
|
||||
pub macro csi($($t:tt)*) {format_args!("\x1b[{}", format_args!($($t)*))}
|
||||
|
||||
pub macro color($fg:expr, $($t:tt)*) {
|
||||
Colorized::new(Some($fg), None, format_args!($($t)*))
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Color {
|
||||
#[default]
|
||||
Black = 30,
|
||||
Red,
|
||||
Green,
|
||||
Yellow,
|
||||
Blue,
|
||||
Magenta,
|
||||
Cyan,
|
||||
Gray,
|
||||
DarkGray = 90,
|
||||
Pink,
|
||||
Lime,
|
||||
Sunflower,
|
||||
SkyBlue,
|
||||
HotPink,
|
||||
Turquoise,
|
||||
White,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Colorized<'args> {
|
||||
fg: Option<Color>,
|
||||
bg: Option<Color>,
|
||||
args: Arguments<'args>,
|
||||
}
|
||||
|
||||
impl<'t> Colorized<'t> {
|
||||
pub fn new(fg: Option<Color>, bg: Option<Color>, args: Arguments<'t>) -> Self {
|
||||
Self { fg, bg, args }
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Colorized<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let &Self { fg, bg, args } = self;
|
||||
if let Some(fg) = fg {
|
||||
write!(f, "{}", csi!("{}m", fg as u8))?;
|
||||
}
|
||||
if let Some(bg) = bg {
|
||||
write!(f, "{}", csi!("{}m", bg as u8 + 10))?;
|
||||
}
|
||||
write!(f, "{args}{}", csi!("0m"))
|
||||
}
|
||||
}
|
||||
}
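
A hedged usage sketch of the cursor module above, mirroring how the frontend later in this diff calls color! (nightly-only, since the helper crate enables #![feature(decl_macro)]); the message strings are invented for the example:

// Hedged sketch: assumes `cursor` is public in the msp430-asm helper crate
// exactly as the lib.rs hunk above defines it.
use msp430_asm::cursor::{color, Color::*};

fn main() {
    // color!(fg, fmt, args...) wraps format_args! in a Colorized value,
    // which writes the CSI color code, the text, then a reset on Display.
    println!("{}", color!(DarkGray, "{} v{}", "msp430-asm", "0.3.0"));
    eprintln!("{}", color!(Red, "error: {}", "unrecognized directive"));
}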
|
@ -1,7 +1,15 @@
|
||||
//! Simple frontend for the assembler
|
||||
#![feature(decl_macro)]
|
||||
use argp::parse_args_or_exit;
|
||||
use msp430_asm::preamble::*;
|
||||
use libmsp430::{
|
||||
assembler::Assemble,
|
||||
parser::ast::{canonical::Canonicalize, *},
|
||||
parser::{error::Error as PError, Parser},
|
||||
};
|
||||
use msp430_asm::{
|
||||
cursor::{color, Color::*},
|
||||
split_twice::SplitTwice,
|
||||
};
|
||||
use std::{
|
||||
error::Error,
|
||||
io::{stdin, IsTerminal, Read},
|
||||
@ -36,21 +44,18 @@ mod args {
|
||||
}
|
||||
mod repl {
|
||||
use super::*;
|
||||
use anes::{Color, MoveCursorToPreviousLine, ResetAttributes, SetForegroundColor};
|
||||
use msp430_asm::{
|
||||
assembler::error::AssemblyError, error::Error as MspError, lexer::error::LexError, parser::error::ParseError,
|
||||
};
|
||||
use anes::MoveCursorToPreviousLine;
|
||||
use std::io::{stderr, Write};
|
||||
|
||||
macro color ($color: expr, $fmt: literal, $($str: expr),*) {
|
||||
format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)* ::anes::ResetAttributes)
|
||||
}
|
||||
// macro color ($color: expr, $fmt: literal, $($str: expr),*) {
|
||||
// format_args!(concat!("{}", $fmt, "{}"), ::anes::SetForegroundColor($color),$($str,)*
|
||||
// ::anes::ResetAttributes) }
|
||||
|
||||
macro linenr($n: expr) {
|
||||
format_args!("{:4}: ", $n)
|
||||
}
|
||||
|
||||
macro printflush ($($x: expr),+) {
|
||||
macro printfl ($($x: expr),+) {
|
||||
{print!($($x),+); let _ = ::std::io::stdout().flush();}
|
||||
}
|
||||
|
||||
@ -62,13 +67,10 @@ mod repl {
|
||||
let mut line = String::new();
|
||||
let mut linenr = 1;
|
||||
println!(
|
||||
"{}{} v{}{}",
|
||||
SetForegroundColor(Color::DarkGray),
|
||||
env!("CARGO_BIN_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
ResetAttributes
|
||||
"{}",
|
||||
color!(DarkGray, "{} v{}", env!("CARGO_BIN_NAME"), env!("CARGO_PKG_VERSION"))
|
||||
);
|
||||
printflush!("{}", linenr!(linenr));
|
||||
printfl!("{}", linenr!(linenr));
|
||||
while let Ok(len) = stdin().read_line(&mut line) {
|
||||
match len {
|
||||
0 => break, // No newline (reached EOF)
|
||||
@ -76,8 +78,8 @@ mod repl {
|
||||
_ => (),
|
||||
}
|
||||
// Try to parse this line in isolation (this restricts preprocessing)
|
||||
match Parser::default().parse(&line) {
|
||||
Err(error) => errpp(&line, linenr, &error.into()),
|
||||
match Parser::new(&line).parse::<Statements>() {
|
||||
Err(error) => errpp(&line, linenr, &error),
|
||||
Ok(_) => {
|
||||
okpp(&line, linenr);
|
||||
*buf += &line;
|
||||
@ -85,36 +87,29 @@ mod repl {
|
||||
}
|
||||
}
|
||||
line.clear();
|
||||
printflush!("{}", linenr!(linenr));
|
||||
printfl!("{}", linenr!(linenr));
|
||||
}
|
||||
println!();
|
||||
println!("{}", color!(Gray, "[EOF]"));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn okpp(line: &str, linenr: i32) {
|
||||
println!("{}{}{}", move_cursor!(1, 5), color!(Color::Green, "{:4}", linenr!(linenr)), line.trim_end(),);
|
||||
println!(
|
||||
"{}{}{}",
|
||||
move_cursor!(1, 5),
|
||||
color!(Green, "{:4}", linenr!(linenr)),
|
||||
line.trim_end(),
|
||||
);
|
||||
}
|
||||
|
||||
/// Pretty-prints a line error
|
||||
fn errpp(line: &str, linenr: i32, err: &msp430_asm::error::Error) {
|
||||
fn errpp(line: &str, linenr: i32, err: &PError) {
|
||||
let loc = err.loc;
|
||||
if stderr().is_terminal() {
|
||||
let line = line.trim_end();
|
||||
eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Color::Red, "{}", linenr!(linenr)));
|
||||
match err {
|
||||
// TODO: use a recursive enum to store all valid states
|
||||
MspError::LexError(LexError::Contextual(c, e))
|
||||
| MspError::ParseError(ParseError::LexError(LexError::Contextual(c, e)))
|
||||
| MspError::AssemblyError(AssemblyError::ParseError(ParseError::LexError(LexError::Contextual(
|
||||
c,
|
||||
e,
|
||||
)))) => {
|
||||
let (start, end) = line.split_at(c.position() - 1);
|
||||
eprintln!("{start}{} ({e})", color!(Color::Red, "{}", end));
|
||||
}
|
||||
_ => {
|
||||
eprintln!("{} ({err})", color!(Color::Red, "{}", line));
|
||||
}
|
||||
}
|
||||
eprint!("{}{}", MoveCursorToPreviousLine(1), color!(Red, "{}", linenr!(linenr)));
|
||||
let (start, mid, end) = line.split_twice(loc.start, loc.end);
|
||||
eprintln!("{start}{}{end} {}", color!(Red, "{}", mid), color!(DarkGray, "; {}", err));
|
||||
} else {
|
||||
eprintln!("{} ({err})", line.trim())
|
||||
}
|
||||
@ -123,7 +118,7 @@ mod repl {
|
||||
|
||||
// Parses and assembles a buffer, then prints it in hex to stdout
|
||||
fn asm(buf: &str) -> Result<(), Box<dyn Error>> {
|
||||
match Assembler::assemble(&Parser::default().parse(&buf)?) {
|
||||
match Parser::new(buf).parse::<Statements>()?.to_canonical().assemble() {
|
||||
Err(error) => println!("{error}"),
|
||||
Ok(out) => {
|
||||
for word in out {
|
@ -9,11 +9,11 @@ jmp main
|
||||
.string "ABA"
|
||||
.string "ABAB"
|
||||
.word 0b0101101001011010
|
||||
.words [dead beef]
|
||||
.words [0xdead 0xbeef 0x0000]
|
||||
|
||||
main:
|
||||
; testing defines
|
||||
.define asdfgh #1000
|
||||
.define asdfgh #0x1000
|
||||
.define qwerty @sp+
|
||||
br asdfgh
|
||||
mov qwerty, r15
|
||||
@ -88,73 +88,74 @@ mov @r13+, r14
|
||||
mov @r14+, r15
|
||||
|
||||
.define special r6
|
||||
;mov @pc+, r15 ; This is how mov-immediate is encoded, and is not valid
|
||||
;mov @sp+, r15 ; pop r15
|
||||
;mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
|
||||
;mov @cg+, r15
|
||||
; mov , r14
|
||||
; mov @pc+, r15 ; This is a mov-immediate, and may corrupt your output
|
||||
mov @sp+, r15 ; pop r15
|
||||
mov @sr+, r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
|
||||
mov @cg+, r15
|
||||
|
||||
indexed_mode:
|
||||
.define numbered r7
|
||||
mov.b 10(r0), r1
|
||||
mov 10(r1), r2
|
||||
mov.b 0x10(r0), r1
|
||||
mov 0x10(r1), r2
|
||||
;mov 10(r2), r3 ; Invalid: cannot index relative to sr
|
||||
;mov 10(r3), r4 ; Invalid: cannot index relative to cg
|
||||
mov 10(r4), r5
|
||||
mov 10(r5), r6
|
||||
mov 10(r6), r7
|
||||
mov 10(r7), r8
|
||||
mov 10(r8), r9
|
||||
mov 10(r9), r10
|
||||
mov 10(r10), r11
|
||||
mov 10(r11), r12
|
||||
mov 10(r12), r13
|
||||
mov 10(r13), r14
|
||||
mov 10(r14), r15
|
||||
mov 0x10(r4), r5
|
||||
mov 0x10(r5), r6
|
||||
mov 0x10(r6), r7
|
||||
mov 0x10(r7), r8
|
||||
mov 0x10(r8), r9
|
||||
mov 0x10(r9), r10
|
||||
mov 0x10(r10), r11
|
||||
mov 0x10(r11), r12
|
||||
mov 0x10(r12), r13
|
||||
mov 0x10(r13), r14
|
||||
mov 0x10(r14), r15
|
||||
|
||||
.define special r8
|
||||
mov 10(pc), r15
|
||||
mov 10(sp), r15
|
||||
mov 0x10(pc), r15
|
||||
mov 0x10(sp), r15
|
||||
;mov 10(sr), r15 ; These are part of encodings for #immediate values [-1, 0, 1, 2, 4, 8]
|
||||
;mov 10(cg), r15
|
||||
|
||||
_immediate_mode:
|
||||
.define numbered r9
|
||||
mov #beef, r0
|
||||
mov #beef, r1
|
||||
mov #beef, r2
|
||||
mov #beef, r3
|
||||
mov #beef, r4
|
||||
mov #beef, r5
|
||||
mov #beef, r6
|
||||
mov #beef, r7
|
||||
mov #beef, r8
|
||||
mov #beef, r9
|
||||
mov #beef, r10
|
||||
mov #beef, r11
|
||||
mov #beef, r12
|
||||
mov #beef, r13
|
||||
mov #beef, r14
|
||||
mov #beef, r15
|
||||
mov #0xbeef, r0
|
||||
mov #0xbeef, r1
|
||||
mov #0xbeef, r2
|
||||
mov #0xbeef, r3
|
||||
mov #0xbeef, r4
|
||||
mov #0xbeef, r5
|
||||
mov #0xbeef, r6
|
||||
mov #0xbeef, r7
|
||||
mov #0xbeef, r8
|
||||
mov #0xbeef, r9
|
||||
mov #0xbeef, r10
|
||||
mov #0xbeef, r11
|
||||
mov #0xbeef, r12
|
||||
mov #0xbeef, r13
|
||||
mov #0xbeef, r14
|
||||
mov #0xbeef, r15
|
||||
|
||||
.define special r10
|
||||
mov #beef, pc
|
||||
mov #beef, sp
|
||||
mov #beef, sr
|
||||
mov #beef, cg
|
||||
mov #0xbeef, pc
|
||||
mov #0xbeef, sp
|
||||
mov #0xbeef, sr
|
||||
mov #0xbeef, cg
|
||||
|
||||
jmp _register_mode
|
||||
jmp 3fe
|
||||
jmp -3fc
|
||||
jmp 0x3fe
|
||||
jmp -0x3fc
|
||||
ret
|
||||
|
||||
; Funky encodings
|
||||
mov r6, r4
|
||||
mov @r6, r4
|
||||
mov @r6+, r4
|
||||
mov 0(r6), r4
|
||||
mov 4141(r6), r4
|
||||
mov 0x0(r6), r4
|
||||
mov 0x4141(r6), r4
|
||||
mov #-1, r4
|
||||
mov #ffff, r4
|
||||
mov #0xffff, r4
|
||||
mov #0, r4
|
||||
mov #1, r4
|
||||
mov #2, r4
|
||||
@ -164,33 +165,33 @@ mov r6, 0(r4)
|
||||
mov @r6, 0(r4)
|
||||
mov @r6+, 0(r4)
|
||||
mov 0(r6), 0(r4)
|
||||
mov 4141(r6), 0(r4)
|
||||
mov 0x4141(r6), 0(r4)
|
||||
mov #-1, 0(r4)
|
||||
mov #ffff, 0(r4)
|
||||
mov #0xffff, 0(r4)
|
||||
mov #0, 0(r4)
|
||||
mov #1, 0(r4)
|
||||
mov #2, 0(r4)
|
||||
mov #4, 0(r4)
|
||||
mov #8, 0(r4)
|
||||
mov r6, 4141(r4)
|
||||
mov @r6, 4141(r4)
|
||||
mov @r6+, 4141(r4)
|
||||
mov 0(r6), 4141(r4)
|
||||
mov 4141(r6), 4141(r4)
|
||||
mov #-1, 4141(r4)
|
||||
mov #ffff, 4141(r4)
|
||||
mov #0, 4141(r4)
|
||||
mov #1, 4141(r4)
|
||||
mov #2, 4141(r4)
|
||||
mov #4, 4141(r4)
|
||||
mov #8, 4141(r4)
|
||||
mov r6, 0x4141(r4)
|
||||
mov @r6, 0x4141(r4)
|
||||
mov @r6+, 0x4141(r4)
|
||||
mov 0(r6), 0x4141(r4)
|
||||
mov 0x4141(r6), 0x4141(r4)
|
||||
mov #-1, 0x4141(r4)
|
||||
mov #0xffff, 0x4141(r4)
|
||||
mov #0, 0x4141(r4)
|
||||
mov #1, 0x4141(r4)
|
||||
mov #2, 0x4141(r4)
|
||||
mov #4, 0x4141(r4)
|
||||
mov #8, 0x4141(r4)
|
||||
mov r6, #0
|
||||
mov @r6, #0
|
||||
mov @r6+, #0
|
||||
mov 0(r6), #0
|
||||
mov 4141(r6), #0
|
||||
mov 0x4141(r6), #0
|
||||
mov #-1, #0
|
||||
mov #ffff, #0
|
||||
mov #0xffff, #0
|
||||
mov #0, #0
|
||||
mov #1, #0
|
||||
mov #2, #0
|
||||
@ -200,9 +201,9 @@ mov r6, #1
|
||||
mov @r6, #1
|
||||
mov @r6+, #1
|
||||
mov 0(r6), #1
|
||||
mov 4141(r6), #1
|
||||
mov 0x4141(r6), #1
|
||||
mov #-1, #1
|
||||
mov #ffff, #1
|
||||
mov #0xffff, #1
|
||||
mov #0, #1
|
||||
mov #1, #1
|
||||
mov #2, #1
|
||||
@ -211,14 +212,14 @@ mov #8, #1
|
||||
|
||||
; Instruction exercise
|
||||
; Jumps
|
||||
jne 10
|
||||
jeq 10
|
||||
jlo 10
|
||||
jhs 10
|
||||
jn 10
|
||||
jge 10
|
||||
jl 10
|
||||
jmp 10
|
||||
jne 0x10
|
||||
jeq 0x10
|
||||
jlo 0x10
|
||||
jhs 0x10
|
||||
jn 0x10
|
||||
jge 0x10
|
||||
jl 0x10
|
||||
jmp 0x10
|
||||
|
||||
; Two-ops
|
||||
mov r14, r15
|
||||
@ -232,7 +233,7 @@ bit r14, r15
|
||||
bic r14, r15
|
||||
bis r14, r15
|
||||
xor r14, r15
|
||||
and r14, 10(r15)
|
||||
and r14, 0x10(r15)
|
||||
|
||||
; One-ops
|
||||
rrc r15
|
||||
@ -241,13 +242,14 @@ rra r15
|
||||
sxt r15
|
||||
push r15
|
||||
call r15
|
||||
reti r15
|
||||
; reti is special
|
||||
reti
|
||||
|
||||
; Jump aliases
|
||||
jnc 10
|
||||
jnz 10
|
||||
jc 10
|
||||
jz 10
|
||||
jnc 0x10
|
||||
jnz 0x10
|
||||
jc 0x10
|
||||
jz 0x10
|
||||
|
||||
; "emulated" no-op instructions
|
||||
ret
|
582 src/assembler.rs
@ -1,197 +1,423 @@
|
||||
// © 2023 John Breaux
|
||||
//! Traverses an AST, assembling instructions.
|
||||
//!
|
||||
//! [Assembler] carries *some* state
|
||||
//! Assembles a binary using the given [AST](crate::parser::ast)
|
||||
|
||||
use crate::parser::preamble::*;
|
||||
use error::AssemblyError;
|
||||
use error::{AResult, ErrorKind::*};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod error;
|
||||
use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span};
|
||||
|
||||
use self::error::{Error, ErrorKind};
|
||||
|
||||
/// Assembles a binary using the given [Assemble]-able item
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct Assembler<'t> {
|
||||
/// The assembled output
|
||||
output: Vec<u16>,
|
||||
/// Table of labels, for backpatching
|
||||
labels: HashMap<&'t str, usize>,
|
||||
/// Backpatch table for jump instructions
|
||||
jump_queue: Vec<(usize, &'t str)>,
|
||||
/// Backpatch table for immediate values
|
||||
expr_queue: Vec<(usize, Expr<'t>)>,
|
||||
/// Base address from .org directives
|
||||
org_base: usize,
|
||||
/// Last seen index in input
|
||||
loc: Span<usize>,
|
||||
}
|
||||
|
||||
impl<'t> Assembler<'t> {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> {
|
||||
t.assemble_in(self)
|
||||
}
|
||||
/// Gets the address of a label
|
||||
pub fn addrof(&self, name: &str) -> Option<u16> {
|
||||
self.labels.get(name).map(|v| *v as u16)
|
||||
}
|
||||
/// Gets the value at a label
|
||||
pub fn valueof(&self, name: &str) -> Option<u16> {
|
||||
self.output.get(self.addrof(name)? as usize).copied()
|
||||
}
|
||||
fn push(&mut self, word: u16) {
|
||||
self.output.push(word)
|
||||
}
|
||||
fn error(&self, kind: ErrorKind) -> Error {
|
||||
Error { span: self.loc, kind }
|
||||
}
|
||||
/// Backpatches everything, and yoinks the output buffer.
|
||||
pub fn out(&mut self) -> AResult<Vec<u16>> {
|
||||
// Resolve jumps
|
||||
for (idx, key) in &self.jump_queue {
|
||||
// eprintln!("Patching jump at {idx} with key {key}");
|
||||
match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) {
|
||||
None => Err(self.error(UndefinedLabel(key.to_string())))?,
|
||||
Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff,
|
||||
Some(value) => Err(self.error(LongJump(value)))?,
|
||||
}
|
||||
}
|
||||
// Resolve immediates through late expression evaluation.
|
||||
for (idx, expr) in &self.expr_queue {
|
||||
// eprintln!("Patching immediate at {idx} with expression {expr:?}");
|
||||
self.output[*idx] = self.eval(expr)?;
|
||||
}
|
||||
let out = std::mem::take(&mut self.output);
|
||||
*self = Default::default();
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn add_label(&mut self, label: &'t str) -> AResult<()> {
|
||||
if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() {
|
||||
Err(self.error(RedefinedLabel(label.into())))?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Appends an expr as an extword, deferring its calculation for later
|
||||
pub fn defer_expr(&mut self, e: Expr<'t>) {
|
||||
self.expr_queue.push((self.output.len(), e));
|
||||
self.push(0);
|
||||
}
|
||||
/// Defers resolution of a jump label until output time
|
||||
/// The jump label will be later resolved to the NEXT word.
|
||||
pub fn defer_jump(&mut self, label: &'t str) {
|
||||
self.jump_queue.push((self.output.len(), label))
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Assemble<'t> {
|
||||
fn assemble(&self) -> AResult<Vec<u16>> {
|
||||
self.assemble_in(&mut Default::default())?.out()
|
||||
}
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>;
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for Statements<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
for stmt in &self.stmts {
|
||||
stmt.assemble_in(a)?;
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Statement<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
Statement::Label(label) => a.add_label(label).map(|_| a),
|
||||
Statement::Insn(i) => i.assemble_in(a),
|
||||
Statement::Directive(d) => d.assemble_in(a),
|
||||
Statement::Comment(_) => Ok(a),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Directive<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
Directive::Define(_) => {}
|
||||
Directive::Org(base) => a.org_base = a.eval(base)? as usize,
|
||||
Directive::Word(expr) => a.defer_expr(*expr.clone()),
|
||||
Directive::Words(exprs) => {
|
||||
for expr in exprs {
|
||||
a.defer_expr(expr.clone())
|
||||
}
|
||||
}
|
||||
Directive::String(str) => {
|
||||
str.assemble_in(a)?;
|
||||
}
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for &'t str {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
for chunk in self.as_bytes().chunks(2) {
|
||||
match chunk.len() {
|
||||
0 => a.push(0),
|
||||
1 => {
|
||||
a.push(chunk[0] as u16);
|
||||
return Ok(a);
|
||||
}
|
||||
2 => a.push((chunk[1] as u16) << 8 | chunk[0] as u16),
|
||||
n => unreachable!("expected chunks of length 2, got length {n}"),
|
||||
}
|
||||
}
|
||||
a.push(0);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for Instruction<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { span, kind } = self;
|
||||
a.loc = *span;
|
||||
kind.assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for InstructionKind<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
InstructionKind::NoEm(v) => v.assemble_in(a),
|
||||
InstructionKind::OneEm(v) => v.assemble_in(a),
|
||||
InstructionKind::OneArg(v) => v.assemble_in(a),
|
||||
InstructionKind::TwoArg(v) => v.assemble_in(a),
|
||||
InstructionKind::Jump(v) => v.assemble_in(a),
|
||||
InstructionKind::Reti(v) => v.assemble_in(a),
|
||||
InstructionKind::Br(v) => v.assemble_in(a),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for NoEm {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for OneEm<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for OneArg<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, width, src } = self;
|
||||
let (src_reg, src_mode, src_ext) = source(src);
|
||||
a.push(
|
||||
0b000100 << 10 | one_arg(*opcode) << 7 | (*width as u16) << 6 | src_mode << 4 | src_reg,
|
||||
);
|
||||
if let Some(expr) = src_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for TwoArg<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
let (src_reg, src_mode, src_ext) = source(src);
|
||||
let (dst_reg, dst_mode, dst_ext) = destination(dst);
|
||||
a.push(
|
||||
two_arg(*opcode) << 12
|
||||
| src_reg << 8
|
||||
| dst_mode << 7
|
||||
| (*width as u16) << 6
|
||||
| src_mode << 4
|
||||
| dst_reg,
|
||||
);
|
||||
|
||||
if let Some(expr) = src_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
if let Some(expr) = dst_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Jump<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, dst } = self;
|
||||
let word = 1 << 13
|
||||
| jump(*opcode) << 10
|
||||
| match *dst {
|
||||
JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))),
|
||||
JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => {
|
||||
return Err(a.error(LongJump(value)))
|
||||
}
|
||||
JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff,
|
||||
JumpDst::Label(label) => {
|
||||
a.defer_jump(label);
|
||||
0
|
||||
}
|
||||
} & 0x3ff;
|
||||
a.push(word);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Reti {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
a.output.push(0b0001_0011_0000_0000);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Br<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one_arg(opcode: token::OneArg) -> u16 {
    opcode as u16
}

pub fn two_arg(opcode: token::TwoArg) -> u16 {
    opcode as u16 + 4
}

pub fn jump(opcode: token::Jump) -> u16 {
    use token::Jump;
    match opcode {
        Jump::Jne | Jump::Jnz => 0,
        Jump::Jeq | Jump::Jz => 1,
        Jump::Jnc | Jump::Jlo => 2,
        Jump::Jc | Jump::Jhs => 3,
        Jump::Jn => 4,
        Jump::Jge => 5,
        Jump::Jl => 6,
        Jump::Jmp => 7,
    }
}

/// Returns a tuple of (Reg, AddrMode, extword)
pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) {
    use SrcSpecial::*;
    match src {
        Src::Special(Four) => (2, 2, None),
        Src::Special(Eight) => (2, 3, None),
        Src::Special(Zero) => (3, 0, None),
        Src::Special(One) => (3, 1, None),
        Src::Special(Two) => (3, 2, None),
        Src::Special(NegOne) => (3, 3, None),
        Src::Immediate(e) => (0, 3, Some(*e.clone())),
        Src::Absolute(e) => (2, 1, Some(*e.clone())),
        Src::Direct(r) => (*r as u16, 0, None),
        Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
        Src::Indirect(r) => (*r as u16, 2, None),
        Src::PostInc(r) => (*r as u16, 3, None),
        Src::BareExpr(e) => (0, 3, Some(*e.clone())),
    }
}
/// Returns a tuple of (Reg, AddrMode, extword)
pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) {
    use DstSpecial::*;
    match dst {
        Dst::Special(Zero) => (3, 0, None),
        Dst::Special(One) => (3, 1, None),
        Dst::Absolute(e) => (2, 1, Some(*e.clone())),
        Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
        Dst::Direct(r) => (*r as u16, 0, None),
    }
}

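// Editor's sketch (not part of this diff): a hedged cross-check on the two-operand packing
// used by `assemble_in` above, with hard-coded register numbers. 0x4e0f is the well-known
// MSP430 encoding of `mov r14, r15`; only `two_arg` and `token::TwoArg` from this file are
// assumed to be in scope.
#[test]
fn sketch_mov_encoding() {
    let word = two_arg(token::TwoArg::Mov) << 12 // opcode nibble 0x4
        | 14 << 8   // src_reg = r14
        | 0 << 7    // dst_mode = register-direct
        | 0 << 6    // width = word
        | 0 << 4    // src_mode = register-direct
        | 15;       // dst_reg = r15
    assert_eq!(word, 0x4e0f);
}
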
impl<'t> Assembler<'t> {
    /// Evaluates an [Expr] using labels and constants defined in the current program
    fn eval(&self, expr: &Expr) -> AResult<u16> {
        match expr {
            Expr::Binary(head, tails) => {
                let mut head = self.eval(head)?;
                for (op, tail) in tails {
                    let tail = self.eval(tail)?;
                    head = match op {
                        BinOp::Mul => head.wrapping_mul(tail),
                        BinOp::Div => head.wrapping_div(tail),
                        BinOp::Rem => head.wrapping_rem(tail),
                        BinOp::Add => head.wrapping_add(tail),
                        BinOp::Sub => head.wrapping_sub(tail),
                        BinOp::Lsh => head.wrapping_shl(tail as u32),
                        BinOp::Rsh => head.wrapping_shr(tail as u32),
                        BinOp::And => head & tail,
                        BinOp::Xor => head ^ tail,
                        BinOp::Or => head | tail,
                    };
                }
                Ok(head)
            }
            Expr::Unary(ops, tail) => {
                let mut tail = self.eval(tail)?;
                for op in ops {
                    tail = match op {
                        UnOp::Not => !tail,
                        UnOp::Neg => 0u16.wrapping_sub(tail),
                        UnOp::Deref => *self
                            .output
                            .get(tail.wrapping_sub(self.org_base as u16) as usize >> 1)
                            .ok_or_else(|| self.error(OobRead(tail)))?,
                    }
                }
                Ok(tail)
            }
            Expr::Group(e) => self.eval(e),
            Expr::Number(n) => Ok(*n),
            Expr::Ident(name) => {
                self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string())))
            }
            Expr::AddrOf(name) => self
                .addrof(name)
                .map(|p| (p << 1).wrapping_add(self.org_base as u16))
                .ok_or_else(|| self.error(UndefinedLabel(name.to_string()))),
        }
    }
}

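// Editor's sketch (not part of this diff): a standalone, hedged restatement of the word-aligned
// pointer read that `UnOp::Deref` performs above. The function name and parameters here are
// illustrative only; they mirror `output` and `org_base` as used in `eval`.
fn deref_word(output: &[u16], org_base: u16, byte_addr: u16) -> Option<u16> {
    // `*(&label + 10)` reads the already-assembled word at that byte address.
    // Byte address -> word index, relative to the load origin; the `>> 1` is why
    // only word-aligned access is possible.
    output.get(byte_addr.wrapping_sub(org_base) as usize >> 1).copied()
}
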
pub mod error {
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::util::Span;
|
||||
|
||||
pub type AResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum IdentType {
|
||||
Word,
|
||||
Jump,
|
||||
pub struct Error {
|
||||
pub span: Span<usize>,
|
||||
pub kind: ErrorKind,
|
||||
}
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
/// Takes in an AST's [Root], and outputs a sequence of bytes
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct Assembler {
|
||||
out: Vec<u16>,
|
||||
/// A map from Labels' [Identifier]s to their location in the binary
|
||||
labels: HashMap<Identifier, usize>,
|
||||
/// A list of all referenced [Identifier]s in the binary, and their locations
|
||||
identifiers: Vec<(usize, Identifier, IdentType)>,
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum ErrorKind {
|
||||
#[default]
|
||||
Todo,
|
||||
/// A label was used, but not defined
|
||||
UndefinedLabel(String),
|
||||
RedefinedLabel(String),
|
||||
OobRead(u16),
|
||||
OddJump(i16),
|
||||
LongJump(i16),
|
||||
/// A plethora of [Error]s
|
||||
Errors(Vec<Error>),
|
||||
}
|
||||
|
||||
impl Assembler {
|
||||
pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> {
|
||||
let mut out = Self::default();
|
||||
out.visit_root(r)?;
|
||||
Ok(out.out)
|
||||
}
|
||||
pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) }
|
||||
pub fn out(self) -> Vec<u16> { self.out }
|
||||
|
||||
fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) }
|
||||
fn push_default(&mut self) -> usize {
|
||||
self.out.push(Default::default());
|
||||
self.out.len() - 1
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { kind, span } = self;
|
||||
write!(f, "[{span}]: ")?;
|
||||
write!(f, "Error: {kind}")
|
||||
}
|
||||
}
|
||||
|
||||
impl Assembler {
|
||||
/// Visits the [Root] node of a parse tree
|
||||
fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> {
|
||||
// Visit the entire tree
|
||||
for (num, line) in r.lines() {
|
||||
self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?;
|
||||
impl Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ErrorKind::Todo => write!(f, "Not yet implemented"),
|
||||
ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"),
|
||||
ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"),
|
||||
ErrorKind::OobRead(addr) => {
|
||||
write!(f, "Out of bounds read in constant expression: {addr}")
|
||||
}
|
||||
// Link identifiers
|
||||
for (idx, id, id_type) in self.identifiers.iter() {
|
||||
let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) };
|
||||
let offset = (num as isize - *idx as isize) * 2;
|
||||
*self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type {
|
||||
IdentType::Word => offset as u16,
|
||||
IdentType::Jump => JumpTarget::squish(offset)?,
|
||||
};
|
||||
ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"),
|
||||
ErrorKind::LongJump(to) => {
|
||||
write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe")
|
||||
}
|
||||
Ok(())
|
||||
ErrorKind::Errors(errors) => {
|
||||
writeln!(f, "Could not complete assembly:")?;
|
||||
for error in errors {
|
||||
writeln!(f, "{error}")?;
|
||||
}
|
||||
|
||||
/// visit a [Line]
|
||||
fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> {
|
||||
match line {
|
||||
Line::Insn(insn) => self.visit_instruction(insn),
|
||||
Line::Label(label) => self.visit_label(label),
|
||||
Line::Directive(d) => self.visit_directive(d),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits a [Directive]
|
||||
fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> {
|
||||
match node {
|
||||
Directive::Org(_) => todo!("{node}"),
|
||||
Directive::Define(..) => (),
|
||||
Directive::Include(r) => self.visit_root(r)?,
|
||||
Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()),
|
||||
Directive::Bytes(words) | Directive::Words(words) => {
|
||||
for word in words {
|
||||
self.out.push((*word).into());
|
||||
}
|
||||
}
|
||||
Directive::String(s) => self.visit_string(s)?,
|
||||
Directive::Strings(strs) => {
|
||||
for s in strs {
|
||||
self.visit_string(s)?;
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [Label]
|
||||
fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> {
|
||||
// Register the label
|
||||
match self.labels.insert(node.0.to_owned(), self.out.len()) {
|
||||
Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits an [Instruction]
|
||||
fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> {
|
||||
self.push_default();
|
||||
self.visit_opcode(insn.opcode())?;
|
||||
self.visit_encoding(insn.encoding())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Opcode]
|
||||
fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= *node as u16;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Encoding]
|
||||
fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= node.word();
|
||||
match node {
|
||||
Encoding::Single { dst, .. } => {
|
||||
self.visit_primary_operand(dst)?;
|
||||
}
|
||||
Encoding::Jump { target } => {
|
||||
self.visit_jump_target(target)?;
|
||||
}
|
||||
Encoding::Double { src, dst, .. } => {
|
||||
self.visit_primary_operand(src)?;
|
||||
self.visit_secondary_operand(dst)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [JumpTarget]
|
||||
fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> {
|
||||
match node {
|
||||
JumpTarget::Number(num) => self.visit_number(num),
|
||||
JumpTarget::Identifier(id) => {
|
||||
self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits a [SecondaryOperand]
|
||||
fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> {
|
||||
use SecondaryOperand as O;
|
||||
if let O::Indexed(_, num) | O::Absolute(num) = node {
|
||||
self.push_default();
|
||||
self.visit_number(num)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [PrimaryOperand]
|
||||
fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> {
|
||||
use PrimaryOperand as O;
|
||||
match node {
|
||||
O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => {
|
||||
self.push_default();
|
||||
self.visit_number(num)?;
|
||||
}
|
||||
O::Relative(id) => {
|
||||
let addr = self.push_default();
|
||||
self.visit_identifier(id, addr, IdentType::Word)?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a number and writes it into the last index
|
||||
fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= u16::from(*node);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a number and appends it to the output buffer
|
||||
fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> {
|
||||
for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() {
|
||||
if idx % 2 == 0 {
|
||||
self.push_default();
|
||||
}
|
||||
*self.last_mut()? |= (byte as u16) << (8 * (idx % 2));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Identifier], and registers it to the identifier list
|
||||
fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> {
|
||||
self.identifiers.push((addr, node.clone(), ty));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -1,56 +0,0 @@
|
||||
// © 2023 John Breauxs
|
||||
use crate::parser::{error::ParseError, preamble::*};
|
||||
use std::{
|
||||
fmt::Display,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AssemblyError {
|
||||
UnresolvedIdentifier(Identifier),
|
||||
RedefinedLabel(Identifier),
|
||||
JumpedTooFar(Identifier, isize),
|
||||
ParseError(ParseError),
|
||||
// TODO: This, better'
|
||||
Context(Box<AssemblyError>, PathBuf, usize),
|
||||
EmptyBuffer,
|
||||
}
|
||||
|
||||
impl AssemblyError {
|
||||
pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self {
|
||||
Self::Context(self.into(), file.as_ref().into(), line)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseError> for AssemblyError {
|
||||
fn from(value: ParseError) -> Self { Self::ParseError(value) }
|
||||
}
|
||||
|
||||
impl Display for AssemblyError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnresolvedIdentifier(id) => {
|
||||
write!(f, "Identifier {id} is undefined, but referenced anyway.")
|
||||
}
|
||||
Self::RedefinedLabel(id) => {
|
||||
write!(f, "Redefined label '{id}'.")
|
||||
}
|
||||
Self::JumpedTooFar(id, num) => {
|
||||
write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)")
|
||||
}
|
||||
Self::ParseError(e) => Display::fmt(e, f),
|
||||
Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()),
|
||||
Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for AssemblyError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::ParseError(e) => Some(e),
|
||||
Self::Context(e, ..) => Some(e),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
49
src/error.rs
49
src/error.rs
@ -1,49 +0,0 @@
|
||||
// © 2023 John Breauxs
|
||||
//! Common error type for [msp430-asm](crate) errors
|
||||
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Produced by [lexer]
|
||||
LexError(lexer::error::LexError),
|
||||
/// Produced by [parser]
|
||||
ParseError(parser::error::ParseError),
|
||||
/// Produced by [assembler]
|
||||
AssemblyError(assembler::error::AssemblyError),
|
||||
}
|
||||
|
||||
impl Error {}
|
||||
|
||||
impl From<lexer::error::LexError> for Error {
|
||||
fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) }
|
||||
}
|
||||
|
||||
impl From<parser::error::ParseError> for Error {
|
||||
fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) }
|
||||
}
|
||||
|
||||
impl From<assembler::error::AssemblyError> for Error {
|
||||
fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) }
|
||||
}
|
||||
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Error::LexError(e) => Display::fmt(e, f),
|
||||
Error::ParseError(e) => Display::fmt(e, f),
|
||||
Error::AssemblyError(e) => Display::fmt(e, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Error::LexError(e) => Some(e),
|
||||
Error::ParseError(e) => Some(e),
|
||||
Error::AssemblyError(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
22
src/hash.rs
22
src/hash.rs
@ -1,22 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Convenience functions and traits for dealing with hashable data
|
||||
pub type Hash = u64;
|
||||
|
||||
/// Calculates a hash using Rust hashmap's default hasher.
|
||||
pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
|
||||
use std::hash::Hasher;
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
hashable.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
pub trait FromHash: From<Hash> {
|
||||
/// Hashes anything that implements [type@Hash] using the
|
||||
/// [DefaultHasher](std::collections::hash_map::DefaultHasher)
|
||||
fn hash<T: std::hash::Hash>(hashable: T) -> Hash { hash(hashable) }
|
||||
fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
|
||||
where Self: Sized {
|
||||
Self::from(Self::hash(hashable))
|
||||
}
|
||||
}
|
||||
impl<T: From<Hash>> FromHash for T {}
|
305
src/lexer.rs
305
src/lexer.rs
@ -1,69 +1,268 @@
|
||||
// © 2023 John Breaux
|
||||
//! Iterates over [`&str`](str), producing [`Token`s](Token)
|
||||
// © 2023-2024 John Breaux
|
||||
//! The [Lexer] turns a [sequence of characters](str) into a stream of
|
||||
//! [lexically-tagged tokens](token)
|
||||
|
||||
pub mod context;
|
||||
pub mod error;
|
||||
pub mod ignore;
|
||||
pub mod preprocessed;
|
||||
pub mod token;
|
||||
pub mod token_stream;
|
||||
|
||||
use context::Context;
|
||||
use error::LexError;
|
||||
use token::{Token, Type};
|
||||
use token_stream::TokenStream;
|
||||
use self::token::{Special, TokenKind, *};
|
||||
use crate::util::Span;
|
||||
use std::{
|
||||
iter::Peekable,
|
||||
str::{CharIndices, FromStr},
|
||||
};
|
||||
use unicode_ident::*;
|
||||
|
||||
/// Iterates over &[str], producing [Token]s
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Tokenizer<'t> {
|
||||
const DEFAULT_BASE: u32 = 10;
|
||||
|
||||
/// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token).
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// # use libmsp430::lexer::{Lexer, token::*};
|
||||
/// let text = "mov r14, r15";
|
||||
/// let mut lexer = Lexer::new(text);
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma);
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof);
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Lexer<'t> {
|
||||
/// Keeps track of the byte offset into the string
|
||||
iter: Peekable<CharIndices<'t>>,
|
||||
text: &'t str,
|
||||
idx: usize,
|
||||
context: Context,
|
||||
start: usize,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl<'t> Tokenizer<'t> {
|
||||
/// Produces a new [Tokenizer] from a [str]ing slice
|
||||
pub fn new<T>(text: &'t T) -> Self
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
|
||||
impl<'t> Lexer<'t> {
|
||||
/// Creates a new [Lexer] over some [text](str)
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 }
|
||||
}
|
||||
/// Gets the current byte-position
|
||||
pub fn location(&self) -> usize {
|
||||
self.start
|
||||
}
|
||||
/// Internal: Emits a token with the provided [TokenKind], providing its extents.
|
||||
fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> {
|
||||
let out =
|
||||
Some(Token::new(self.next_lexeme(), kind, Span { start: self.start, end: self.index }));
|
||||
self.start = self.index;
|
||||
out
|
||||
}
|
||||
fn next_lexeme(&self) -> &'t str {
|
||||
&self.text[self.start..self.index]
|
||||
}
|
||||
fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self {
|
||||
while let Some(&c) = self.peek() {
|
||||
if !f(c) {
|
||||
break;
|
||||
}
|
||||
self.next();
|
||||
}
|
||||
self
|
||||
}
|
||||
fn space(&mut self) -> Option<&mut Self> {
|
||||
while self.peek()?.is_whitespace() && *self.peek()? != '\n' {
|
||||
self.next();
|
||||
}
|
||||
self.start = self.index;
|
||||
Some(self)
|
||||
}
|
||||
/// Consumes a [char] without checking, for ergonomic chaining
|
||||
fn then(&mut self) -> &mut Self {
|
||||
self.next();
|
||||
self
|
||||
}
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.iter.peek().map(|(_, c)| c)
|
||||
}
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let (index, c) = self.iter.next()?;
|
||||
self.index = index + c.len_utf8();
|
||||
Some(c)
|
||||
}
|
||||
|
||||
fn count(&mut self, token: &Token) {
|
||||
// update the context
|
||||
self.context.count(token);
|
||||
// advance the index
|
||||
self.idx += token.len();
|
||||
/// Scans for the next [Token] in the stream
|
||||
pub fn scan(&mut self) -> Option<Token<'t>> {
|
||||
if self.space().is_none() {
|
||||
return self.emit(TokenKind::Eof);
|
||||
}
|
||||
let Some(c) = self.peek() else {
|
||||
return self.emit(TokenKind::Eof);
|
||||
};
|
||||
match c {
|
||||
'\n' => self.then().emit(TokenKind::Newline),
|
||||
'!' => self.then().emit(TokenKind::Bang),
|
||||
'#' => self.then().emit(TokenKind::Hash),
|
||||
'$' => self.then().emit(TokenKind::Dollar),
|
||||
'%' => self.then().emit(TokenKind::Percent),
|
||||
'&' => self.then().emit(TokenKind::Amp),
|
||||
'\'' => self.then().char(),
|
||||
'"' => self.then().string(),
|
||||
'(' => self.then().emit(TokenKind::OpenParen),
|
||||
')' => self.then().emit(TokenKind::CloseParen),
|
||||
'*' => self.then().emit(TokenKind::Star),
|
||||
'+' => self.then().emit(TokenKind::Plus),
|
||||
',' => self.then().emit(TokenKind::Comma),
|
||||
'-' => self.then().emit(TokenKind::Minus),
|
||||
'.' => self.then().directive_or_bw(),
|
||||
'/' => self.then().comment_or_slash(),
|
||||
'0' => self.then().number_with_base(),
|
||||
':' => self.then().emit(TokenKind::Colon),
|
||||
';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
|
||||
'<' => self.then().less(),
|
||||
'>' => self.then().greater(),
|
||||
'@' => self.then().emit(TokenKind::At),
|
||||
'[' => self.then().emit(TokenKind::OpenBrace),
|
||||
']' => self.then().emit(TokenKind::CloseBrace),
|
||||
'^' => self.then().emit(TokenKind::Caret),
|
||||
'_' => self.then().identifier(),
|
||||
'{' => self.then().emit(TokenKind::OpenCurly),
|
||||
'|' => self.then().emit(TokenKind::Bar),
|
||||
'}' => self.then().emit(TokenKind::CloseCurly),
|
||||
c if c.is_numeric() => self.number::<DEFAULT_BASE>(),
|
||||
&c if is_xid_start(c) => self.then().identifier(),
|
||||
c => todo!("Unrecognized character: {c}"),
|
||||
}
|
||||
}
|
||||
    fn number_with_base(&mut self) -> Option<Token<'t>> {
        match self.peek() {
            Some('x') => self.then().number::<16>(),
            Some('d') => self.then().number::<10>(),
            Some('o') => self.then().number::<8>(),
            Some('b') => self.then().number::<2>(),
            Some(c) if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(),
            _ => self.emit(TokenKind::Number(0, 10)),
        }
    }
    fn number<const B: u32>(&mut self) -> Option<Token<'t>> {
        let mut num = self.digit::<B>()?;
        while let Some(digit) = self.digit::<B>() {
            num = num * B + digit;
        }
        if num > u16::MAX as u32 {
            None
        } else {
            self.emit(TokenKind::Number(num as u16, B as u8))
        }
    }
    fn digit<const B: u32>(&mut self) -> Option<u32> {
        let digit = self.peek()?.to_digit(B)?;
        self.then();
        Some(digit)
    }

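    // Editor's sketch (not part of this diff): a hedged usage example of the radix dispatch
    // above, written against the same `Lexer::new`/`scan`/`kind` API shown in the doc example
    // near the top of this file; the input string is made up for illustration.
    //
    //     let mut lexer = Lexer::new("0x420 0b101 0o17 0d9 42");
    //     assert_eq!(lexer.scan().unwrap().kind, TokenKind::Number(0x420, 16));
    //     assert_eq!(lexer.scan().unwrap().kind, TokenKind::Number(0b101, 2));
    //     assert_eq!(lexer.scan().unwrap().kind, TokenKind::Number(0o17, 8));
    //     assert_eq!(lexer.scan().unwrap().kind, TokenKind::Number(9, 10));
    //     assert_eq!(lexer.scan().unwrap().kind, TokenKind::Number(42, 10));
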
    fn comment_or_slash(&mut self) -> Option<Token<'t>> {
        match self.peek() {
            Some('/') => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
            _ => self.emit(TokenKind::Slash),
        }
    }
    fn less(&mut self) -> Option<Token<'t>> {
        match self.peek() {
            Some('<') => self.then().emit(TokenKind::Lsh),
            _ => todo!("less"),
        }
    }
    fn greater(&mut self) -> Option<Token<'t>> {
        match self.peek() {
            Some('>') => self.then().emit(TokenKind::Rsh),
            _ => todo!("greater"),
        }
    }
fn identifier(&mut self) -> Option<Token<'t>> {
|
||||
while let Some(c) = self.then().peek() {
|
||||
if !is_xid_continue(*c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let lexeme = self.next_lexeme();
|
||||
if let Ok(op) = Reg::from_str(lexeme) {
|
||||
self.emit(TokenKind::Reg(op))
|
||||
} else if let Ok(op) = NoEm::from_str(lexeme) {
|
||||
self.emit(TokenKind::NoEm(op))
|
||||
} else if let Ok(op) = OneEm::from_str(lexeme) {
|
||||
self.emit(TokenKind::OneEm(op))
|
||||
} else if let Ok(op) = Special::from_str(lexeme) {
|
||||
self.emit(TokenKind::Special(op))
|
||||
} else if let Ok(op) = OneArg::from_str(lexeme) {
|
||||
self.emit(TokenKind::OneArg(op))
|
||||
} else if let Ok(op) = TwoArg::from_str(lexeme) {
|
||||
self.emit(TokenKind::TwoArg(op))
|
||||
} else if let Ok(op) = Jump::from_str(lexeme) {
|
||||
self.emit(TokenKind::Jump(op))
|
||||
} else {
|
||||
self.emit(TokenKind::Identifier)
|
||||
}
|
||||
}
|
||||
fn directive_or_bw(&mut self) -> Option<Token<'t>> {
|
||||
while let Some(c) = self.then().peek() {
|
||||
if !is_xid_continue(*c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
match self.next_lexeme() {
|
||||
".b" => self.emit(TokenKind::Byte),
|
||||
".w" => self.emit(TokenKind::Word),
|
||||
_ => self.emit(TokenKind::Directive),
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> Iterator for Tokenizer<'text> {
|
||||
type Item = Token<'text>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.idx >= self.text.len() {
|
||||
return None;
|
||||
/// Todo: Character unescaping in Lexer::string
|
||||
fn string(&mut self) -> Option<Token<'t>> {
|
||||
while '"' != self.next()? {}
|
||||
self.emit(TokenKind::String)
|
||||
}
|
||||
let token = Token::from(&self.text[self.idx..]);
|
||||
// Process [Type::Directive]s
|
||||
// Count the token
|
||||
self.count(&token);
|
||||
Some(token)
|
||||
fn char(&mut self) -> Option<Token<'t>> {
|
||||
let out = self.unescape()?;
|
||||
self.next().filter(|c| *c == '\'').and_then(|_| self.emit(TokenKind::Char(out)))
|
||||
}
|
||||
/// Unescape a single character
|
||||
fn unescape(&mut self) -> Option<char> {
|
||||
match self.next() {
|
||||
Some('\\') => (),
|
||||
other => return other,
|
||||
}
|
||||
Some(match self.next()? {
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'f' => '\x0c',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'x' => self.hex_escape()?,
|
||||
'u' => self.unicode_escape()?,
|
||||
'0' => '\0',
|
||||
chr => chr,
|
||||
})
|
||||
}
|
||||
/// unescape a single 2-digit hex escape
|
||||
fn hex_escape(&mut self) -> Option<char> {
|
||||
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
|
||||
char::from_u32(out) //.ok_or(Error::bad_unicode(out, self.line(), self.col()))
|
||||
}
|
||||
/// unescape a single \u{} unicode escape
|
||||
fn unicode_escape(&mut self) -> Option<char> {
|
||||
let mut out = 0;
|
||||
let Some('{') = self.peek() else {
|
||||
return None; //Err(Error::invalid_escape('u', self.line(), self.col()));
|
||||
};
|
||||
self.then();
|
||||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'}' => {
|
||||
self.then();
|
||||
return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col()));
|
||||
}
|
||||
_ => out = (out << 4) + self.digit::<16>()?,
|
||||
}
|
||||
}
|
||||
None //Err(Error::invalid_escape('u', self.line(), self.col()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> TokenStream<'text> for Tokenizer<'text> {
|
||||
fn context(&self) -> Context { self.context }
|
||||
// Tokenizer has access to the source buffer, and can implement expect and peek without cloning
|
||||
// itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
|
||||
// registers are valid identifiers.
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
|
||||
self.count(&token);
|
||||
Ok(token)
|
||||
}
|
||||
fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
@ -1,38 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [Context] stores contextual information about the current tokenizer state
|
||||
//!
|
||||
//! This data is trivially copyable and can be provided in error messages using the
|
||||
//! [Error::Contextual] specialization)
|
||||
use super::*;
|
||||
/// Stores contextual information about the current tokenizer state, useful for printing errors
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Context {
|
||||
line: usize,
|
||||
position: usize,
|
||||
tokens: usize,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new() -> Self { Default::default() }
|
||||
pub fn line(&self) -> usize { self.line }
|
||||
pub fn tokens(&self) -> usize { self.tokens }
|
||||
pub fn position(&self) -> usize { self.position }
|
||||
pub(super) fn count(&mut self, t: &Token) {
|
||||
match t.variant() {
|
||||
Type::EndOfFile => return,
|
||||
Type::Endl => {
|
||||
self.line += 1;
|
||||
self.position = 1;
|
||||
}
|
||||
_ => self.position += t.len(),
|
||||
}
|
||||
self.tokens += 1;
|
||||
}
|
||||
}
|
||||
impl Default for Context {
|
||||
fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } }
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Context {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) }
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
// © 2023 John Breauxs
|
||||
use super::{
|
||||
context::Context,
|
||||
token::{OwnedToken, *},
|
||||
};
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LexError {
|
||||
/// Any other error, tagged with [Context]. Created by [`Error::context()`]
|
||||
Contextual(Context, Box<Self>),
|
||||
/// Produced by [Token] when the input is entirely unexpected.
|
||||
UnexpectedSymbol(String),
|
||||
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
|
||||
UnexpectedToken { expected: Type, got: OwnedToken },
|
||||
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the
|
||||
/// expected [Types](Type)
|
||||
AllExpectationsFailed { expected: Types, got: OwnedToken },
|
||||
}
|
||||
|
||||
impl LexError {
|
||||
pub fn context(self, c: Context) -> Self {
|
||||
match self {
|
||||
Self::Contextual(..) => self,
|
||||
_ => Self::Contextual(c, Box::new(self)),
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts the root of the error tree
|
||||
pub fn bare(self) -> Self {
|
||||
match self {
|
||||
Self::Contextual(_, bare) => bare.bare(),
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
|
||||
match expected.as_ref().len() {
|
||||
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
|
||||
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mask_expectation(mut self, expected: Type) -> Self {
|
||||
match self {
|
||||
LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got },
|
||||
LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got },
|
||||
LexError::Contextual(context, err) => {
|
||||
self = LexError::Contextual(context, Box::new(err.mask_expectation(expected)))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for LexError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
|
||||
LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""),
|
||||
LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
|
||||
LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LexError {}
|
@ -1,55 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`]
|
||||
use super::*;
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
ignore: Type,
|
||||
inner: &'t mut T,
|
||||
}
|
||||
|
||||
impl<'t, T> Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
/// Creates a new [Ignore], which ignores the [ignore Type](Type)
|
||||
pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } }
|
||||
|
||||
/// Gets a mutable reference to the inner [Iterator]
|
||||
pub fn inner_mut(&mut self) -> &mut T { self.inner }
|
||||
}
|
||||
|
||||
impl<'t, T> Iterator for Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
type Item = Token<'t>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let next = self.inner.next()?;
|
||||
// Space tokens are greedy, so the next token shouldn't be a Space
|
||||
match next.variant() {
|
||||
Type::Space => self.next(),
|
||||
_ => Some(next),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> TokenStream<'t> for Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn context(&self) -> Context { self.inner.context() }
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.expect(expected)
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Self::Item {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.peek()
|
||||
}
|
||||
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.peek_expect(expected)
|
||||
}
|
||||
}
|
@ -1,174 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
|
||||
//! ".define" rules
|
||||
use super::*;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
// TODO: Clean this spaghetti mess up
|
||||
|
||||
/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
|
||||
/// rules
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub struct Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
|
||||
sub_types: Vec<Type>,
|
||||
queue: VecDeque<Token<'t>>,
|
||||
inner: &'t mut T,
|
||||
}
|
||||
|
||||
impl<'t, T> Iterator for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
type Item = Token<'t>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.queue.pop_front() {
|
||||
Some(token) => Some(token),
|
||||
None => {
|
||||
let next = self.inner.next()?;
|
||||
if let Some(subs) = self.sub_table.get(&next) {
|
||||
self.queue.extend(subs);
|
||||
return self.next();
|
||||
}
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
|
||||
/// Creates a new [Preprocessed] [TokenStream]
|
||||
pub fn new(inner: &'t mut T) -> Self {
|
||||
Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
|
||||
}
|
||||
|
||||
/// Gets a mutable reference to the inner [TokenStream]
|
||||
pub fn inner_mut(&mut self) -> &mut T { self.inner }
|
||||
|
||||
/// Preserve the next token in the queue
|
||||
fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
|
||||
self.queue.push_back(token);
|
||||
token
|
||||
}
|
||||
|
||||
/// Process .define directives in the preprocessor
|
||||
fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
|
||||
if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
|
||||
return Ok(());
|
||||
}
|
||||
// Tokenize the subdocument
|
||||
self.allow(Type::Directive);
|
||||
self.allow(Type::Space);
|
||||
|
||||
let Some(k) = self.inner.next() else { return Ok(()) };
|
||||
if !self.sub_types.contains(&k.variant()) {
|
||||
self.sub_types.push(k.variant());
|
||||
};
|
||||
|
||||
self.allow(Type::Space);
|
||||
|
||||
let mut replacement = vec![];
|
||||
loop {
|
||||
match self.inner.peek().variant() {
|
||||
Type::Endl | Type::EndOfFile => break,
|
||||
Type::Comment | Type::Space => {
|
||||
// ignore comments
|
||||
self.inner.next();
|
||||
}
|
||||
_ => {
|
||||
let next = self.inner.next().unwrap();
|
||||
replacement.push(self.enqueue(next));
|
||||
}
|
||||
}
|
||||
}
|
||||
self.sub_table.insert(k, replacement);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Does the preprocessing step
|
||||
fn preprocess(&mut self, token: Token<'t>) {
|
||||
if let Some(subs) = self.sub_table.get(&token) {
|
||||
self.queue.extend(subs);
|
||||
self.inner.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn context(&self) -> Context { self.inner.context() }
|
||||
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
match self.queue.front() {
|
||||
Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
|
||||
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
|
||||
None => {
|
||||
// Only resolve defines when expecting, otherwise you'll run into issues.
|
||||
if let Ok(next) = self.inner.expect(expected) {
|
||||
self.define(next)?;
|
||||
return Ok(next);
|
||||
}
|
||||
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
|
||||
if let Some(subs) = self.sub_table.get(&next) {
|
||||
self.inner.allow_any_of(&self.sub_types);
|
||||
self.queue.extend(subs);
|
||||
}
|
||||
return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
|
||||
}
|
||||
Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Self::Item {
|
||||
match self.queue.front() {
|
||||
Some(token) => *token,
|
||||
None => {
|
||||
// Only allow substitution when the next token is unexpected
|
||||
let old = self.inner.peek();
|
||||
self.preprocess(old);
|
||||
match self.queue.front() {
|
||||
Some(&new) => new,
|
||||
None => old,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
match self.queue.front() {
|
||||
Some(&token) if token.is_variant(expected) => Ok(token),
|
||||
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
|
||||
None => {
|
||||
if let Ok(next) = self.inner.peek_expect(expected) {
|
||||
return Ok(next);
|
||||
}
|
||||
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
|
||||
self.preprocess(next);
|
||||
return if self.queue.is_empty() {
|
||||
self.inner.peek_expect(expected)
|
||||
} else {
|
||||
self.peek_expect(expected)
|
||||
};
|
||||
}
|
||||
Err(LexError::expected([expected], self.inner.peek()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Preprocessed")
|
||||
.field("sub_table", &self.sub_table)
|
||||
.field("sub_types", &self.sub_types)
|
||||
.field("queue", &self.queue)
|
||||
.field("context", &self.context())
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
66
src/lexer/tests.rs
Normal file
66
src/lexer/tests.rs
Normal file
@ -0,0 +1,66 @@
|
||||
use super::*;
macro_rules! lex {
    (type ($t:tt), $expected:expr) => {
        let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token));
        assert_eq!(token.kind, $expected);
    };
    ({ $($t:tt)* }) => {
        Lexer::new(stringify!($($t)*))
    };
}
#[test]
fn ascii_char() {
    lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char
    lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\x1b' should be a valid char
}
#[test]
fn unicode_escape_char() {
    lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\u{1f988}' should be a valid 🦈
}
#[test]
fn number_with_base() {
    lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number
    lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number
    lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number
    lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number
    lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number
    lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number
}
#[test]
fn no_operand_emulated() {
    lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm
    lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm
    lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm
    lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm
    lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm
    lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm
    lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm
    lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm
    lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm
    lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
}
#[test]
fn registers() {
    lex!(type(pc), TokenKind::Reg(Reg::PC));
    lex!(type(sp), TokenKind::Reg(Reg::SP));
    lex!(type(sr), TokenKind::Reg(Reg::SR));
    lex!(type(cg), TokenKind::Reg(Reg::CG));
    lex!(type(r0), TokenKind::Reg(Reg::PC));
    lex!(type(r1), TokenKind::Reg(Reg::SP));
    lex!(type(r2), TokenKind::Reg(Reg::SR));
    lex!(type(r3), TokenKind::Reg(Reg::CG));
    lex!(type(r4), TokenKind::Reg(Reg::R4));
    lex!(type(r5), TokenKind::Reg(Reg::R5));
    lex!(type(r6), TokenKind::Reg(Reg::R6));
    lex!(type(r7), TokenKind::Reg(Reg::R7));
    lex!(type(r8), TokenKind::Reg(Reg::R8));
    lex!(type(r9), TokenKind::Reg(Reg::R9));
    lex!(type(r10), TokenKind::Reg(Reg::R10));
    lex!(type(r11), TokenKind::Reg(Reg::R11));
    lex!(type(r12), TokenKind::Reg(Reg::R12));
    lex!(type(r13), TokenKind::Reg(Reg::R13));
    lex!(type(r14), TokenKind::Reg(Reg::R14));
    lex!(type(r15), TokenKind::Reg(Reg::R15));
}

// TODO: opcode tests, misc. special character tests, etc.
@ -1,335 +1,479 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [Token] is a [semantically tagged](Type) sequence of characters.
|
||||
// © 2023-2024 John Breaux
|
||||
//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span]
|
||||
//!
|
||||
//! Token, and the tokenizer, intend to copy as little as possible.
|
||||
|
||||
use super::error::LexError;
|
||||
use regex::Regex;
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
sync::OnceLock,
|
||||
};
|
||||
|
||||
/// Implements regex matching functions on [`Token`] for each [`Type`],
|
||||
/// and implements [`From<&str>`] for [`Token`]
|
||||
macro_rules! regex_impl {
|
||||
(<$t:lifetime> $type:ty {$(
|
||||
$(#[$meta:meta])*
|
||||
pub fn $func:ident (text: &str) -> Option<Self> {
|
||||
regex!($out:path = $re:literal)
|
||||
//! [Tokens](Token) are a borrowed, and cannot outlive their source slice (lifetime `'t`)
|
||||
use crate::util::Span;
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Token<'t> {
|
||||
pub lexeme: &'t str,
|
||||
pub kind: TokenKind,
|
||||
pub pos: Span<usize>,
|
||||
}
|
||||
)*}) => {
|
||||
impl<$t> $type {
|
||||
/// Lexes a token only for the expected `variant`
|
||||
///
|
||||
/// Warning: This bypasses precedence rules. Only use for specific patterns.
|
||||
pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> {
|
||||
match expected {$(
|
||||
$out => Self::$func(text),
|
||||
)*}.ok_or(LexError::UnexpectedToken {
|
||||
expected,
|
||||
got: Self::from(text).into(),
|
||||
impl<'t> Token<'t> {
|
||||
pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self {
|
||||
Self { lexeme, kind, pos }
|
||||
}
|
||||
pub fn kind(&self) -> TokenKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum TokenKind {
|
||||
Eof,
|
||||
Newline, // \n
|
||||
OpenParen, // (
|
||||
CloseParen, // )
|
||||
OpenCurly, // {
|
||||
CloseCurly, // }
|
||||
OpenBrace, // [
|
||||
CloseBrace, // ]
|
||||
Comma, // ,
|
||||
Colon, // :
|
||||
Bang, // !
|
||||
At, // @
|
||||
Amp, // &
|
||||
Bar, // |
|
||||
Caret, // ^
|
||||
Star, // *
|
||||
Hash, // #
|
||||
Dollar, // $
|
||||
Percent, // %
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Slash, // /
|
||||
Lsh, // <<
|
||||
Rsh, // >>
|
||||
|
||||
Comment, // (';' | '//') .* '\n' |
|
||||
Directive, // '.' XID_CONTINUE*
|
||||
Identifier, // XID_START XID_CONTINUE*
|
||||
Number(u16, u8), // varies depending on base
|
||||
Char(char), // '\'' ('\' Escape | .) '\''
|
||||
String, // '"' .* '"'
|
||||
Reg(Reg),
|
||||
NoEm(NoEm),
|
||||
OneEm(OneEm),
|
||||
Special(Special),
|
||||
OneArg(OneArg),
|
||||
TwoArg(TwoArg),
|
||||
Jump(Jump),
|
||||
|
||||
Byte, // .b
|
||||
Word, // .w
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Reg {
|
||||
PC,
|
||||
SP,
|
||||
SR,
|
||||
CG,
|
||||
R4,
|
||||
R5,
|
||||
R6,
|
||||
R7,
|
||||
R8,
|
||||
R9,
|
||||
R10,
|
||||
R11,
|
||||
R12,
|
||||
R13,
|
||||
R14,
|
||||
R15,
|
||||
}
|
||||
/// Fake instructions of the form `opcode`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum NoEm {
|
||||
Nop,
|
||||
Ret,
|
||||
Clrc,
|
||||
Clrz,
|
||||
Clrn,
|
||||
Setc,
|
||||
Setz,
|
||||
Setn,
|
||||
Dint,
|
||||
Eint,
|
||||
}
|
||||
/// Fake instructions of the form `opcode dst`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum OneEm {
|
||||
Pop,
|
||||
Rla,
|
||||
Rlc,
|
||||
Inv,
|
||||
Clr,
|
||||
Tst,
|
||||
Dec,
|
||||
Decd,
|
||||
Inc,
|
||||
Incd,
|
||||
Adc,
|
||||
Dadc,
|
||||
Sbc,
|
||||
}
|
||||
/// These opcodes have bespoke grammatical rules
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Special {
|
||||
/// Br = "br" Src
|
||||
Br,
|
||||
}
|
||||
/// Real instructions of the form `opcode src`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum OneArg {
|
||||
Rrc,
|
||||
Swpb,
|
||||
Rra,
|
||||
Sxt,
|
||||
Push,
|
||||
Call,
|
||||
Reti,
|
||||
}
|
||||
/// Real instructions of the form `opcode src, dst`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum TwoArg {
|
||||
Mov,
|
||||
Add,
|
||||
Addc,
|
||||
Subc,
|
||||
Sub,
|
||||
Cmp,
|
||||
Dadd,
|
||||
Bit,
|
||||
Bic,
|
||||
Bis,
|
||||
Xor,
|
||||
And,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Jump {
|
||||
Jne,
|
||||
Jnz,
|
||||
Jeq,
|
||||
Jz,
|
||||
Jnc,
|
||||
Jlo,
|
||||
Jc,
|
||||
Jhs,
|
||||
Jn,
|
||||
Jge,
|
||||
Jl,
|
||||
Jmp,
|
||||
}
|
||||
mod convert {
|
||||
//! Implementations of [FromStr] for [token](super) types.
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
impl FromStr for Reg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"pc" => Reg::PC,
|
||||
"sp" => Reg::SP,
|
||||
"sr" => Reg::SR,
|
||||
"cg" => Reg::CG,
|
||||
"r0" => Reg::PC,
|
||||
"r1" => Reg::SP,
|
||||
"r2" => Reg::SR,
|
||||
"r3" => Reg::CG,
|
||||
"r4" => Reg::R4,
|
||||
"r5" => Reg::R5,
|
||||
"r6" => Reg::R6,
|
||||
"r7" => Reg::R7,
|
||||
"r8" => Reg::R8,
|
||||
"r9" => Reg::R9,
|
||||
"r10" => Reg::R10,
|
||||
"r11" => Reg::R11,
|
||||
"r12" => Reg::R12,
|
||||
"r13" => Reg::R13,
|
||||
"r14" => Reg::R14,
|
||||
"r15" => Reg::R15,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
$(
|
||||
$(#[$meta])*
|
||||
/// Tries to read [`
|
||||
#[doc = stringify!($out)]
|
||||
/// `] from `text`
|
||||
pub fn $func(text: &$t str) -> Option<Self> {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
|
||||
.find(text)?.into();
|
||||
Some(Self { variant: $out, lexeme })
|
||||
})*
|
||||
}
|
||||
impl<$t> From<&$t str> for $type {
|
||||
fn from (value: &$t str) -> Self {
|
||||
$(
|
||||
if let Some(token) = Self::$func(value) {
|
||||
token
|
||||
} else
|
||||
)*
|
||||
{todo!("Unexpected input: {value:#?} (Tokenization failure)")}
|
||||
impl FromStr for NoEm {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"nop" => NoEm::Nop,
|
||||
"ret" => NoEm::Ret,
|
||||
"clrc" => NoEm::Clrc,
|
||||
"clrz" => NoEm::Clrz,
|
||||
"clrn" => NoEm::Clrn,
|
||||
"setc" => NoEm::Setc,
|
||||
"setz" => NoEm::Setz,
|
||||
"setn" => NoEm::Setn,
|
||||
"dint" => NoEm::Dint,
|
||||
"eint" => NoEm::Eint,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
impl FromStr for OneEm {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"pop" => OneEm::Pop,
|
||||
"rla" => OneEm::Rla,
|
||||
"rlc" => OneEm::Rlc,
|
||||
"inv" => OneEm::Inv,
|
||||
"clr" => OneEm::Clr,
|
||||
"tst" => OneEm::Tst,
|
||||
"dec" => OneEm::Dec,
|
||||
"decd" => OneEm::Decd,
|
||||
"inc" => OneEm::Inc,
|
||||
"incd" => OneEm::Incd,
|
||||
"adc" => OneEm::Adc,
|
||||
"dadc" => OneEm::Dadc,
|
||||
"sbc" => OneEm::Sbc,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
|
||||
/// A [Token] is a [semantically tagged](Type) sequence of characters
|
||||
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Token<'text> {
|
||||
/// The type of this token
|
||||
variant: Type,
|
||||
/// The sub[str]ing corresponding to this token
|
||||
lexeme: &'text str,
|
||||
}
|
||||
|
||||
impl<'text> Token<'text> {
|
||||
/// Returns the [Type] of this [Token]
|
||||
pub fn variant(&self) -> Type { self.variant }
|
||||
|
||||
/// Returns the lexeme (originating string slice) of this token
|
||||
pub fn lexeme(&self) -> &'text str { self.lexeme }
|
||||
|
||||
/// Parses this [Token] into another type
|
||||
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
|
||||
where F: std::str::FromStr {
|
||||
self.lexeme.parse()
|
||||
impl FromStr for Special {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"br" => Special::Br,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
/// Returns whether the Lexeme is the expected [Type]
|
||||
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
|
||||
|
||||
/// Returns the length of [Self::lexeme] in bytes.
|
||||
pub fn len(&self) -> usize { self.lexeme.len() }
|
||||
|
||||
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
|
||||
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
|
||||
}
|
||||
|
||||
impl<'text> Debug for Token<'text> {
|
||||
impl FromStr for OneArg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"rrc" => OneArg::Rrc,
|
||||
"swpb" => OneArg::Swpb,
|
||||
"rra" => OneArg::Rra,
|
||||
"sxt" => OneArg::Sxt,
|
||||
"push" => OneArg::Push,
|
||||
"call" => OneArg::Call,
|
||||
"reti" => OneArg::Reti,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for TwoArg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"mov" => TwoArg::Mov,
|
||||
"add" => TwoArg::Add,
|
||||
"addc" => TwoArg::Addc,
|
||||
"subc" => TwoArg::Subc,
|
||||
"sub" => TwoArg::Sub,
|
||||
"cmp" => TwoArg::Cmp,
|
||||
"dadd" => TwoArg::Dadd,
|
||||
"bit" => TwoArg::Bit,
|
||||
"bic" => TwoArg::Bic,
|
||||
"bis" => TwoArg::Bis,
|
||||
"xor" => TwoArg::Xor,
|
||||
"and" => TwoArg::And,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for Jump {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"jne" => Jump::Jne,
|
||||
"jnz" => Jump::Jnz,
|
||||
"jeq" => Jump::Jeq,
|
||||
"jz" => Jump::Jz,
|
||||
"jnc" => Jump::Jnc,
|
||||
"jlo" => Jump::Jlo,
|
||||
"jc" => Jump::Jc,
|
||||
"jhs" => Jump::Jhs,
|
||||
"jn" => Jump::Jn,
|
||||
"jge" => Jump::Jge,
|
||||
"jl" => Jump::Jl,
|
||||
"jmp" => Jump::Jmp,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
mod display {
|
||||
//! Implementations of [Display] for [token](super) types.
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
impl<'t> Display for Token<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
|
||||
let Self { lexeme, kind, pos: _ } = self;
|
||||
match kind {
|
||||
TokenKind::Comment
|
||||
| TokenKind::Directive
|
||||
| TokenKind::Identifier
|
||||
| TokenKind::String => {
|
||||
write!(f, "{}", lexeme)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> Display for Token<'text> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self.variant {
|
||||
Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
|
||||
v => write!(f, "{v} \"{}\"", self.lexeme),
|
||||
ty => ty.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A [token Type](Type) is a semantic tag for a sequence of characters
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Type {
|
||||
/// contiguous whitespace, excluding newline
|
||||
Space,
|
||||
/// newline and contiguous whitespace
|
||||
Endl,
|
||||
/// A line-comment
|
||||
Comment,
|
||||
/// Jump label *definition*
|
||||
Label,
|
||||
/// Instructions
|
||||
Insn,
|
||||
/// Operand width is byte
|
||||
ByteWidth,
|
||||
/// Operand width is word
|
||||
WordWidth,
|
||||
/// Register mnemonic (i.e. `pc`, `r14`)
|
||||
Register,
|
||||
/// Marker for base-10
|
||||
RadixMarkerDec,
|
||||
/// Marker for base-16
|
||||
RadixMarkerHex,
|
||||
/// Marker for base-8
|
||||
RadixMarkerOct,
|
||||
/// Marker for base-2
|
||||
RadixMarkerBin,
|
||||
/// 1-4 hexadigit numbers only
|
||||
Number,
|
||||
/// Negative number marker
|
||||
Minus,
|
||||
/// post-increment mode marker
|
||||
Plus,
|
||||
/// Open-Indexed-Mode marker
|
||||
LParen,
|
||||
/// Close-Indexed-Mode marker
|
||||
RParen,
|
||||
/// Open Square Bracket
|
||||
LBracket,
|
||||
/// Closed Square Bracket
|
||||
RBracket,
|
||||
/// Indirect mode marker
|
||||
Indirect,
|
||||
/// absolute address marker
|
||||
Absolute,
|
||||
/// immediate value marker
|
||||
Immediate,
|
||||
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underline
|
||||
Identifier,
|
||||
/// A string, encased in "quotes"
|
||||
String,
|
||||
/// Assembler directive
|
||||
Directive,
|
||||
/// Separator (comma)
|
||||
Separator,
|
||||
/// End of File marker
|
||||
#[default]
|
||||
EndOfFile,
|
||||
/// Invalid token
|
||||
Invalid,
|
||||
}
|
||||
|
||||
regex_impl! {<'text> Token<'text> {
|
||||
pub fn expect_space(text: &str) -> Option<Self> {
|
||||
regex!(Type::Space = r"^[\s--\n]+")
|
||||
}
|
||||
pub fn expect_endl(text: &str) -> Option<Self> {
|
||||
regex!(Type::Endl = r"^\n[\s--\n]*")
|
||||
}
|
||||
pub fn expect_comment(text: &str) -> Option<Self> {
|
||||
regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
|
||||
}
|
||||
pub fn expect_label(text: &str) -> Option<Self> {
|
||||
regex!(Type::Label = r"^:")
|
||||
}
|
||||
pub fn expect_insn(text: &str) -> Option<Self> {
|
||||
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
|
||||
}
|
||||
pub fn expect_byte_width(text: &str) -> Option<Self> {
|
||||
regex!(Type::ByteWidth = r"(?i)^\.b")
|
||||
}
|
||||
pub fn expect_word_width(text: &str) -> Option<Self> {
|
||||
regex!(Type::WordWidth = r"(?i)^\.w")
|
||||
}
|
||||
pub fn expect_register(text: &str) -> Option<Self> {
|
||||
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
|
||||
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
|
||||
}
|
||||
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerDec = r"(?i)^0d")
|
||||
}
|
||||
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
|
||||
}
|
||||
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerOct = r"(?i)^0o")
|
||||
}
|
||||
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerBin = r"(?i)^0b")
|
||||
}
|
||||
pub fn expect_number(text: &str) -> Option<Self> {
|
||||
regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)")
|
||||
}
|
||||
pub fn expect_minus(text: &str) -> Option<Self> {
|
||||
regex!(Type::Minus = r"^-")
|
||||
}
|
||||
pub fn expect_plus(text: &str) -> Option<Self> {
|
||||
regex!(Type::Plus = r"^\+")
|
||||
}
|
||||
pub fn expect_l_paren(text: &str) -> Option<Self> {
|
||||
regex!(Type::LParen = r"^\(")
|
||||
}
|
||||
pub fn expect_r_paren(text: &str) -> Option<Self> {
|
||||
regex!(Type::RParen = r"^\)")
|
||||
}
|
||||
pub fn expect_l_bracket(text: &str) -> Option<Self> {
|
||||
regex!(Type::LBracket = r"^\[")
|
||||
}
|
||||
pub fn expect_r_bracket(text: &str) -> Option<Self> {
|
||||
regex!(Type::RBracket = r"^]")
|
||||
}
|
||||
pub fn expect_indrect(text: &str) -> Option<Self> {
|
||||
regex!(Type::Indirect = r"^@")
|
||||
}
|
||||
pub fn expect_absolute(text: &str) -> Option<Self> {
|
||||
regex!(Type::Absolute = r"^&")
|
||||
}
|
||||
pub fn expect_immediate(text: &str) -> Option<Self> {
|
||||
regex!(Type::Immediate = r"^#")
|
||||
}
|
||||
pub fn expect_string(text: &str) -> Option<Self> {
|
||||
regex!(Type::String = r#"^"[^"]*""#)
|
||||
}
|
||||
pub fn expect_directive(text: &str) -> Option<Self> {
|
||||
regex!(Type::Directive = r"^\.\w+")
|
||||
}
|
||||
pub fn expect_identifier(text: &str) -> Option<Self> {
|
||||
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
|
||||
}
|
||||
pub fn expect_separator(text: &str) -> Option<Self> {
|
||||
regex!(Type::Separator = r"^,")
|
||||
}
|
||||
pub fn expect_end_of_file(text: &str) -> Option<Self> {
|
||||
regex!(Type::EndOfFile = r"^$")
|
||||
}
|
||||
pub fn expect_anything(text: &str) -> Option<Self> {
|
||||
regex!(Type::Invalid = r"^.*")
|
||||
}
|
||||
}}
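// A sketch of what one `expect_*` arm above is assumed to boil down to (the real
// `regex!`/`regex_impl!` macros may also cache the compiled pattern): anchor the regex at
// the start of `text` and wrap the matched prefix in a token. The local types here only
// mirror the `variant`/`lexeme` shape used by this module; they are illustrative, not the
// crate's own definitions.
use regex::Regex;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Ty { Register }

#[derive(Debug, PartialEq)]
struct Tok<'text> { variant: Ty, lexeme: &'text str }

fn expect_register(text: &str) -> Option<Tok<'_>> {
    // Same pattern as above: case-insensitive, with an ASCII word boundary so "r15," matches "r15".
    let re = Regex::new(r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)").ok()?;
    re.find(text).map(|m| Tok { variant: Ty::Register, lexeme: m.as_str() })
}

fn main() {
    assert_eq!(expect_register("r15, r14").map(|t| t.lexeme), Some("r15"));
    assert!(expect_register("label:").is_none());
}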
|
||||
|
||||
impl Display for Type {
|
||||
impl Display for TokenKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Space => Display::fmt("space", f),
|
||||
Self::Endl => Display::fmt("newline", f),
|
||||
Self::Comment => Display::fmt("comment", f),
|
||||
Self::Label => Display::fmt("label definition", f),
|
||||
Self::Insn => Display::fmt("opcode", f),
|
||||
Self::ByteWidth => Display::fmt("byte-width", f),
|
||||
Self::WordWidth => Display::fmt("word-width", f),
|
||||
Self::Register => Display::fmt("register", f),
|
||||
Self::RadixMarkerDec => Display::fmt("decimal marker", f),
|
||||
Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f),
|
||||
Self::RadixMarkerOct => Display::fmt("octal marker", f),
|
||||
Self::RadixMarkerBin => Display::fmt("binary marker", f),
|
||||
Self::Number => Display::fmt("number", f),
|
||||
Self::Minus => Display::fmt("minus sign", f),
|
||||
Self::Plus => Display::fmt("plus sign", f),
|
||||
Self::LParen => Display::fmt("left parenthesis", f),
|
||||
Self::RParen => Display::fmt("right parenthesis", f),
|
||||
Self::LBracket => Display::fmt("left bracket", f),
|
||||
Self::RBracket => Display::fmt("right bracket", f),
|
||||
Self::Indirect => Display::fmt("indirect", f),
|
||||
Self::Absolute => Display::fmt("absolute", f),
|
||||
Self::Immediate => Display::fmt("immediate", f),
|
||||
Self::Identifier => Display::fmt("identifier", f),
|
||||
Self::String => Display::fmt("string", f),
|
||||
Self::Directive => Display::fmt("directive", f),
|
||||
Self::Separator => Display::fmt("comma", f),
|
||||
Self::EndOfFile => Display::fmt("EOF", f),
|
||||
Self::Invalid => Display::fmt("invalid token", f),
|
||||
TokenKind::Eof => write!(f, "[EOF]"),
|
||||
TokenKind::Newline => writeln!(f),
|
||||
TokenKind::OpenParen => write!(f, "("),
|
||||
TokenKind::CloseParen => write!(f, ")"),
|
||||
TokenKind::OpenCurly => write!(f, "{{"),
|
||||
TokenKind::CloseCurly => write!(f, "}}"),
|
||||
TokenKind::OpenBrace => write!(f, "["),
|
||||
TokenKind::CloseBrace => write!(f, "]"),
|
||||
TokenKind::Comma => write!(f, ","),
|
||||
TokenKind::Colon => write!(f, ":"),
|
||||
TokenKind::Bang => write!(f, "!"),
|
||||
TokenKind::At => write!(f, "@"),
|
||||
TokenKind::Amp => write!(f, "&"),
|
||||
TokenKind::Bar => write!(f, "|"),
|
||||
TokenKind::Caret => write!(f, "^"),
|
||||
TokenKind::Star => write!(f, "*"),
|
||||
TokenKind::Hash => write!(f, "#"),
|
||||
TokenKind::Dollar => write!(f, "$"),
|
||||
TokenKind::Percent => write!(f, "%"),
|
||||
TokenKind::Plus => write!(f, "+"),
|
||||
TokenKind::Minus => write!(f, "-"),
|
||||
TokenKind::Slash => write!(f, "/"),
|
||||
TokenKind::Lsh => write!(f, "<<"),
|
||||
TokenKind::Rsh => write!(f, ">>"),
|
||||
TokenKind::Comment => write!(f, "; "),
|
||||
TokenKind::Directive => write!(f, "."),
|
||||
TokenKind::Identifier => write!(f, "Identifier"),
|
||||
TokenKind::Number(val, 2) => write!(f, "0b{val:b}"),
|
||||
TokenKind::Number(val, 8) => write!(f, "0o{val:o}"),
|
||||
TokenKind::Number(val, 16) => write!(f, "0x{val:x}"),
|
||||
TokenKind::Number(val, _) => write!(f, "{val}"),
|
||||
TokenKind::Char(c) => write!(f, "'{c}'"),
|
||||
TokenKind::String => write!(f, "\"String\""),
|
||||
TokenKind::Reg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::NoEm(kw) => write!(f, "{kw}"),
|
||||
TokenKind::OneEm(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Special(kw) => write!(f, "{kw}"),
|
||||
TokenKind::OneArg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::TwoArg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Jump(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Byte => write!(f, ".b"),
|
||||
TokenKind::Word => write!(f, ".w"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A [Token] which can outlive its parent buffer
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct OwnedToken {
|
||||
/// The type of this token
|
||||
variant: Type,
|
||||
/// The sub[String] corresponding to this token
|
||||
lexeme: String,
|
||||
}
|
||||
|
||||
impl Display for OwnedToken {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
|
||||
}
|
||||
|
||||
impl<'t> From<&'t OwnedToken> for Token<'t> {
|
||||
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
|
||||
}
|
||||
|
||||
impl From<Token<'_>> for OwnedToken {
|
||||
fn from(value: Token<'_>) -> Self {
|
||||
let Token { variant, lexeme } = value;
|
||||
OwnedToken { variant, lexeme: lexeme.to_owned() }
|
||||
}
|
||||
}
|
||||
|
||||
/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Types(Vec<Type>);
|
||||
|
||||
impl<T: AsRef<[Type]>> From<T> for Types {
|
||||
// TODO: Possibly bad. Check out in rust playground.
|
||||
fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
|
||||
}
|
||||
|
||||
impl Display for Types {
|
||||
impl Display for Reg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for (idx, t) in self.0.iter().enumerate() {
|
||||
Display::fmt(t, f)?;
|
||||
match idx {
|
||||
i if i < self.0.len() - 2 => Display::fmt(", ", f)?,
|
||||
i if i < self.0.len() - 1 => Display::fmt(" or ", f)?,
|
||||
_ => (),
|
||||
match self {
|
||||
Reg::PC => "pc".fmt(f),
|
||||
Reg::SP => "sp".fmt(f),
|
||||
Reg::SR => "sr".fmt(f),
|
||||
Reg::CG => "cg".fmt(f),
|
||||
Reg::R4 => "r4".fmt(f),
|
||||
Reg::R5 => "r5".fmt(f),
|
||||
Reg::R6 => "r6".fmt(f),
|
||||
Reg::R7 => "r7".fmt(f),
|
||||
Reg::R8 => "r8".fmt(f),
|
||||
Reg::R9 => "r9".fmt(f),
|
||||
Reg::R10 => "r10".fmt(f),
|
||||
Reg::R11 => "r11".fmt(f),
|
||||
Reg::R12 => "r12".fmt(f),
|
||||
Reg::R13 => "r13".fmt(f),
|
||||
Reg::R14 => "r14".fmt(f),
|
||||
Reg::R15 => "r15".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for NoEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
NoEm::Nop => "nop".fmt(f),
|
||||
NoEm::Ret => "ret".fmt(f),
|
||||
NoEm::Clrc => "clrc".fmt(f),
|
||||
NoEm::Clrz => "clrz".fmt(f),
|
||||
NoEm::Clrn => "clrn".fmt(f),
|
||||
NoEm::Setc => "setc".fmt(f),
|
||||
NoEm::Setz => "setz".fmt(f),
|
||||
NoEm::Setn => "setn".fmt(f),
|
||||
NoEm::Dint => "dint".fmt(f),
|
||||
NoEm::Eint => "eint".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for OneEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OneEm::Pop => "pop".fmt(f),
|
||||
OneEm::Rla => "rla".fmt(f),
|
||||
OneEm::Rlc => "rlc".fmt(f),
|
||||
OneEm::Inv => "inv".fmt(f),
|
||||
OneEm::Clr => "clr".fmt(f),
|
||||
OneEm::Tst => "tst".fmt(f),
|
||||
OneEm::Dec => "dec".fmt(f),
|
||||
OneEm::Decd => "decd".fmt(f),
|
||||
OneEm::Inc => "inc".fmt(f),
|
||||
OneEm::Incd => "incd".fmt(f),
|
||||
OneEm::Adc => "adc".fmt(f),
|
||||
OneEm::Dadc => "dadc".fmt(f),
|
||||
OneEm::Sbc => "sbc".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Special {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Special::Br => "br".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for OneArg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OneArg::Rrc => "rrc".fmt(f),
|
||||
OneArg::Swpb => "swpb".fmt(f),
|
||||
OneArg::Rra => "rra".fmt(f),
|
||||
OneArg::Sxt => "sxt".fmt(f),
|
||||
OneArg::Push => "push".fmt(f),
|
||||
OneArg::Call => "call".fmt(f),
|
||||
OneArg::Reti => "reti".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for TwoArg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TwoArg::Mov => "mov".fmt(f),
|
||||
TwoArg::Add => "add".fmt(f),
|
||||
TwoArg::Addc => "addc".fmt(f),
|
||||
TwoArg::Subc => "subc".fmt(f),
|
||||
TwoArg::Sub => "sub".fmt(f),
|
||||
TwoArg::Cmp => "cmp".fmt(f),
|
||||
TwoArg::Dadd => "dadd".fmt(f),
|
||||
TwoArg::Bit => "bit".fmt(f),
|
||||
TwoArg::Bic => "bic".fmt(f),
|
||||
TwoArg::Bis => "bis".fmt(f),
|
||||
TwoArg::Xor => "xor".fmt(f),
|
||||
TwoArg::And => "and".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Jump {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Jump::Jne => "jne".fmt(f),
|
||||
Jump::Jnz => "jnz".fmt(f),
|
||||
Jump::Jeq => "jeq".fmt(f),
|
||||
Jump::Jz => "jz".fmt(f),
|
||||
Jump::Jnc => "jnc".fmt(f),
|
||||
Jump::Jlo => "jlo".fmt(f),
|
||||
Jump::Jc => "jc".fmt(f),
|
||||
Jump::Jhs => "jhs".fmt(f),
|
||||
Jump::Jn => "jn".fmt(f),
|
||||
Jump::Jge => "jge".fmt(f),
|
||||
Jump::Jl => "jl".fmt(f),
|
||||
Jump::Jmp => "jmp".fmt(f),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -1,85 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
|
||||
use super::*;
|
||||
|
||||
use super::ignore::Ignore;
|
||||
use super::preprocessed::Preprocessed;
|
||||
|
||||
/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
|
||||
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
|
||||
/// Gets this stream's [Context]
|
||||
fn context(&self) -> Context;
|
||||
|
||||
/// Creates an iterator that skips [Type::Space] in the input
|
||||
#[inline]
|
||||
fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
|
||||
where Self: Sized {
|
||||
Ignore::new(variant, self)
|
||||
}
|
||||
|
||||
/// Creates a [TokenStream] that performs live substitution of the input
|
||||
#[inline]
|
||||
fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
|
||||
where Self: Sized {
|
||||
Preprocessed::new(self)
|
||||
}
|
||||
|
||||
/// Returns the next [Token] without advancing
|
||||
fn peek(&mut self) -> Self::Item;
|
||||
|
||||
/// Returns the next [Token] if it is of the expected [Type], without advancing
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
|
||||
|
||||
/// Consumes and returns a [Token] if it is the expected [Type]
|
||||
///
|
||||
/// Otherwise, does not consume a [Token]
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
|
||||
|
||||
/// Ignores a [Token] of the expected [Type], propagating errors.
|
||||
#[inline]
|
||||
fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) }
|
||||
|
||||
/// Ignores a [Token] of the expected [Type], discarding errors.
|
||||
#[inline]
|
||||
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
|
||||
|
||||
/// Runs the given function against each of the expected [Types](Type), returning the first success.
|
||||
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
for &expected in expected.as_ref() {
|
||||
match f(self, expected).map_err(|e| e.bare()) {
|
||||
Ok(t) => return Ok(t),
|
||||
Err(LexError::UnexpectedToken { .. }) => continue,
|
||||
Err(e) => return Err(e.context(self.context())),
|
||||
}
|
||||
}
|
||||
Err(LexError::expected(expected, self.peek()).context(self.context()))
|
||||
}
|
||||
|
||||
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
|
||||
#[inline]
|
||||
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::peek_expect, expected)
|
||||
}
|
||||
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
|
||||
///
|
||||
/// Otherwise, does not consume a [Token]
|
||||
#[inline]
|
||||
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::expect, expected)
|
||||
}
|
||||
/// Ignores a [Token] of any expected [Type], discarding errors.
|
||||
#[inline]
|
||||
fn allow_any_of<T>(&mut self, expected: T)
|
||||
where T: AsRef<[Type]> {
|
||||
let _ = self.expect_any_of(expected);
|
||||
}
|
||||
/// Ignores a [Token] of any expected [Type], propagating errors.
|
||||
#[inline]
|
||||
fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::require, expected)
|
||||
}
|
||||
}
|
62
src/lib.rs
62
src/lib.rs
@ -54,23 +54,55 @@
|
||||
//! └─ EndOfFile
|
||||
//! ```
|
||||
|
||||
pub mod preamble {
|
||||
//! Common imports for msp430-asm
|
||||
use super::*;
|
||||
pub use assembler::Assembler;
|
||||
pub use error::Error;
|
||||
pub use lexer::{
|
||||
context::Context,
|
||||
token::{Token, Type},
|
||||
token_stream::TokenStream,
|
||||
Tokenizer,
|
||||
pub mod util {
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
ops::{Index, Range},
|
||||
};
|
||||
pub use parser::Parser;
|
||||
/// A <code> [Clone] + [Copy] + [!Iterator](Iterator) </code> version of a [Range]
|
||||
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Span<Idx> {
|
||||
pub start: Idx,
|
||||
pub end: Idx,
|
||||
}
|
||||
impl<Idx> From<Span<Idx>> for Range<Idx> {
|
||||
fn from(value: Span<Idx>) -> Self {
|
||||
value.start..value.end
|
||||
}
|
||||
}
|
||||
impl<Idx> From<Range<Idx>> for Span<Idx> {
|
||||
fn from(value: Range<Idx>) -> Self {
|
||||
Self { start: value.start, end: value.end }
|
||||
}
|
||||
}
|
||||
impl<T> Index<Span<usize>> for [T] {
|
||||
type Output = [T];
|
||||
fn index(&self, index: Span<usize>) -> &Self::Output {
|
||||
self.index(Range::from(index))
|
||||
}
|
||||
}
|
||||
impl Index<Span<usize>> for str {
|
||||
type Output = str;
|
||||
fn index(&self, index: Span<usize>) -> &Self::Output {
|
||||
self.index(Range::from(index))
|
||||
}
|
||||
}
|
||||
impl<Idx: Debug> Debug for Span<Idx> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}..{:?}", self.start, self.end)
|
||||
}
|
||||
}
|
||||
impl<Idx: Display> Display for Span<Idx> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}..{}", self.start, self.end)
|
||||
}
|
||||
}
|
||||
}
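// A small usage sketch for the Span type above, assuming it is reached as
// `libmsp430::util::Span` per the workspace manifest: unlike a Range, a Span is Copy,
// so it can sit in AST nodes and be used to slice the source text repeatedly.
use libmsp430::util::Span;
use std::ops::Range;

fn main() {
    let line = "mov r15, r14";
    let mnemonic = Span { start: 0, end: 3 };
    // Copy, not move: the same span can index more than once.
    assert_eq!(&line[mnemonic], "mov");
    assert_eq!(&line[mnemonic], "mov");
    // Round-trips with Range for APIs that want one.
    assert_eq!(Range::from(mnemonic), 0..3);
}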
|
||||
|
||||
use preamble::*;
|
||||
pub mod error;
|
||||
pub mod lexer;
|
||||
|
||||
pub mod preprocessor;
|
||||
|
||||
pub mod parser;
|
||||
|
||||
pub mod assembler;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
|
636
src/parser.rs
636
src/parser.rs
@ -1,81 +1,591 @@
|
||||
// © 2023 John Breaux
|
||||
//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root)
|
||||
// © 2023-2024 John Breaux
|
||||
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
|
||||
pub mod ast;
|
||||
|
||||
use crate::{TokenStream, Type};
|
||||
use error::ParseError;
|
||||
use preamble::*;
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
path::Path,
|
||||
use self::error::{
|
||||
Error,
|
||||
ErrorKind::{self, *},
|
||||
PResult, Parsing,
|
||||
};
|
||||
|
||||
pub mod preamble {
|
||||
//! All the different AST node types
|
||||
use super::*;
|
||||
// Traits
|
||||
pub use parsable::Parsable;
|
||||
// Nodes
|
||||
pub use comment::Comment;
|
||||
pub use directive::Directive;
|
||||
pub use identifier::Identifier;
|
||||
pub use instruction::{
|
||||
encoding::{
|
||||
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
|
||||
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
|
||||
use crate::{
|
||||
lexer::{
|
||||
token::{Reg, Special, Token, TokenKind as Kind},
|
||||
Lexer,
|
||||
},
|
||||
opcode::Opcode,
|
||||
Instruction,
|
||||
preprocessor::Preprocessor,
|
||||
util::Span,
|
||||
};
|
||||
pub use label::Label;
|
||||
pub use line::Line;
|
||||
pub use root::Root;
|
||||
// Error
|
||||
pub use error::ParseError;
|
||||
use ast::*;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Parser<'t> {
|
||||
lexer: Preprocessor<'t>,
|
||||
next: Option<Token<'t>>,
|
||||
loc: Span<usize>,
|
||||
}
|
||||
|
||||
pub mod parsable;
|
||||
|
||||
pub mod comment;
|
||||
pub mod directive;
|
||||
pub mod error;
|
||||
pub mod identifier;
|
||||
pub mod instruction;
|
||||
pub mod label;
|
||||
pub mod line;
|
||||
pub mod root;
|
||||
|
||||
pub struct Parser {
|
||||
radix: u32,
|
||||
impl<'t> Parser<'t> {
|
||||
/// Creates a new [Parser]
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
let lexer = Preprocessor::new(text);
|
||||
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
|
||||
}
|
||||
/// Creates a new [Parser] from an existing [Lexer]
|
||||
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
|
||||
let lexer = Preprocessor::with_lexer(lexer);
|
||||
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> {
|
||||
Root::parse(&self, &mut stream.ignore(Type::Space))
|
||||
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
|
||||
Parsable::parse(self)
|
||||
}
|
||||
pub fn parse<T>(self, input: &T) -> Result<Root, ParseError>
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
|
||||
}
|
||||
pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError>
|
||||
where P: AsRef<Path> + ?Sized {
|
||||
self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into()))
|
||||
}
|
||||
pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError>
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
|
||||
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
|
||||
Error { parsing, kind, loc: self.loc }
|
||||
}
|
||||
|
||||
/// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
|
||||
/// conversion
|
||||
pub fn radix(mut self, radix: u32) { self.radix = radix; }
|
||||
/// Peek a token out of the lexer
|
||||
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
|
||||
if self.next.is_none() {
|
||||
self.next = self.lexer.scan();
|
||||
}
|
||||
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
|
||||
}
|
||||
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
|
||||
Ok(match self.take() {
|
||||
Some(token) => token,
|
||||
None => {
|
||||
self.peek(p)?;
|
||||
self.take().expect("should have been populated by peek")
|
||||
}
|
||||
})
|
||||
}
|
||||
/// Consumes the next token
|
||||
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
|
||||
match self.peek(p)?.kind {
|
||||
kind if kind == expect => {
|
||||
self.take();
|
||||
Ok(self)
|
||||
}
|
||||
kind => Err(self.error(Unexpected(kind), p)),
|
||||
}
|
||||
}
|
||||
/// Consumes the next token without checking it
|
||||
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
|
||||
self.next(p)?;
|
||||
Ok(self)
|
||||
}
|
||||
/// Take the last peeked token
|
||||
pub fn take(&mut self) -> Option<Token<'t>> {
|
||||
self.next.take()
|
||||
}
|
||||
}
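// The lookahead above is a single-slot peek buffer: `peek` fills `next` from the
// preprocessor, `take` drains it, and `next`/`assert`/`then` are take-or-fill on top of
// that. A reduced, self-contained sketch of the same shape over a plain iterator (the
// names here are illustrative, not the crate's):
struct Peek1<I: Iterator> {
    inner: I,
    slot: Option<I::Item>,
}

impl<I: Iterator> Peek1<I> {
    fn new(inner: I) -> Self {
        Self { inner, slot: None }
    }
    /// Fill the slot if it is empty, then look at it without consuming.
    fn peek(&mut self) -> Option<&I::Item> {
        if self.slot.is_none() {
            self.slot = self.inner.next();
        }
        self.slot.as_ref()
    }
    /// Drain the slot.
    fn take(&mut self) -> Option<I::Item> {
        self.slot.take()
    }
    /// Take-or-fill: consume the peeked item, or pull a fresh one.
    fn next(&mut self) -> Option<I::Item> {
        self.peek();
        self.take()
    }
}

fn main() {
    let mut p = Peek1::new("mov r15 , r14".split_whitespace());
    assert_eq!(p.peek(), Some(&"mov")); // peeking does not consume
    assert_eq!(p.next(), Some("mov"));  // take-or-fill does
    assert_eq!(p.next(), Some("r15"));
}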
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self { Self { radix: 16 } }
|
||||
// Expressions
|
||||
impl<'t> Parser<'t> {
|
||||
/// Parses an expression
|
||||
pub fn expr(&mut self) -> PResult<Expr<'t>> {
|
||||
self.term()
|
||||
}
|
||||
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
|
||||
pub fn term(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.factor()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)),
|
||||
Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)),
|
||||
Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a factor expression (binary `+`add, `-`sub)
|
||||
pub fn factor(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.shift()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)),
|
||||
Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
|
||||
pub fn shift(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.bin()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)),
|
||||
Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn bin(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.unary()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)),
|
||||
Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)),
|
||||
Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a unary expression (`!`invert, `-`negate)
|
||||
pub fn unary(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let mut ops = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Star => ops.push(UnOp::Deref),
|
||||
Kind::Minus => ops.push(UnOp::Neg),
|
||||
Kind::Bang => ops.push(UnOp::Not),
|
||||
_ if ops.is_empty() => break Ok(self.primary()?),
|
||||
_ => break Ok(Expr::Unary(ops, self.primary()?.into())),
|
||||
}
|
||||
self.take();
|
||||
}
|
||||
}
|
||||
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
|
||||
pub fn primary(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let Token { lexeme, kind, .. } = *self.peek(p)?;
|
||||
Ok(match kind {
|
||||
Kind::OpenParen => {
|
||||
let out = Expr::Group(self.then(p)?.parse()?);
|
||||
self.assert(Kind::CloseParen, p)?;
|
||||
out
|
||||
}
|
||||
Kind::Number(n, _) => {
|
||||
self.take();
|
||||
Expr::Number(n)
|
||||
}
|
||||
Kind::Identifier => {
|
||||
self.take();
|
||||
Expr::Ident(lexeme)
|
||||
}
|
||||
Kind::Amp => self.then(p)?.addrof()?,
|
||||
ty => Err(self.error(NonNumeric(ty), p))?,
|
||||
})
|
||||
}
|
||||
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let token = self.peek(p)?;
|
||||
let out = match token.kind {
|
||||
Kind::Identifier => Expr::AddrOf(token.lexeme),
|
||||
Kind::Number(n, _) => Expr::Number(n),
|
||||
ty => Err(self.error(Unexpected(ty), p))?,
|
||||
};
|
||||
self.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
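// Note on the layering above: expr() → term() → factor() → shift() → bin() → unary()
// means `*`, `/` and `%` end up binding loosest and the bitwise and unary operators bind
// tightest, roughly the reverse of C's precedence. Each level yields a head expression
// plus a Vec of (BinOp, rhs) pairs, which ast::canonical later folds left to right with
// wrapping u16 arithmetic. A hedged sketch of that shape and its fold, assuming the crate
// is linked as `libmsp430`:
use libmsp430::parser::ast::{canonical::Canonicalize, BinOp, Expr};

fn main() {
    // The tree the methods above would build for `1 + 2 * 3` (hand-traced, not machine-checked):
    // term = factor("1 + 2"), then one (Mul, factor("3")) pair.
    let e = Expr::Binary(
        Box::new(Expr::Binary(
            Box::new(Expr::Number(1)),
            vec![(BinOp::Add, Expr::Number(2))],
        )),
        vec![(BinOp::Mul, Expr::Number(3))],
    );
    // Folds as (1 + 2) * 3 = 9 rather than 7.
    assert_eq!(e.to_canonical(), Expr::Number(9));
}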
|
||||
|
||||
impl Debug for Parser {
|
||||
pub trait Parsable<'t>: Sized {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self>;
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Statements<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let mut stmts = vec![];
|
||||
while p.peek(Parsing::File)?.kind != Kind::Eof {
|
||||
stmts.push(p.parse()?)
|
||||
}
|
||||
Ok(Self { stmts })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Statement<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let token = *p.peek(Parsing::Stmt)?;
|
||||
Ok(match token.kind {
|
||||
Kind::Comment => {
|
||||
p.take();
|
||||
Statement::Comment(token.lexeme)
|
||||
}
|
||||
Kind::Directive => Statement::Directive(p.parse()?),
|
||||
Kind::Identifier => Statement::Label(p.label()?),
|
||||
_ => Statement::Insn(p.parse()?),
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Directive<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Directive;
|
||||
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
|
||||
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
|
||||
p.take();
|
||||
Ok(match lexeme {
|
||||
".define" => Directive::Define(p.parse()?),
|
||||
".org" => Directive::Org(p.expr()?.into()),
|
||||
".word" => Directive::Word(p.parse()?),
|
||||
".words" => Directive::Words(p.parse()?),
|
||||
".string" => Directive::String(p.string()?),
|
||||
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
|
||||
})
|
||||
}
|
||||
}
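// A hand-built example of the tree these impls aim to produce for the two lines
// `main:` and `bis #1, sr`, using only the ast module below; the crate path `libmsp430`
// and the visibility of `lexer::token` from outside the crate are assumptions here, and
// the span values are placeholders.
use libmsp430::{
    lexer::token::{self, Reg},
    parser::ast::{Dst, Expr, Instruction, InstructionKind, Src, Statement, Statements, TwoArg, Width},
    util::Span,
};

fn main() {
    let stmts = Statements {
        stmts: vec![
            Statement::Label("main"),
            Statement::Insn(Instruction {
                span: Span { start: 6, end: 17 },
                kind: InstructionKind::TwoArg(TwoArg {
                    opcode: token::TwoArg::Bis,
                    width: Width::Word,
                    src: Src::Immediate(Box::new(Expr::Number(1))),
                    dst: Dst::Direct(Reg::SR),
                }),
            }),
        ],
    };
    // Display round-trips it back to assembly syntax:
    print!("{stmts}"); // "main:\nbis\t#1, sr\n"
}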
|
||||
impl<'t> Parsable<'t> for Vec<Token<'t>> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Directive;
|
||||
let mut tokens = vec![];
|
||||
loop {
|
||||
if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind {
|
||||
break;
|
||||
}
|
||||
tokens.push(p.next(parsing)?)
|
||||
}
|
||||
p.take();
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Instruction<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let start = p.peek(Parsing::Instruction)?.pos.start;
|
||||
Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } })
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for InstructionKind<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
use crate::lexer::token::OneArg;
|
||||
// an instruction starts with an opcode
|
||||
Ok(match p.peek(Parsing::Instruction)?.kind() {
|
||||
Kind::NoEm(_) => Self::NoEm(p.parse()?),
|
||||
Kind::OneEm(_) => Self::OneEm(p.parse()?),
|
||||
Kind::Special(Special::Br) => Self::Br(p.parse()?),
|
||||
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
|
||||
Kind::OneArg(_) => Self::OneArg(p.parse()?),
|
||||
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
|
||||
Kind::Jump(_) => Self::Jump(p.parse()?),
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for NoEm {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
match p.next(Parsing::NoEm)?.kind {
|
||||
Kind::NoEm(opcode) => Ok(Self { opcode }),
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for OneEm<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Self {
|
||||
opcode: match p.next(Parsing::OneEm)?.kind {
|
||||
Kind::OneEm(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
dst: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for OneArg<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Self {
|
||||
opcode: match p.next(Parsing::OneArg)?.kind {
|
||||
Kind::OneArg(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
src: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for TwoArg<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::TwoArg;
|
||||
Ok(Self {
|
||||
opcode: match p.next(parsing)?.kind {
|
||||
Kind::TwoArg(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
src: p.parse()?,
|
||||
dst: p.assert(Kind::Comma, parsing)?.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Jump<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Jump;
|
||||
Ok(Self {
|
||||
opcode: match p.next(parsing)?.kind {
|
||||
Kind::Jump(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
dst: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Reti {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
use crate::lexer::token::OneArg;
|
||||
p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?;
|
||||
Ok(Reti)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Br<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
p.assert(Kind::Special(Special::Br), Parsing::Br)?;
|
||||
Ok(Self { src: p.parse()? })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Src<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Src;
|
||||
Ok(match p.peek(parsing)?.kind {
|
||||
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
|
||||
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
|
||||
Kind::At => {
|
||||
let reg = match p.then(parsing)?.next(parsing)?.kind {
|
||||
Kind::Reg(r) => r,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
};
|
||||
if let Kind::Plus = p.peek(parsing)?.kind {
|
||||
p.take();
|
||||
Src::PostInc(reg)
|
||||
} else {
|
||||
Src::Indirect(reg)
|
||||
}
|
||||
} // @reg+, @reg
|
||||
Kind::Reg(_) => Src::Direct(p.parse()?),
|
||||
_ => {
|
||||
let expr = p.parse()?;
|
||||
match p.peek(parsing)?.kind {
|
||||
Kind::OpenParen => Src::Indexed(expr, {
|
||||
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
|
||||
p.assert(Kind::CloseParen, parsing)?;
|
||||
reg
|
||||
}),
|
||||
_ => Src::BareExpr(expr),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Dst<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Dst;
|
||||
Ok(match p.peek(parsing)?.kind {
|
||||
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
|
||||
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
|
||||
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
|
||||
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
|
||||
Kind::Reg(_) => Dst::Direct(p.parse()?),
|
||||
_ => Dst::Indexed(p.expr()?.into(), {
|
||||
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
|
||||
p.assert(Kind::CloseParen, parsing)?;
|
||||
reg
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for JumpDst<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Jump;
|
||||
let mut neg = false;
|
||||
let out = loop {
|
||||
let token = p.peek(parsing)?;
|
||||
match token.kind {
|
||||
Kind::Minus => {
|
||||
neg = !neg;
|
||||
}
|
||||
Kind::Plus => {}
|
||||
Kind::Identifier => break Self::Label(token.lexeme),
|
||||
Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }),
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
}
|
||||
p.take();
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Width {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let out = match p.peek(Parsing::Width)?.kind() {
|
||||
Kind::Byte => Width::Byte,
|
||||
Kind::Word => Width::Word,
|
||||
_ => return Ok(Width::Word),
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Reg {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let out = match p.peek(Parsing::Reg)?.kind {
|
||||
Kind::Reg(r) => r,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::Reg))?,
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Expr<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
p.expr()
|
||||
}
|
||||
}
|
||||
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Box::new(p.parse()?))
|
||||
}
|
||||
}
|
||||
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Vec;
|
||||
p.assert(Kind::OpenBrace, parsing)?;
|
||||
let mut out = vec![];
|
||||
while Kind::CloseBrace != p.peek(parsing)?.kind {
|
||||
out.push(p.parse()?)
|
||||
}
|
||||
p.assert(Kind::CloseBrace, parsing)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
/// Context-sensitive parsing rules
|
||||
impl<'t> Parser<'t> {
|
||||
pub fn string(&mut self) -> PResult<&'t str> {
|
||||
let token = *self.peek(Parsing::Directive)?;
|
||||
match token.kind {
|
||||
Kind::String => {
|
||||
self.take();
|
||||
Ok(&token.lexeme[1..token.lexeme.len() - 1])
|
||||
}
|
||||
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
|
||||
}
|
||||
}
|
||||
pub fn label(&mut self) -> PResult<&'t str> {
|
||||
let p = Parsing::Label;
|
||||
let token = self.next(p)?;
|
||||
assert_eq!(Kind::Identifier, token.kind);
|
||||
self.assert(Kind::Colon, p)?;
|
||||
Ok(token.lexeme)
|
||||
}
|
||||
pub fn reg(&mut self) -> PResult<Reg> {
|
||||
match self.peek(Parsing::Reg)?.kind {
|
||||
Kind::Reg(r) => {
|
||||
self.take();
|
||||
Ok(r)
|
||||
}
|
||||
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod error {
|
||||
use super::Kind;
|
||||
use crate::util::Span;
|
||||
use std::{fmt::Display, num::TryFromIntError};
|
||||
|
||||
pub type PResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct Error {
|
||||
pub parsing: Parsing,
|
||||
pub kind: ErrorKind,
|
||||
pub loc: Span<usize>,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum ErrorKind {
|
||||
LexError,
|
||||
/// Returned when [Parsing::Expr] fails without consuming
|
||||
NotExpr,
|
||||
DivZero,
|
||||
NonNumeric(Kind),
|
||||
BadIntForDst(u16),
|
||||
TryFromIntError(TryFromIntError),
|
||||
Unexpected(Kind),
|
||||
BufEmpty,
|
||||
Todo,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Parsing {
|
||||
File,
|
||||
Stmt,
|
||||
|
||||
Label,
|
||||
Directive,
|
||||
Instruction,
|
||||
|
||||
NoEm,
|
||||
OneEm,
|
||||
Reti,
|
||||
Br,
|
||||
OneArg,
|
||||
TwoArg,
|
||||
Jump,
|
||||
|
||||
Width,
|
||||
Src,
|
||||
Dst,
|
||||
Reg,
|
||||
|
||||
Expr,
|
||||
Vec,
|
||||
}
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
|
||||
write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing)
|
||||
}
|
||||
}
|
||||
impl Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ErrorKind::LexError => write!(f, "lexical error"),
|
||||
ErrorKind::TryFromIntError(e) => write!(f, "{e}"),
|
||||
ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
|
||||
ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"),
|
||||
ErrorKind::DivZero => write!(f, "Division by zero"),
|
||||
ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
|
||||
ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"),
|
||||
ErrorKind::BufEmpty => write!(f, "Peek buffer empty"),
|
||||
ErrorKind::Todo => write!(f, "Not yet implemented"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Parsing {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Parsing::File => "a file".fmt(f),
|
||||
Parsing::Stmt => "a line".fmt(f),
|
||||
Parsing::Label => "a label".fmt(f),
|
||||
Parsing::Directive => "a directive".fmt(f),
|
||||
Parsing::Instruction => "an instruction".fmt(f),
|
||||
Parsing::NoEm => "a no-operand emulated instruction".fmt(f),
|
||||
Parsing::OneEm => "a one-operand emulated instruction".fmt(f),
|
||||
Parsing::Reti => "a `reti` instruction".fmt(f),
|
||||
Parsing::Br => "a `br` instruction".fmt(f),
|
||||
Parsing::OneArg => "a one-operand instruction".fmt(f),
|
||||
Parsing::TwoArg => "a two-operand instruction".fmt(f),
|
||||
Parsing::Jump => "a jump instruction".fmt(f),
|
||||
Parsing::Width => "an instruction width".fmt(f),
|
||||
Parsing::Src => "a source".fmt(f),
|
||||
Parsing::Dst => "a destination".fmt(f),
|
||||
Parsing::Reg => "a register".fmt(f),
|
||||
Parsing::Expr => "a constant expression".fmt(f),
|
||||
Parsing::Vec => "a list".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::error::Error for Error {}
|
||||
}
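// A sketch of how these error pieces render, assuming the module paths `parser::error`
// and `lexer::token` are visible outside the crate as written:
use libmsp430::lexer::token::TokenKind;
use libmsp430::parser::error::{Error, ErrorKind, Parsing};
use libmsp430::util::Span;

fn main() {
    let e = Error {
        parsing: Parsing::Reg,
        kind: ErrorKind::Unexpected(TokenKind::Comma),
        loc: Span { start: 14, end: 15 },
    };
    assert_eq!(
        e.to_string(),
        "[14..15]: Error: Unexpected token (,) while parsing a register"
    );
}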
|
||||
|
679
src/parser/ast.rs
Normal file
679
src/parser/ast.rs
Normal file
@ -0,0 +1,679 @@
|
||||
// © 2023-2024 John Breaux
|
||||
//! Abstract syntax tree types representing MSP430 statements, directives, instructions, and constant expressions
|
||||
use crate::{
|
||||
lexer::token::{self, Reg, Token},
|
||||
util::Span,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Statements<'t> {
|
||||
pub stmts: Vec<Statement<'t>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Statement<'t> {
|
||||
Label(&'t str),
|
||||
Insn(Instruction<'t>),
|
||||
Directive(Directive<'t>),
|
||||
Comment(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Directive<'t> {
|
||||
/// TODO: Store define as a vec of tokens. This will require help from the
|
||||
/// [preprocessor](crate::preprocessor)
|
||||
Define(Vec<Token<'t>>),
|
||||
Org(Box<Expr<'t>>),
|
||||
Word(Box<Expr<'t>>),
|
||||
Words(Vec<Expr<'t>>),
|
||||
String(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Instruction<'t> {
|
||||
pub span: Span<usize>,
|
||||
pub kind: InstructionKind<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum InstructionKind<'t> {
|
||||
NoEm(NoEm),
|
||||
OneEm(OneEm<'t>),
|
||||
OneArg(OneArg<'t>),
|
||||
TwoArg(TwoArg<'t>),
|
||||
Jump(Jump<'t>),
|
||||
Reti(Reti),
|
||||
Br(Br<'t>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct NoEm {
|
||||
pub opcode: token::NoEm,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct OneEm<'t> {
|
||||
pub opcode: token::OneEm,
|
||||
pub width: Width,
|
||||
pub dst: Dst<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct OneArg<'t> {
|
||||
pub opcode: token::OneArg,
|
||||
pub width: Width,
|
||||
pub src: Src<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct TwoArg<'t> {
|
||||
pub opcode: token::TwoArg,
|
||||
pub width: Width,
|
||||
pub src: Src<'t>,
|
||||
pub dst: Dst<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Jump<'t> {
|
||||
pub opcode: token::Jump,
|
||||
pub dst: JumpDst<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Reti;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Br<'t> {
|
||||
pub src: Src<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Width {
|
||||
#[default]
|
||||
Word,
|
||||
Byte,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Src<'t> {
|
||||
Direct(Reg),
|
||||
Indexed(Box<Expr<'t>>, Reg),
|
||||
Indirect(Reg),
|
||||
PostInc(Reg),
|
||||
Absolute(Box<Expr<'t>>),
|
||||
Immediate(Box<Expr<'t>>),
|
||||
Special(SrcSpecial),
|
||||
BareExpr(Box<Expr<'t>>),
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum SrcSpecial {
|
||||
Zero,
|
||||
One,
|
||||
Four,
|
||||
Two,
|
||||
Eight,
|
||||
NegOne,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Dst<'t> {
|
||||
Direct(Reg),
|
||||
Indexed(Box<Expr<'t>>, Reg),
|
||||
Absolute(Box<Expr<'t>>),
|
||||
Special(DstSpecial),
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum DstSpecial {
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum JumpDst<'t> {
|
||||
/// A relative offset, nominally an even number from -0x400..=0x3fe
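/// (a jump instruction encodes a signed 10-bit word offset, so the reachable byte range is 2 * (-512..=511))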
|
||||
Rel(i16),
|
||||
Label(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Expr<'t> {
|
||||
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
|
||||
Unary(Vec<UnOp>, Box<Expr<'t>>),
|
||||
Group(Box<Expr<'t>>),
|
||||
Number(u16),
|
||||
Ident(&'t str),
|
||||
AddrOf(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum BinOp {
|
||||
Mul,
|
||||
Div,
|
||||
Rem,
|
||||
Add,
|
||||
Sub,
|
||||
Lsh,
|
||||
Rsh,
|
||||
And,
|
||||
Xor,
|
||||
Or,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum UnOp {
|
||||
Deref,
|
||||
Not,
|
||||
Neg,
|
||||
}
|
||||
|
||||
pub mod conv {
|
||||
//! Conversions between [ast](super) types, via [From], or via `new` constructor
|
||||
use super::{InstructionKind as Ik, *};
|
||||
|
||||
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
|
||||
impl<'t> From<$src> for $dst {
|
||||
fn from(value: $src) -> Self {
|
||||
$expr(value)
|
||||
}
|
||||
}
|
||||
)*}}
|
||||
// sure am glad macros aren't hygienic over lifetimes
|
||||
impl_from! { Ik<'t> {
|
||||
NoEm => Ik::NoEm,
|
||||
OneEm<'t> => Ik::OneEm,
|
||||
OneArg<'t> => Ik::OneArg,
|
||||
TwoArg<'t> => Ik::TwoArg,
|
||||
Jump<'t> => Ik::Jump,
|
||||
Reti => Ik::Reti,
|
||||
Br<'t> => Ik::Br,
|
||||
}}
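// For reference, the first entry above expands to:
// impl<'t> From<NoEm> for Ik<'t> { fn from(value: NoEm) -> Self { Ik::NoEm(value) } }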
|
||||
impl_from! { Expr<'t> {
|
||||
u16 => Expr::Number
|
||||
}}
|
||||
impl<'t> From<Dst<'t>> for Src<'t> {
|
||||
fn from(value: Dst<'t>) -> Self {
|
||||
match value {
|
||||
Dst::Special(v) => Src::Special(v.into()),
|
||||
Dst::Absolute(v) => Src::Absolute(v),
|
||||
Dst::Indexed(i, r) => Src::Indexed(i, r),
|
||||
Dst::Direct(r) => Src::Direct(r),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl From<DstSpecial> for SrcSpecial {
|
||||
fn from(value: DstSpecial) -> Self {
|
||||
match value {
|
||||
DstSpecial::Zero => SrcSpecial::Zero,
|
||||
DstSpecial::One => SrcSpecial::One,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> TwoArg<'t> {
|
||||
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
|
||||
Self { opcode, width, src, dst }
|
||||
}
|
||||
}
|
||||
}
|
||||
pub mod display {
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
|
||||
impl<'t> Display for Statements<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for stmt in &self.stmts {
|
||||
writeln!(f, "{stmt}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Statement<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Statement::Label(v) => write!(f, "{v}:"),
|
||||
Statement::Insn(v) => write!(f, "{v}"),
|
||||
Statement::Directive(v) => write!(f, "{v}"),
|
||||
Statement::Comment(v) => write!(f, "{v}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Directive<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Directive::Define(_) => write!(f, ".define"),
|
||||
Directive::Org(e) => write!(f, ".org {e}"),
|
||||
Directive::Word(w) => write!(f, ".word {w}"),
|
||||
Directive::Words(words) => {
|
||||
write!(f, ".words [ ")?;
|
||||
for word in words {
|
||||
write!(f, "{word} ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
Directive::String(s) => write!(f, ".string \"{s}\""),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Instruction<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { span: _, kind } = self;
|
||||
write!(f, "{kind}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for InstructionKind<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
InstructionKind::NoEm(v) => v.fmt(f),
|
||||
InstructionKind::OneEm(v) => v.fmt(f),
|
||||
InstructionKind::OneArg(v) => v.fmt(f),
|
||||
InstructionKind::TwoArg(v) => v.fmt(f),
|
||||
InstructionKind::Jump(v) => v.fmt(f),
|
||||
InstructionKind::Reti(v) => v.fmt(f),
|
||||
InstructionKind::Br(v) => v.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for NoEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode } = self;
|
||||
write!(f, "{opcode}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for OneEm<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, dst } = self;
|
||||
write!(f, "{opcode}{width}\t{dst}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for OneArg<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, src } = self;
|
||||
write!(f, "{opcode}{width}\t{src}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for TwoArg<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
write!(f, "{opcode}{width}\t{src}, {dst}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Jump<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, dst } = self;
|
||||
write!(f, "{opcode}\t{dst}")
|
||||
}
|
||||
}
|
||||
impl Display for Reti {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "reti")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Br<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { src } = self;
|
||||
write!(f, "br\t{src}")
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Display for Src<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Src::Direct(r) => write!(f, "{r}"),
|
||||
Src::Indexed(e, r) => write!(f, "{e}({r})"),
|
||||
Src::Indirect(r) => write!(f, "@{r}"),
|
||||
Src::PostInc(r) => write!(f, "@{r}+"),
|
||||
Src::Absolute(e) => write!(f, "&{e}"),
|
||||
Src::Immediate(e) => write!(f, "#{e}"),
|
||||
Src::Special(i) => write!(f, "#{i}"),
|
||||
Src::BareExpr(id) => write!(f, "{id}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for SrcSpecial {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SrcSpecial::Zero => write!(f, "0"),
|
||||
SrcSpecial::One => write!(f, "1"),
|
||||
SrcSpecial::Four => write!(f, "4"),
|
||||
SrcSpecial::Two => write!(f, "2"),
|
||||
SrcSpecial::Eight => write!(f, "8"),
|
||||
SrcSpecial::NegOne => write!(f, "-1"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Dst<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Dst::Direct(r) => write!(f, "{r}"),
|
||||
Dst::Indexed(e, r) => write!(f, "{e}({r})"),
|
||||
Dst::Absolute(e) => write!(f, "&{e}"),
|
||||
Dst::Special(i) => write!(f, "#{i}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for DstSpecial {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
DstSpecial::Zero => write!(f, "0"),
|
||||
DstSpecial::One => write!(f, "1"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for JumpDst<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
JumpDst::Rel(i) => write!(f, "{i}"),
|
||||
JumpDst::Label(l) => write!(f, "{l}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Expr<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Expr::Binary(head, tail) => {
|
||||
write!(f, "{head}")?;
|
||||
for (op, tail) in tail {
|
||||
write!(f, "{op}{tail}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Expr::Unary(ops, tail) => {
|
||||
for op in ops {
|
||||
write!(f, "{op}")?
|
||||
}
|
||||
write!(f, "{tail}")
|
||||
}
|
||||
Expr::Group(e) => write!(f, "({e})"),
|
||||
Expr::Number(n) => write!(f, "{n:x}"),
|
||||
Expr::Ident(n) => write!(f, "{n}"),
|
||||
Expr::AddrOf(n) => write!(f, "&{n}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for BinOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
BinOp::Mul => write!(f, "*"),
|
||||
BinOp::Div => write!(f, "/"),
|
||||
BinOp::Rem => write!(f, "%"),
|
||||
BinOp::Add => write!(f, "+"),
|
||||
BinOp::Sub => write!(f, "-"),
|
||||
BinOp::Lsh => write!(f, "<<"),
|
||||
BinOp::Rsh => write!(f, ">>"),
|
||||
BinOp::And => write!(f, "&"),
|
||||
BinOp::Xor => write!(f, "^"),
|
||||
BinOp::Or => write!(f, "|"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for UnOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
UnOp::Deref => write!(f, "*"),
|
||||
UnOp::Not => write!(f, "!"),
|
||||
UnOp::Neg => write!(f, "-"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Width {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Width::Word => Ok(()),
|
||||
Width::Byte => write!(f, ".b"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pub mod canonical {
|
||||
use std::iter;
|
||||
|
||||
use super::*;
|
||||
use token::TwoArg::*;
|
||||
pub trait Canonicalize {
|
||||
/// The output after canonicalization
|
||||
type Output;
|
||||
/// Converts `Self` into its "canonical" form: "emulated" instructions are rewritten
/// into their respective non-emulated forms.
|
||||
fn to_canonical(self) -> Self::Output;
|
||||
}
|
||||
impl<'t> Canonicalize for Statements<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Statement<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Statement::Insn(i) => Self::Insn(i.to_canonical()),
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Instruction<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
Self { kind: self.kind.to_canonical(), ..self }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for InstructionKind<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Self::NoEm(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::OneEm(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::Reti(v) => Self::Reti(v.to_canonical()),
|
||||
Self::Br(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::OneArg(v) => Self::OneArg(v.to_canonical()),
|
||||
Self::TwoArg(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::Jump(v) => Self::Jump(v.to_canonical()),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Canonicalize for NoEm {
|
||||
type Output = TwoArg<'static>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode } = self;
|
||||
use SrcSpecial::*;
|
||||
use Width::*;
|
||||
match opcode {
|
||||
token::NoEm::Nop => {
|
||||
TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
|
||||
}
|
||||
token::NoEm::Ret => {
|
||||
TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
|
||||
}
|
||||
token::NoEm::Clrc => {
|
||||
TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Clrz => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Clrn => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setc => {
|
||||
TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setz => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setn => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Dint => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Eint => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for OneEm<'t> {
|
||||
type Output = TwoArg<'t>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
use SrcSpecial::*;
|
||||
let Self { opcode, width, dst } = self;
|
||||
match opcode {
|
||||
token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst),
|
||||
token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst),
|
||||
token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst),
|
||||
token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst),
|
||||
token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst),
|
||||
token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst),
|
||||
token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst),
|
||||
token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst),
|
||||
token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst),
|
||||
}
|
||||
}
|
||||
}
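// A short sketch of the lowering above, assuming the crate is linked as `libmsp430` and
// that `lexer::token` is reachable from outside the crate: the emulated `clr r15`
// becomes a plain `mov #0, r15`.
use libmsp430::{
    lexer::token::{self, Reg},
    parser::ast::{canonical::Canonicalize, Dst, OneEm, Src, SrcSpecial, TwoArg, Width},
};

fn main() {
    let clr = OneEm { opcode: token::OneEm::Clr, width: Width::Word, dst: Dst::Direct(Reg::R15) };
    let canon = clr.to_canonical();
    assert_eq!(
        canon,
        TwoArg::new(token::TwoArg::Mov, Width::Word, Src::Special(SrcSpecial::Zero), Dst::Direct(Reg::R15))
    );
    println!("{canon}"); // "mov\t#0, r15"
}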
|
||||
impl<'t> Canonicalize for OneArg<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, width, src } = self;
|
||||
Self {
|
||||
opcode,
|
||||
width: match opcode {
|
||||
token::OneArg::Call => Width::Word,
|
||||
_ => width,
|
||||
},
|
||||
src: src.to_canonical(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for TwoArg<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
Self { opcode, width, src: src.to_canonical(), dst: dst.to_canonical() }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Jump<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, dst } = self;
|
||||
Self {
|
||||
opcode: match opcode {
|
||||
token::Jump::Jnz => token::Jump::Jne,
|
||||
token::Jump::Jz => token::Jump::Jeq,
|
||||
token::Jump::Jnc => token::Jump::Jlo,
|
||||
token::Jump::Jc => token::Jump::Jhs,
|
||||
t => t,
|
||||
},
|
||||
dst: dst.to_canonical(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Canonicalize for Reti {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
self
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Br<'t> {
|
||||
type Output = TwoArg<'t>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { src } = self;
|
||||
TwoArg::new(Mov, Width::Word, src, Dst::Direct(Reg::PC))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Canonicalize for Src<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
use SrcSpecial::*;
|
||||
match self {
|
||||
Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self,
|
||||
Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r),
|
||||
Src::Absolute(e) => Src::Absolute(e.to_canonical().into()),
|
||||
Src::Immediate(e) => match e.to_canonical() {
|
||||
Expr::Number(0) => Src::Special(Zero),
|
||||
Expr::Number(1) => Src::Special(One),
|
||||
Expr::Number(2) => Src::Special(Two),
|
||||
Expr::Number(4) => Src::Special(Four),
|
||||
Expr::Number(8) => Src::Special(Eight),
|
||||
Expr::Number(0xffff) => Src::Special(NegOne),
|
||||
expr => Src::Immediate(expr.into()),
|
||||
},
|
||||
Src::BareExpr(_) => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Dst<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Dst::Direct(_) | Dst::Special(_) => self,
|
||||
Dst::Indexed(e, r) => Dst::Indexed(e.to_canonical().into(), r),
|
||||
Dst::Absolute(e) => Dst::Absolute(e.to_canonical().into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for JumpDst<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
self
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Expr<'t> {
|
||||
type Output = Self;
|
||||
/// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
|
||||
/// this returns a single [Expr::Number]. If not, it evaluates until
|
||||
/// it reaches a leaf that cannot be evaluated.
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
|
||||
Expr::Group(e) => e.to_canonical(),
|
||||
Expr::Unary(ops, tail) => {
|
||||
let mut tail = match tail.to_canonical() {
|
||||
Expr::Number(n) => n,
|
||||
other => return other,
|
||||
};
|
||||
// If the tail is dereferenced, canonicalization must halt,
|
||||
// since we have no knowledge of memory layout
|
||||
let mut ops = ops.into_iter();
|
||||
for op in ops.by_ref() {
|
||||
tail = match op {
|
||||
UnOp::Deref => {
|
||||
return Expr::Unary(
|
||||
iter::once(op).chain(ops).collect(),
|
||||
Box::new(tail.into()),
|
||||
)
|
||||
}
|
||||
UnOp::Not => !tail,
|
||||
UnOp::Neg => 0u16.wrapping_sub(tail),
|
||||
}
|
||||
}
|
||||
Expr::Number(tail)
|
||||
}
|
||||
Expr::Binary(head, tails) => {
|
||||
let mut head = match head.to_canonical() {
|
||||
Expr::Number(n) => n,
|
||||
head => return Expr::Binary(head.into(), tails),
|
||||
};
|
||||
let mut tails = tails.into_iter();
|
||||
for (op, tail) in &mut tails {
|
||||
let tail = tail.to_canonical();
|
||||
// If the canonical tail isn't a number, rebuild and return
|
||||
let Expr::Number(tail) = tail else {
|
||||
return Expr::Binary(
|
||||
Box::new(head.into()),
|
||||
iter::once((op, tail)).chain(tails).collect(),
|
||||
);
|
||||
};
|
||||
head = match op {
|
||||
BinOp::Mul => head.wrapping_mul(tail),
|
||||
BinOp::Div => head.wrapping_div(tail),
|
||||
BinOp::Rem => head.wrapping_rem(tail),
|
||||
BinOp::Add => head.wrapping_add(tail),
|
||||
BinOp::Sub => head.wrapping_sub(tail),
|
||||
BinOp::Lsh => head.wrapping_shl(tail as u32),
|
||||
BinOp::Rsh => head.wrapping_shr(tail as u32),
|
||||
BinOp::And => head & tail,
|
||||
BinOp::Xor => head ^ tail,
|
||||
BinOp::Or => head | tail,
|
||||
};
|
||||
}
|
||||
Expr::Number(head)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
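// A minimal, self-contained sketch of the folding strategy used by `Expr::to_canonical`
// above: known constants are combined left-to-right with wrapping 16-bit arithmetic, and
// folding stops at the first term that is not yet a number (a label, a deref, ...).
// `Op` and `fold_constants` are illustrative names only, not items from this crate.
#[derive(Clone, Copy)]
enum Op { Add, Sub, Mul }

fn fold_constants(head: u16, tail: &[(Op, Option<u16>)]) -> Result<u16, usize> {
    let mut acc = head;
    for (i, (op, term)) in tail.iter().enumerate() {
        // `None` stands in for a leaf that cannot be evaluated yet.
        let Some(term) = *term else { return Err(i) };
        acc = match op {
            Op::Add => acc.wrapping_add(term),
            Op::Sub => acc.wrapping_sub(term),
            Op::Mul => acc.wrapping_mul(term),
        };
    }
    Ok(acc)
}

#[test]
fn folds_constants_left_to_right() {
    // 2 + 3 * 4 is folded in source order here: (2 + 3) * 4 = 20.
    assert_eq!(fold_constants(2, &[(Op::Add, Some(3)), (Op::Mul, Some(4))]), Ok(20));
}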
|
@ -1,15 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//`
|
||||
use super::*;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Comment(pub String);
|
||||
|
||||
impl Parsable for Comment {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
Ok(Self(stream.expect(Type::Comment)?.lexeme().to_string()))
|
||||
}
|
||||
}
|
||||
impl Display for Comment {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
|
||||
}
|
@ -1,90 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and
|
||||
//! [Linker](crate::Linker)
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::*;
|
||||
use crate::lexer::token::OwnedToken;
|
||||
|
||||
// TODO: Parse each kind of *postprocessor* directive into an AST node
|
||||
// - .org 8000: Directive::Org { base: Number }
|
||||
// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this
|
||||
// in the AST?
|
||||
// - .include "<filename>" Directive::Include { Root } ; should this include an entire AST in
|
||||
// the AST?
|
||||
// - .word 8000 Directive::Word(Number)
|
||||
// - .words dead beef Directive::Words(Vec<u16>|Vec<Number>)
|
||||
// - .byte ff Directive::Byte(Number)
|
||||
// - .bytes de, ad, be, ef Directive::Bytes(Vec<u8>)
|
||||
// - .string "string" Directive::String(String)
|
||||
// - .ascii "string" Directive::Ascii(Vec<u8>)
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Directive {
|
||||
Org(Number),
|
||||
Define(Vec<OwnedToken>),
|
||||
Include(Root),
|
||||
Byte(Number),
|
||||
Bytes(Vec<Number>),
|
||||
Word(Number),
|
||||
Words(Vec<Number>),
|
||||
String(String),
|
||||
Strings(Vec<String>),
|
||||
}
|
||||
|
||||
impl Directive {}
|
||||
|
||||
impl Parsable for Directive {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let d = stream.expect(Type::Directive)?;
|
||||
// match on the directive
|
||||
Ok(match d.lexeme() {
|
||||
".org" => Self::Org(Number::parse(p, stream)?),
|
||||
".define" => {
|
||||
let mut tokens = vec![];
|
||||
loop {
|
||||
match stream.peek().variant() {
|
||||
Type::Endl | Type::EndOfFile => break,
|
||||
_ => tokens.push(stream.next().unwrap_or_default().into()),
|
||||
}
|
||||
}
|
||||
Self::Define(tokens)
|
||||
}
|
||||
".include" => {
|
||||
// Try to get path
|
||||
Self::Include(Parser::default().parse_file(&PathBuf::parse(p, stream)?)?)
|
||||
}
|
||||
".byte" => Self::Byte(Number::parse(p, stream)?),
|
||||
".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?),
|
||||
".word" => Self::Word(Number::parse(p, stream)?),
|
||||
".words" => Self::Words(Vec::<Number>::parse(p, stream)?),
|
||||
".string" => Self::String(String::parse(p, stream)?),
|
||||
".strings" => Self::Strings(Vec::<String>::parse(p, stream)?),
|
||||
e => Err(ParseError::UnrecognizedDirective(e.into()))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Directive {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Directive::Org(num) => write!(f, ".org {num}"),
|
||||
Directive::Define(rep) => {
|
||||
write!(f, ".define")?;
|
||||
for t in rep {
|
||||
write!(f, " {t}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Directive::Include(r) => Display::fmt(r, f),
|
||||
Directive::Byte(num) => write!(f, ".byte {num}"),
|
||||
Directive::Bytes(v) => write!(f, ".bytes {v:?}"),
|
||||
Directive::Word(num) => write!(f, ".word {num}"),
|
||||
Directive::Words(v) => write!(f, ".words {v:?}"),
|
||||
Directive::String(s) => write!(f, ".string \"{s}\""),
|
||||
Directive::Strings(s) => write!(f, ".strings {s:?}"),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
use super::*;
|
||||
use crate::lexer::error::LexError;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParseError {
|
||||
/// Produced by [lexer](crate::lexer)
|
||||
LexError(LexError),
|
||||
/// Produced by [std::io]
|
||||
IoError(std::io::Error),
|
||||
/// Produced by [Number](Number)[::parse()](Parsable::parse())
|
||||
/// when the parsed number contains digits too high for the specified radix
|
||||
UnexpectedDigits(String, u32),
|
||||
/// Produced by [Opcode](Opcode)[::parse()](Parsable::parse())
|
||||
/// when the opcode passed lexing but did not match recognized opcodes.
|
||||
///
|
||||
/// This is always a lexer bug.
|
||||
UnrecognizedOpcode(String),
|
||||
/// Produced by [Directive](Directive)[::parse()](Parsable::parse())
|
||||
/// when an unknown or unimplemented directive is used
|
||||
UnrecognizedDirective(String),
|
||||
/// Produced by [Register] when attempting to convert from a [str]
|
||||
/// that isn't a register (pc, sp, sr, cg, or r{number})
|
||||
NotARegister(String),
|
||||
/// Produced by [Register] when the r{number} is outside the range 0-15
|
||||
RegisterTooHigh(u16),
|
||||
/// Produced by [SecondaryOperand] when the joke "secondary immediate" form
|
||||
/// is out of range 0..=1
|
||||
FatSecondaryImmediate(isize),
|
||||
/// Produced by a [Number] too wide to fit in 16 bits
|
||||
/// (outside the range `(-2^15) .. (2^16-1)` )
|
||||
NumberTooWide(isize),
|
||||
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
|
||||
/// when the jump offset is outside the range (-1022..=1024)
|
||||
JumpedTooFar(isize),
|
||||
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
|
||||
JumpedOdd(isize),
|
||||
}
|
||||
|
||||
impl From<LexError> for ParseError {
|
||||
fn from(value: LexError) -> Self { Self::LexError(value) }
|
||||
}
|
||||
impl From<std::io::Error> for ParseError {
|
||||
fn from(value: std::io::Error) -> Self { Self::IoError(value) }
|
||||
}
|
||||
|
||||
impl Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::LexError(error) => Display::fmt(error, f),
|
||||
Self::IoError(error) => Display::fmt(error, f),
|
||||
Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
|
||||
Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
|
||||
Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."),
|
||||
Self::NotARegister(reg) => write!(f, "{reg} is not a register"),
|
||||
Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
|
||||
Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
|
||||
Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
|
||||
Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"),
|
||||
Self::JumpedOdd(num) => {
|
||||
write!(f, "Jump targets only encode even numbers: {num} must not be odd.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::error::Error for ParseError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::LexError(e) => Some(e),
|
||||
Self::IoError(e) => Some(e),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [Identifier] stores the hash of an identifier
|
||||
use super::*;
|
||||
use std::rc::Rc;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Identifier {
|
||||
str: Rc<str>,
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
fn str<T: AsRef<str>>(s: T) -> Self { Self { str: s.as_ref().to_owned().into() } }
|
||||
}
|
||||
|
||||
impl Parsable for Identifier {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let token = stream.expect(Type::Identifier)?;
|
||||
match token.variant() {
|
||||
Type::Identifier => Ok(Self::str(token.lexeme())),
|
||||
_ => unreachable!("Expected identifier, got {token:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Identifier {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.str, f) }
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430
|
||||
//! instruction
|
||||
//!
|
||||
//!
|
||||
//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent
|
||||
//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an
|
||||
//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on
|
||||
//! my own. Really, there are about 9 valid classes of instruction, some of which are only used for
|
||||
//! one or two of the MSP430's instructions.
|
||||
|
||||
use super::*;
|
||||
|
||||
pub mod encoding;
|
||||
pub mod opcode;
|
||||
|
||||
/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Instruction(Opcode, Encoding);
|
||||
|
||||
impl Instruction {
|
||||
pub fn opcode(&self) -> &Opcode { &self.0 }
|
||||
pub fn encoding(&self) -> &Encoding { &self.1 }
|
||||
/// Gets the Instruction as a [u16]
|
||||
pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() }
|
||||
/// Gets the extension words, if present
|
||||
pub fn ext_words(&self) -> [Option<u16>; 2] { self.1.extwords() }
|
||||
}
|
||||
|
||||
impl Parsable for Instruction {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where
|
||||
Self: Sized,
|
||||
T: crate::TokenStream<'text>,
|
||||
{
|
||||
// parse an opcode
|
||||
let opcode: Opcode = Opcode::parse(p, stream)?;
|
||||
// resolve the opcode to a final opcode and an encoding
|
||||
let (opcode, encoding) = opcode.resolve();
|
||||
// parse the encoding
|
||||
let encoding = encoding.parse(p, stream)?;
|
||||
Ok(Self(opcode, encoding))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Instruction> for u16 {
|
||||
fn from(value: Instruction) -> Self { value.word() }
|
||||
}
|
||||
|
||||
impl Display for Instruction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
|
||||
}
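// Worked example (plain numbers, no crate types) of how `Instruction::word()` above ORs
// the opcode discriminant with the encoding bits: `rra r15` is Opcode::Rra
// (0x1000 | 2 << 7), word width (bit 6 clear), register-direct mode (0 << 4), register 15.
#[test]
fn rra_r15_assembles_to_0x110f() {
    let opcode = 0x1000u16 | 2 << 7; // Opcode::Rra
    let encoding = 0 | 0 | 15;       // width | addressing mode | register number of r15
    assert_eq!(opcode | encoding, 0x110F);
}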
|
@ -1,81 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode)
|
||||
use super::*;
|
||||
|
||||
pub mod number;
|
||||
pub mod register;
|
||||
pub mod width;
|
||||
|
||||
pub mod jump_target;
|
||||
pub mod primary_operand;
|
||||
pub mod secondary_operand;
|
||||
|
||||
mod builder;
|
||||
pub mod encoding_parser;
|
||||
|
||||
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
|
||||
use encoding_parser::EncodingParser;
|
||||
|
||||
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// use msp430_asm::{preamble::*, parser::preamble::*};
|
||||
/// // Create an input string to tokenize
|
||||
/// let asm_file = r".b 8000(r15)";
|
||||
/// // Create a single-operand encoding parser
|
||||
/// let single: EncodingParser = Encoding::single().end();
|
||||
/// // Parse an Encoding from it
|
||||
/// let encoding: Encoding = single
|
||||
/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
|
||||
/// .unwrap();
|
||||
/// // Print the Encoding
|
||||
/// println!("{encoding}");
|
||||
/// ```
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Encoding {
|
||||
Single { width: Width, dst: PrimaryOperand },
|
||||
Jump { target: JumpTarget },
|
||||
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
|
||||
}
|
||||
impl Encoding {
|
||||
/// Returns a builder for [Encoding::Single]
|
||||
pub fn single() -> SingleBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Jump]
|
||||
pub fn jump() -> JumpBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Double]
|
||||
pub fn double() -> DoubleBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Double]
|
||||
///
|
||||
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
|
||||
/// dst are the same
|
||||
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
|
||||
/// Returns the instruction word bits contributed by this encoding (width, addressing modes, registers)
|
||||
pub fn word(&self) -> u16 {
|
||||
match self {
|
||||
Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16,
|
||||
Encoding::Jump { target } => target.word().unwrap_or_default(),
|
||||
Encoding::Double { width, src, dst } => {
|
||||
u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Returns extwords for instruction
|
||||
pub fn extwords(&self) -> [Option<u16>; 2] {
|
||||
match self {
|
||||
Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()],
|
||||
Encoding::Single { dst, .. } => [dst.ext_word(), None],
|
||||
Encoding::Jump { .. } => [None, None],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Encoding {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
|
||||
Encoding::Jump { target } => write!(f, " {target}"),
|
||||
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
|
||||
}
|
||||
}
|
||||
}
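// Worked example of the Double layout computed in `Encoding::word()` above, again with
// plain numbers: `mov r15, r14` is Opcode::Mov (0x4000) with the source register 15 in
// bits 8-11, word width, register-direct source and destination modes, and destination
// register 14 in the low nibble.
#[test]
fn mov_r15_r14_assembles_to_0x4f0e() {
    let word = 0x4000u16 | (15 << 8) | 0 | 0 | 14;
    assert_eq!(word, 0x4F0E);
}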
|
@ -1,76 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Builder API for [`EncodingParser`]
|
||||
use super::*;
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SingleBuilder {
|
||||
width: Option<Width>,
|
||||
dst: Option<PrimaryOperand>,
|
||||
}
|
||||
impl SingleBuilder {
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
/// Sets the [PrimaryOperand] field
|
||||
pub fn operand(mut self, dst: PrimaryOperand) -> Self {
|
||||
self.dst = Some(dst);
|
||||
self
|
||||
}
|
||||
/// Build
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct JumpBuilder {
|
||||
target: Option<JumpTarget>,
|
||||
}
|
||||
impl JumpBuilder {
|
||||
pub fn target(mut self, target: JumpTarget) -> Self {
|
||||
self.target = Some(target);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Jump { target: self.target } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct DoubleBuilder {
|
||||
width: Option<Width>,
|
||||
src: Option<PrimaryOperand>,
|
||||
dst: Option<SecondaryOperand>,
|
||||
}
|
||||
impl DoubleBuilder {
|
||||
/// Sets the [Width] field
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
/// Sets the [PrimaryOperand] field
|
||||
pub fn src(mut self, src: PrimaryOperand) -> Self {
|
||||
self.src = Some(src);
|
||||
self
|
||||
}
|
||||
/// Sets the [PrimaryOperand] field
|
||||
pub fn dst(mut self, dst: SecondaryOperand) -> Self {
|
||||
self.dst = Some(dst);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ReflexiveBuilder {
|
||||
width: Option<Width>,
|
||||
reg: Option<SecondaryOperand>,
|
||||
}
|
||||
impl ReflexiveBuilder {
|
||||
/// Sets the [Width] field
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
pub fn reg(mut self, reg: SecondaryOperand) -> Self {
|
||||
self.reg = Some(reg);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`]
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
|
||||
pub enum EncodingParser {
|
||||
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
|
||||
Jump { target: Option<JumpTarget> },
|
||||
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
|
||||
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
|
||||
}
|
||||
|
||||
impl EncodingParser {
|
||||
/// Constructs an [Encoding] from this [EncodingParser], filling holes
|
||||
/// with the tokenstream
|
||||
pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
Ok(match self {
|
||||
Self::Single { width, dst } => Encoding::Single {
|
||||
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
|
||||
dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? },
|
||||
},
|
||||
Self::Jump { target } => Encoding::Jump { target: target.unwrap_or(JumpTarget::parse(p, stream)?) },
|
||||
Self::Double { width, src, dst } => Encoding::Double {
|
||||
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
|
||||
src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? },
|
||||
dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? },
|
||||
},
|
||||
Self::Reflexive { width, reg } => {
|
||||
let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
|
||||
let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? };
|
||||
Encoding::Double { width, src: reg.clone().into(), dst: reg }
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier)
|
||||
//! for a [Jump](Encoding::Jump) [instruction]
|
||||
use super::*;
|
||||
|
||||
/// Contains the [pc-relative offset](Number) or [label](Identifier)
|
||||
/// for a [Jump](Encoding::Jump) [Instruction]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum JumpTarget {
|
||||
Number(Number),
|
||||
Identifier(Identifier),
|
||||
}
|
||||
|
||||
impl JumpTarget {
|
||||
pub fn word(&self) -> Option<u16> {
|
||||
match self {
|
||||
JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff),
|
||||
JumpTarget::Identifier(_) => None,
|
||||
}
|
||||
}
|
||||
pub fn squish(value: isize) -> Result<u16, ParseError> {
|
||||
match value {
|
||||
i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?,
|
||||
i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff),
|
||||
i => Err(ParseError::JumpedTooFar(i))?,
|
||||
}
|
||||
}
|
||||
pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 }
|
||||
}
|
||||
|
||||
impl Parsable for JumpTarget {
|
||||
// - Identifier
|
||||
// - Number
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
// Try to parse a number
|
||||
if let Some(num) = Number::try_parse(p, stream)? {
|
||||
Self::try_from(num)
|
||||
} else {
|
||||
// if that fails, try to parse an identifier instead
|
||||
Ok(Self::Identifier(Identifier::parse(p, stream)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Number> for JumpTarget {
|
||||
type Error = ParseError;
|
||||
fn try_from(value: Number) -> Result<Self, Self::Error> { Ok(Self::Number(Self::squish(value.into())?.into())) }
|
||||
}
|
||||
|
||||
impl Display for JumpTarget {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))),
|
||||
Self::Identifier(id) => write!(f, "{id}"),
|
||||
}
|
||||
}
|
||||
}
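// Worked example of the squish/unsquish pair above (assuming it lives next to the impl):
// a jump whose target is 10 bytes past the jump instruction encodes (10 >> 1) - 1 = 4 in
// the low ten bits, and unsquish(4) = (4 + 1) << 1 = 10 recovers it. Odd offsets are
// rejected, as is anything outside the encodable range.
#[test]
fn squish_round_trips_small_even_offsets() {
    assert_eq!(JumpTarget::squish(10).unwrap(), 4);
    assert_eq!(JumpTarget::unsquish(4), 10);
    assert!(JumpTarget::squish(11).is_err());
}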
|
@ -1,81 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Number`] represents a 16-bit signed or unsigned word
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Number(isize, u32); // (value, radix)
|
||||
|
||||
impl Parsable for Number {
|
||||
// A number is:
|
||||
// [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
use Type as Ty;
|
||||
// The number is negative when it begins with a Minus, but Plus is also acceptable.
|
||||
let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus));
|
||||
let radix = match stream
|
||||
.expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin])
|
||||
.ok()
|
||||
.map(|t| t.variant())
|
||||
{
|
||||
Some(Ty::RadixMarkerHex) => 16,
|
||||
Some(Ty::RadixMarkerDec) => 10,
|
||||
Some(Ty::RadixMarkerOct) => 8,
|
||||
Some(Ty::RadixMarkerBin) => 2,
|
||||
_ => p.radix,
|
||||
};
|
||||
let number = stream.expect(Ty::Number)?;
|
||||
// TODO: Reintroduce error context
|
||||
let number = isize::from_str_radix(number.lexeme(), radix)
|
||||
.map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))?
|
||||
* if negative { -1 } else { 1 };
|
||||
// Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
|
||||
Ok(Self(
|
||||
if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? },
|
||||
radix,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<isize> for Number {
|
||||
fn from(value: isize) -> Self { Self(value, 16) }
|
||||
}
|
||||
|
||||
impl From<Number> for isize {
|
||||
fn from(value: Number) -> Self { value.0 as Self }
|
||||
}
|
||||
|
||||
impl From<u16> for Number {
|
||||
fn from(value: u16) -> Self { Self(value as isize, 16) }
|
||||
}
|
||||
|
||||
impl From<Number> for u16 {
|
||||
fn from(value: Number) -> Self { value.0 as Self }
|
||||
}
|
||||
|
||||
impl std::ops::Sub<isize> for Number {
|
||||
type Output = Self;
|
||||
fn sub(mut self, rhs: isize) -> Self::Output {
|
||||
self.0 -= rhs;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<usize> for Number {
|
||||
type Output = Self;
|
||||
fn shr(mut self, rhs: usize) -> Self::Output {
|
||||
self.0 >>= rhs;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Number {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self.1 {
|
||||
2 => std::fmt::Binary::fmt(&self.0, f),
|
||||
8 => std::fmt::Octal::fmt(&self.0, f),
|
||||
16 => std::fmt::LowerHex::fmt(&self.0, f),
|
||||
_ => std::fmt::Display::fmt(&self.0, f),
|
||||
}
|
||||
}
|
||||
}
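// Rough examples of the sign and range handling above (the exact radix-marker spelling,
// e.g. `0x`, is the lexer's concern and is assumed here): with a hex marker, the digits
// `2a` become Number(42, 16); a leading `-` negates before the range check, so `-10` in
// hex is Number(-16, 16); and a hex value of 10000 (0x10000) is rejected as
// NumberTooWide because it falls outside -0x8000..0x10000.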
|
@ -1,146 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension
|
||||
//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`]
|
||||
use super::*;
|
||||
|
||||
/// Contains the first [Register], addressing mode, and Extension Word for a
|
||||
/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum PrimaryOperand {
|
||||
Direct(Register),
|
||||
Indirect(Register),
|
||||
PostInc(Register),
|
||||
Indexed(Register, Number),
|
||||
Relative(Identifier),
|
||||
Absolute(Number),
|
||||
Immediate(Number),
|
||||
Four,
|
||||
Eight,
|
||||
Zero,
|
||||
One,
|
||||
Two,
|
||||
MinusOne,
|
||||
}
|
||||
|
||||
impl PrimaryOperand {
|
||||
/// Returns the mode bits
|
||||
pub fn mode(&self) -> u16 {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Direct(_) | Zero => 0,
|
||||
Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4,
|
||||
Indirect(_) | Two | Four => 2 << 4,
|
||||
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
|
||||
}
|
||||
}
|
||||
/// Gets the register
|
||||
pub fn register(&self) -> Register {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
|
||||
Immediate(_) | Relative(_) => Register::pc,
|
||||
Absolute(_) | Four | Eight => Register::sr,
|
||||
Zero | One | Two | MinusOne => Register::cg,
|
||||
}
|
||||
}
|
||||
/// Gets the extension word, if present
|
||||
pub fn ext_word(&self) -> Option<u16> {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parsable for PrimaryOperand {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
// Try parsing as Register (Direct)
|
||||
if let Some(r) = Register::try_parse(p, stream)? {
|
||||
return Ok(Self::Direct(r));
|
||||
}
|
||||
// Try parsing as Number (Indexed)
|
||||
if let Some(idx) = Number::try_parse(p, stream)? {
|
||||
stream.expect(Type::LParen)?;
|
||||
let reg = Register::parse(p, stream)?;
|
||||
stream.expect(Type::RParen)?;
|
||||
return Ok(Self::Indexed(reg, idx));
|
||||
}
|
||||
// Try parsing as Identifier (Relative, label mode)
|
||||
if let Some(id) = Identifier::try_parse(p, stream)? {
|
||||
return Ok(Self::Relative(id));
|
||||
}
|
||||
// Or directly match any of the valid prefix markers
|
||||
// Register, Number, and Identifier are included here to make error messages clearer.
|
||||
// their inclusion will cause a negligible slowdown when the next token is not a prefix marker
|
||||
// (a failure condition)
|
||||
let token = stream.expect_any_of([
|
||||
Type::Indirect,
|
||||
Type::Absolute,
|
||||
Type::Immediate,
|
||||
Type::Register,
|
||||
Type::Number,
|
||||
Type::Identifier,
|
||||
])?;
|
||||
Ok(match token.variant() {
|
||||
Type::Indirect => {
|
||||
let reg = Register::parse(p, stream)?;
|
||||
match stream.expect(Type::Plus) {
|
||||
Ok(_) => Self::PostInc(reg),
|
||||
Err(_) => Self::Indirect(reg),
|
||||
}
|
||||
}
|
||||
Type::Absolute => Self::Absolute(Number::parse(p, stream)?),
|
||||
Type::Immediate => {
|
||||
let number = Number::parse(p, stream)?;
|
||||
match number.into() {
|
||||
// There are two representations for the all-ones constant, since Number preserves
|
||||
// signedness.
|
||||
-1_isize | 0xffff => Self::MinusOne,
|
||||
0 => Self::Zero,
|
||||
1 => Self::One,
|
||||
2 => Self::Two,
|
||||
4 => Self::Four,
|
||||
8 => Self::Eight,
|
||||
_ => Self::Immediate(number),
|
||||
}
|
||||
}
|
||||
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SecondaryOperand> for PrimaryOperand {
|
||||
fn from(value: SecondaryOperand) -> Self {
|
||||
match value {
|
||||
SecondaryOperand::Direct(r) => Self::Direct(r),
|
||||
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
|
||||
SecondaryOperand::Absolute(n) => Self::Absolute(n),
|
||||
SecondaryOperand::Relative(id) => Self::Relative(id),
|
||||
SecondaryOperand::Zero => Self::Zero,
|
||||
SecondaryOperand::One => Self::One,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for PrimaryOperand {
|
||||
// Turn the operand back into a form which parses into the same type
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Direct(r) => Display::fmt(r, f),
|
||||
Self::Indirect(r) => write!(f, "@{r}"),
|
||||
Self::PostInc(r) => write!(f, "@{r}+"),
|
||||
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
|
||||
Self::Relative(id) => Display::fmt(id, f),
|
||||
Self::Absolute(n) => write!(f, "&{n}"),
|
||||
Self::Immediate(n) => write!(f, "#{n}"),
|
||||
Self::Four => Display::fmt("#4", f),
|
||||
Self::Eight => Display::fmt("#8", f),
|
||||
Self::Zero => Display::fmt("#0", f),
|
||||
Self::One => Display::fmt("#1", f),
|
||||
Self::Two => Display::fmt("#2", f),
|
||||
Self::MinusOne => Display::fmt("#-1", f),
|
||||
}
|
||||
}
|
||||
}
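// Worked example of the accessors above (assuming it lives next to the impl): an
// absolute operand at address 0x0200 carries the address as an extension word and is
// encoded as indexed mode through sr, while a post-increment operand through r15 needs
// no extension word.
#[test]
fn primary_operand_bits() {
    let abs = PrimaryOperand::Absolute(Number::from(0x0200u16));
    assert_eq!(abs.mode(), 1 << 4);
    assert_eq!(abs.register(), Register::sr);
    assert_eq!(abs.ext_word(), Some(0x0200));

    let postinc = PrimaryOperand::PostInc(Register::r15);
    assert_eq!(postinc.mode(), 3 << 4);
    assert_eq!(postinc.ext_word(), None);
}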
|
@ -1,112 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Register {
|
||||
/// Program Counter
|
||||
pc,
|
||||
/// Stack Pointer
|
||||
sp,
|
||||
/// Status Register
|
||||
sr,
|
||||
/// Constant Generator
|
||||
cg,
|
||||
r4,
|
||||
r5,
|
||||
r6,
|
||||
r7,
|
||||
r8,
|
||||
r9,
|
||||
r10,
|
||||
r11,
|
||||
r12,
|
||||
r13,
|
||||
r14,
|
||||
r15,
|
||||
}
|
||||
|
||||
impl Parsable for Register {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
stream.expect(Type::Register)?.lexeme().parse()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Register> for u16 {
|
||||
fn from(value: Register) -> Self { value as u16 }
|
||||
}
|
||||
|
||||
impl TryFrom<u16> for Register {
|
||||
type Error = ParseError;
|
||||
fn try_from(value: u16) -> Result<Self, Self::Error> {
|
||||
use Register::*;
|
||||
Ok(match value {
|
||||
0 => pc,
|
||||
1 => sp,
|
||||
2 => sr,
|
||||
3 => cg,
|
||||
4 => r4,
|
||||
5 => r5,
|
||||
6 => r6,
|
||||
7 => r7,
|
||||
8 => r8,
|
||||
9 => r9,
|
||||
10 => r10,
|
||||
11 => r11,
|
||||
12 => r12,
|
||||
13 => r13,
|
||||
14 => r14,
|
||||
15 => r15,
|
||||
_ => return Err(ParseError::RegisterTooHigh(value)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Register {
|
||||
type Err = ParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
use Register::*;
|
||||
match s {
|
||||
"pc" => Ok(pc),
|
||||
"sp" => Ok(sp),
|
||||
"sr" => Ok(sr),
|
||||
"cg" => Ok(cg),
|
||||
_ => {
|
||||
str::parse::<u16>(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Register> for &str {
|
||||
fn from(value: Register) -> Self {
|
||||
use Register::*;
|
||||
match value {
|
||||
pc => "pc",
|
||||
sp => "sp",
|
||||
sr => "sr",
|
||||
cg => "cg",
|
||||
r4 => "r4",
|
||||
r5 => "r5",
|
||||
r6 => "r6",
|
||||
r7 => "r7",
|
||||
r8 => "r8",
|
||||
r9 => "r9",
|
||||
r10 => "r10",
|
||||
r11 => "r11",
|
||||
r12 => "r12",
|
||||
r13 => "r13",
|
||||
r14 => "r14",
|
||||
r15 => "r15",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Register {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) }
|
||||
}
|
@ -1,105 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension
|
||||
//! Word for a [two-operand](Encoding::Double) [instruction]
|
||||
use super::*;
|
||||
|
||||
/// The destination of a [Double](Encoding::Double) encoding
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum SecondaryOperand {
|
||||
Direct(Register),
|
||||
Indexed(Register, Number),
|
||||
Relative(Identifier),
|
||||
Absolute(Number),
|
||||
// Joke encodings?
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
|
||||
use SecondaryOperand as So;
|
||||
|
||||
impl SecondaryOperand {
|
||||
pub fn mode(&self) -> u16 {
|
||||
match self {
|
||||
So::Direct(_) | So::Zero => 0,
|
||||
So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7,
|
||||
}
|
||||
}
|
||||
pub fn register(&self) -> Register {
|
||||
use SecondaryOperand::*;
|
||||
match self {
|
||||
Direct(r) | Indexed(r, _) => *r,
|
||||
Relative(_) => Register::pc,
|
||||
Absolute(_) => Register::sr,
|
||||
Zero | One => Register::cg,
|
||||
}
|
||||
}
|
||||
/// This is the only way to have an extension word
|
||||
pub fn ext_word(&self) -> Option<u16> {
|
||||
use SecondaryOperand::*;
|
||||
match self {
|
||||
Indexed(_, w) | Absolute(w) => Some((*w).into()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parsable for SecondaryOperand {
|
||||
// Separator
|
||||
// - Register => Direct
|
||||
// - Number => Indexed
|
||||
// - OpenIdx
|
||||
// - Register
|
||||
// - CloseIdx
|
||||
// - Absolute
|
||||
// - Number
|
||||
// - Immediate
|
||||
// - Number == 0, 1
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
use SecondaryOperand::*;
|
||||
stream.allow(Type::Separator);
|
||||
// Try parsing as Register (Direct)
|
||||
if let Some(r) = Register::try_parse(p, stream)? {
|
||||
return Ok(Self::Direct(r));
|
||||
}
|
||||
// Try parsing as Number (Indexed)
|
||||
if let Some(idx) = Number::try_parse(p, stream)? {
|
||||
stream.expect(Type::LParen)?;
|
||||
let reg = Register::parse(p, stream)?;
|
||||
stream.expect(Type::RParen)?;
|
||||
return Ok(Self::Indexed(reg, idx));
|
||||
}
|
||||
// Try parsing as Identifier (Relative, label mode)
|
||||
if let Some(id) = Identifier::try_parse(p, stream)? {
|
||||
return Ok(Self::Relative(id));
|
||||
}
|
||||
// Register, Number, and Identifier are included here to make error messages clearer.
|
||||
// their inclusion will cause a negligible slowdown when the next token is not a prefix marker
|
||||
// (a failure condition), but they should never match a token here
|
||||
let token =
|
||||
stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?;
|
||||
Ok(match token.variant() {
|
||||
Type::Absolute => Absolute(Number::parse(p, stream)?),
|
||||
// TODO: Reintroduce error context
|
||||
Type::Immediate => match Number::parse(p, stream)?.into() {
|
||||
0 => Zero,
|
||||
1 => One,
|
||||
n => Err(ParseError::FatSecondaryImmediate(n))?,
|
||||
},
|
||||
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SecondaryOperand {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Direct(r) => Display::fmt(r, f),
|
||||
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
|
||||
Self::Relative(id) => Display::fmt(id, f),
|
||||
Self::Absolute(n) => write!(f, "&{n}"),
|
||||
Self::Zero => Display::fmt("#0", f),
|
||||
Self::One => Display::fmt("#1", f),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Width`] represents whether an instruction operates on whole words or bytes
|
||||
use super::*;
|
||||
|
||||
/// Represents an instruction's operand width.
|
||||
///
|
||||
/// Evaluates to false when instruction takes word-sized operands, or true when
|
||||
/// instruction takes byte-sized operands
|
||||
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Width(bool);
|
||||
|
||||
impl Parsable for Width {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else {
|
||||
return Ok(Self(false));
|
||||
};
|
||||
Ok(Self(token.is_variant(Type::ByteWidth)))
|
||||
}
|
||||
}
|
||||
impl From<Width> for u16 {
|
||||
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
|
||||
}
|
||||
impl From<Width> for bool {
|
||||
fn from(value: Width) -> Self { value.0 }
|
||||
}
|
||||
impl From<bool> for Width {
|
||||
fn from(value: bool) -> Self { Width(value) }
|
||||
}
|
||||
impl std::fmt::Display for Width {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) }
|
||||
}
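// Quick check of the width bit above (assuming it lives next to the impl): the byte form
// sets bit 6 of the instruction word, the default word form leaves it clear.
#[test]
fn width_sets_bit_6() {
    assert_eq!(u16::from(Width::from(true)), 1 << 6);
    assert_eq!(u16::from(Width::from(false)), 0);
}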
|
@ -1,261 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Opcode`] encodes an msp430 operation
|
||||
use super::*;
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Opcode from the [MSPGCC Manual][1]
|
||||
///
|
||||
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
|
||||
/// extract from a [TokenStream] only the required arguments for that call.
|
||||
///
|
||||
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
|
||||
#[allow(clippy::identity_op)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Opcode {
|
||||
// "Emulated" opcodes
|
||||
Nop,
|
||||
Pop,
|
||||
Br,
|
||||
Ret,
|
||||
Clrc,
|
||||
Setc,
|
||||
Clrz,
|
||||
Setz,
|
||||
Clrn,
|
||||
Setn,
|
||||
Dint,
|
||||
Eint,
|
||||
Rla,
|
||||
Rlc,
|
||||
Inv,
|
||||
Clr,
|
||||
Tst,
|
||||
Dec,
|
||||
Decd,
|
||||
Inc,
|
||||
Incd,
|
||||
Adc,
|
||||
Dadc,
|
||||
Sbc,
|
||||
// Single
|
||||
Rrc = 0x1000 | 0 << 7,
|
||||
Swpb = 0x1000 | 1 << 7,
|
||||
Rra = 0x1000 | 2 << 7,
|
||||
Sxt = 0x1000 | 3 << 7,
|
||||
Push = 0x1000 | 4 << 7,
|
||||
Call = 0x1000 | 5 << 7,
|
||||
Reti = 0x1000 | 6 << 7,
|
||||
// Jump
|
||||
Jnz = 0x2000 | 0 << 10,
|
||||
Jz = 0x2000 | 1 << 10,
|
||||
Jnc = 0x2000 | 2 << 10,
|
||||
Jc = 0x2000 | 3 << 10,
|
||||
Jn = 0x2000 | 4 << 10,
|
||||
Jge = 0x2000 | 5 << 10,
|
||||
Jl = 0x2000 | 6 << 10,
|
||||
Jmp = 0x2000 | 7 << 10,
|
||||
// Double
|
||||
Mov = 0x4000,
|
||||
Add = 0x5000,
|
||||
Addc = 0x6000,
|
||||
Subc = 0x7000,
|
||||
Sub = 0x8000,
|
||||
Cmp = 0x9000,
|
||||
Dadd = 0xa000,
|
||||
Bit = 0xb000,
|
||||
Bic = 0xc000,
|
||||
Bis = 0xd000,
|
||||
Xor = 0xe000,
|
||||
And = 0xf000,
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
/// Resolve an Opcode into an [Opcode] and an [EncodingParser]
|
||||
pub fn resolve(self) -> (Opcode, EncodingParser) {
|
||||
use super::Encoding as Enc;
|
||||
use Register as Reg;
|
||||
use {PrimaryOperand as Src, SecondaryOperand as Dst};
|
||||
match self {
|
||||
Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()),
|
||||
// these instructions do not take a width specifier (though they may still behave properly)
|
||||
Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()),
|
||||
// `reti` does not take any operands.
|
||||
Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()),
|
||||
Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => {
|
||||
(self, Enc::jump().end())
|
||||
}
|
||||
Self::Mov
|
||||
| Self::Add
|
||||
| Self::Addc
|
||||
| Self::Subc
|
||||
| Self::Sub
|
||||
| Self::Cmp
|
||||
| Self::Dadd
|
||||
| Self::Bit
|
||||
| Self::Bic
|
||||
| Self::Bis
|
||||
| Self::Xor
|
||||
| Self::And => (self, Enc::double().end()),
|
||||
Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
|
||||
Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()),
|
||||
Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()),
|
||||
Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()),
|
||||
Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Rla => (Self::Add, Enc::reflexive().end()),
|
||||
Self::Rlc => (Self::Addc, Enc::reflexive().end()),
|
||||
Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()),
|
||||
Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()),
|
||||
Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()),
|
||||
Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()),
|
||||
Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()),
|
||||
Self::Inc => (Self::Add, Enc::double().src(Src::One).end()),
|
||||
Self::Incd => (Self::Add, Enc::double().src(Src::Two).end()),
|
||||
Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()),
|
||||
Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()),
|
||||
Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parsable for Opcode {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
// TODO: Reintroduce error context
|
||||
stream.expect(Type::Insn)?.parse()
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Opcode {
|
||||
type Err = ParseError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
//TODO: Reduce allocations here?
|
||||
let s = s.to_ascii_lowercase();
|
||||
Ok(match s.as_str() {
|
||||
"rrc" => Self::Rrc,
|
||||
"swpb" => Self::Swpb,
|
||||
"rra" => Self::Rra,
|
||||
"sxt" => Self::Sxt,
|
||||
"push" => Self::Push,
|
||||
"call" => Self::Call,
|
||||
"reti" => Self::Reti,
|
||||
|
||||
"jne" | "jnz" => Self::Jnz,
|
||||
"jeq" | "jz" => Self::Jz,
|
||||
"jnc" | "jlo" => Self::Jnc,
|
||||
"jc" | "jhs" => Self::Jc,
|
||||
"jn" => Self::Jn,
|
||||
"jge" => Self::Jge,
|
||||
"jl" => Self::Jl,
|
||||
"jmp" => Self::Jmp,
|
||||
|
||||
"mov" => Self::Mov,
|
||||
"add" => Self::Add,
|
||||
"addc" => Self::Addc,
|
||||
"subc" => Self::Subc,
|
||||
"sub" => Self::Sub,
|
||||
"cmp" => Self::Cmp,
|
||||
"dadd" => Self::Dadd,
|
||||
"bit" => Self::Bit,
|
||||
"bic" => Self::Bic,
|
||||
"bis" => Self::Bis,
|
||||
"xor" => Self::Xor,
|
||||
"and" => Self::And,
|
||||
|
||||
"nop" => Self::Nop,
|
||||
"pop" => Self::Pop,
|
||||
"br" => Self::Br,
|
||||
"ret" => Self::Ret,
|
||||
"clrc" => Self::Clrc,
|
||||
"setc" => Self::Setc,
|
||||
"clrz" => Self::Clrz,
|
||||
"setz" => Self::Setz,
|
||||
"clrn" => Self::Clrn,
|
||||
"setn" => Self::Setn,
|
||||
"dint" => Self::Dint,
|
||||
"eint" => Self::Eint,
|
||||
"rla" => Self::Rla,
|
||||
"rlc" => Self::Rlc,
|
||||
"inv" => Self::Inv,
|
||||
"clr" => Self::Clr,
|
||||
"tst" => Self::Tst,
|
||||
"dec" => Self::Dec,
|
||||
"decd" => Self::Decd,
|
||||
"inc" => Self::Inc,
|
||||
"incd" => Self::Incd,
|
||||
"adc" => Self::Adc,
|
||||
"dadc" => Self::Dadc,
|
||||
"sbc" => Self::Sbc,
|
||||
_ => Err(ParseError::UnrecognizedOpcode(s))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Opcode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
match self {
|
||||
Self::Nop => "nop",
|
||||
Self::Pop => "pop",
|
||||
Self::Br => "br",
|
||||
Self::Ret => "ret",
|
||||
Self::Clrc => "clrc",
|
||||
Self::Setc => "setc",
|
||||
Self::Clrz => "clrz",
|
||||
Self::Setz => "setz",
|
||||
Self::Clrn => "clrn",
|
||||
Self::Setn => "setn",
|
||||
Self::Dint => "dint",
|
||||
Self::Eint => "eint",
|
||||
Self::Rla => "rla",
|
||||
Self::Rlc => "rlc",
|
||||
Self::Inv => "inv",
|
||||
Self::Clr => "clr",
|
||||
Self::Tst => "tst",
|
||||
Self::Dec => "dec",
|
||||
Self::Decd => "decd",
|
||||
Self::Inc => "inc",
|
||||
Self::Incd => "incd",
|
||||
Self::Adc => "adc",
|
||||
Self::Dadc => "dadc",
|
||||
Self::Sbc => "sbc",
|
||||
Self::Rrc => "rrc",
|
||||
Self::Swpb => "swpb",
|
||||
Self::Rra => "rra",
|
||||
Self::Sxt => "sxt",
|
||||
Self::Push => "push",
|
||||
Self::Call => "call",
|
||||
Self::Reti => "reti",
|
||||
Self::Jnz => "jnz",
|
||||
Self::Jz => "jz",
|
||||
Self::Jnc => "jnc",
|
||||
Self::Jc => "jc",
|
||||
Self::Jn => "jn",
|
||||
Self::Jge => "jge",
|
||||
Self::Jl => "jl",
|
||||
Self::Jmp => "jmp",
|
||||
Self::Mov => "mov",
|
||||
Self::Add => "add",
|
||||
Self::Addc => "addc",
|
||||
Self::Subc => "subc",
|
||||
Self::Sub => "sub",
|
||||
Self::Cmp => "cmp",
|
||||
Self::Dadd => "dadd",
|
||||
Self::Bit => "bit",
|
||||
Self::Bic => "bic",
|
||||
Self::Bis => "bis",
|
||||
Self::Xor => "xor",
|
||||
Self::And => "and",
|
||||
}
|
||||
)
|
||||
}
|
||||
}
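// Worked example (plain numbers) of how an emulated opcode above bottoms out in a real
// instruction word: `clrc` resolves to `bic #1, sr`, i.e. Opcode::Bic (0xc000) with the
// constant-generator source #1 (mode 1 << 4, register cg = 3 in bits 8-11) and the
// register-direct destination sr (register 2).
#[test]
fn clrc_emulates_bic_1_sr() {
    let word = 0xc000u16 | (3 << 8) | (1 << 4) | 2;
    assert_eq!(word, 0xc312);
}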
|
@ -1,21 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! The definition of a label
|
||||
use super::*;
|
||||
|
||||
/// The definition of a label
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Label(pub Identifier);
|
||||
|
||||
impl Parsable for Label {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
Ok(Self(Identifier::parse(p, stream).and_then(|t| {
|
||||
stream.require(Type::Label)?;
|
||||
Ok(t)
|
||||
})?))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Label {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
|
||||
}
|
@ -1,72 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same
|
||||
//! document line will be treated as if they took up multiple [`Line`s](Line).
|
||||
//!
|
||||
//! A line contains one of:
|
||||
//! - [`Label`]
|
||||
//! - [`Instruction`]
|
||||
//! - [`Directive`]
|
||||
//! - [`Comment`]
|
||||
//! - [Nothing](Line::Empty)
|
||||
use super::*;
|
||||
|
||||
/// A line contains any one of:
|
||||
/// - [`Label`] (definition)
|
||||
/// - [`Instruction`]
|
||||
/// - [`Directive`]
|
||||
/// - [`Comment`]
|
||||
/// - Nothing at all
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Line {
|
||||
Empty,
|
||||
Insn(Instruction),
|
||||
Comment(Comment),
|
||||
Directive(Directive),
|
||||
Label(Label),
|
||||
EndOfFile, // Expected end of file
|
||||
}
|
||||
|
||||
impl Parsable for Line {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
Ok(
|
||||
match stream
|
||||
.peek_expect_any_of([
|
||||
Type::Endl,
|
||||
Type::Insn,
|
||||
Type::Comment,
|
||||
Type::Directive,
|
||||
Type::Identifier,
|
||||
Type::EndOfFile,
|
||||
])?
|
||||
.variant()
|
||||
{
|
||||
Type::Endl => {
|
||||
stream.next();
|
||||
Self::Empty
|
||||
}
|
||||
Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
|
||||
Type::Comment => Self::Comment(Comment::parse(p, stream)?),
|
||||
Type::Directive => Self::Directive(Directive::parse(p, stream)?),
|
||||
Type::Identifier => Self::Label(Label::parse(p, stream)?),
|
||||
Type::EndOfFile => {
|
||||
stream.next();
|
||||
Self::EndOfFile
|
||||
}
|
||||
_ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
impl Display for Line {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Empty => writeln!(f, "\n"),
|
||||
Self::Label(arg0) => Display::fmt(arg0, f),
|
||||
Self::Insn(arg0) => Display::fmt(arg0, f),
|
||||
Self::Directive(arg0) => Display::fmt(arg0, f),
|
||||
Self::Comment(arg0) => Display::fmt(arg0, f),
|
||||
Self::EndOfFile => write!(f, "; End of file."),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,85 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
|
||||
use super::*;
|
||||
/// Parses tokens from [stream](TokenStream) into Self node
|
||||
pub trait Parsable {
|
||||
/// Parses tokens from [TokenStream](TokenStream) into Self nodes
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where
|
||||
Self: Sized,
|
||||
T: TokenStream<'text>;
|
||||
|
||||
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
|
||||
///
|
||||
/// Masks failed expectations.
|
||||
fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
|
||||
where
|
||||
Self: Sized,
|
||||
T: TokenStream<'text>,
|
||||
{
|
||||
match Self::parse(p, stream) {
|
||||
Ok(some) => Ok(Some(some)),
|
||||
Err(ParseError::LexError(_)) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_and<'text, T, R>(
|
||||
p: &Parser,
|
||||
stream: &mut T,
|
||||
f: fn(p: &Parser, &mut T) -> R,
|
||||
) -> Result<(Self, R), ParseError>
|
||||
where
|
||||
Self: Sized,
|
||||
T: TokenStream<'text>,
|
||||
{
|
||||
Ok((Self::parse(p, stream)?, f(p, stream)))
|
||||
}
|
||||
|
||||
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
|
||||
///
|
||||
/// Returns [`Self::default()`](Default::default()) on error
|
||||
fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
|
||||
where
|
||||
Self: Sized + Default,
|
||||
T: TokenStream<'text>,
|
||||
{
|
||||
Self::parse(p, stream).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! parsable_str_types {
|
||||
($($t:ty),*$(,)?) => {$(
|
||||
impl Parsable for $t {
|
||||
fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into())
|
||||
}
|
||||
}
|
||||
)*};
|
||||
}
|
||||
use std::{path::PathBuf, rc::Rc};
|
||||
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];
|
||||
|
||||
/// Vectors of arbitrary parsables are cool
|
||||
impl<P: Parsable> Parsable for Vec<P> {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
// [dead beef]
|
||||
// [A, B,]
|
||||
// [c d e f]
|
||||
// [ something
|
||||
// else ]
|
||||
|
||||
stream.require(Type::LBracket)?;
|
||||
stream.allow(Type::Endl);
|
||||
let mut out = vec![];
|
||||
while let Some(t) = P::try_parse(p, stream)? {
|
||||
out.push(t);
|
||||
stream.allow(Type::Separator);
|
||||
stream.allow(Type::Endl);
|
||||
}
|
||||
stream.require(Type::RBracket)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// © 2023 John Breaux
|
||||
use super::*;
|
||||
|
||||
/// Contains the entire AST
|
||||
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Root(Option<PathBuf>, Vec<(usize, Line)>);
|
||||
// pub struct Root { pub path: PathBuf, pub lines: Vec<Line> }
|
||||
|
||||
impl Root {
|
||||
pub fn file(&self) -> Option<&Path> { self.0.as_deref() }
|
||||
pub(crate) fn set_file(mut self, path: PathBuf) -> Self {
|
||||
self.0 = Some(path);
|
||||
self
|
||||
}
|
||||
pub fn lines(&self) -> &[(usize, Line)] { &self.1 }
|
||||
}
|
||||
|
||||
impl Parsable for Root {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let mut lines = vec![];
|
||||
loop {
|
||||
let number = stream.context().line();
|
||||
match Line::parse(p, stream)? {
|
||||
Line::EndOfFile => break,
|
||||
line => lines.push((number, line)),
|
||||
}
|
||||
}
|
||||
Ok(Root(None, lines))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Root {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for (num, line) in &self.1 {
|
||||
f.pad(&format!("{num:3}: {line} "))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Root {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for line in self.1.iter() {
|
||||
Debug::fmt(line, f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
87
src/preprocessor.rs
Normal file
@ -0,0 +1,87 @@
|
||||
// © 2023-2024 John Breaux
|
||||
|
||||
use crate::{
|
||||
lexer::{
|
||||
token::{Token, TokenKind as Kind},
|
||||
Lexer,
|
||||
},
|
||||
util::Span,
|
||||
};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Preprocessor<'t> {
|
||||
lexer: Lexer<'t>,
|
||||
buf: VecDeque<Token<'t>>,
|
||||
defn: HashMap<&'t str, Vec<Token<'t>>>,
|
||||
/// Location for injected tokens
|
||||
pos: Span<usize>,
|
||||
}
|
||||
|
||||
impl<'t> Preprocessor<'t> {
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
Self {
|
||||
lexer: Lexer::new(text),
|
||||
buf: Default::default(),
|
||||
defn: Default::default(),
|
||||
pos: Default::default(),
|
||||
}
|
||||
}
|
||||
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
|
||||
Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() }
|
||||
}
|
||||
pub fn scan(&mut self) -> Option<Token<'t>> {
|
||||
self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos)
|
||||
}
|
||||
pub fn start(&self) -> usize {
|
||||
self.lexer.location()
|
||||
}
|
||||
/// Grabs a token from the lexer, and attempts to match its lexeme
|
||||
fn next(&mut self) -> Option<Token<'t>> {
|
||||
let token = self.lexer.scan()?;
|
||||
if let Some(tokens) = self.defn.get(token.lexeme) {
|
||||
self.buf.extend(tokens.iter().copied().map(|mut t| {
|
||||
t.pos = self.pos;
|
||||
t
|
||||
}));
|
||||
return self.scan();
|
||||
} else {
|
||||
match token.kind {
|
||||
Kind::Directive => self.directive(token),
|
||||
Kind::Newline => return self.scan(),
|
||||
_ => {}
|
||||
}
|
||||
Some(token)
|
||||
}
|
||||
}
|
||||
/// Passes a token through while parsing a directive
|
||||
fn tee(&mut self) -> Option<Token<'t>> {
|
||||
let token = self.lexer.scan()?;
|
||||
self.buf.push_back(token);
|
||||
// self.buf.push_back(token);
|
||||
Some(token)
|
||||
}
|
||||
/// Parses and executes a directive
|
||||
pub fn directive(&mut self, token: Token<'t>) {
|
||||
if ".define" == token.lexeme {
|
||||
self.define()
|
||||
}
|
||||
}
|
||||
pub fn define(&mut self) {
|
||||
let Some(key) = self.tee() else {
|
||||
return;
|
||||
};
|
||||
let mut value = vec![];
|
||||
while let Some(token) = self.tee() {
|
||||
match token.kind {
|
||||
Kind::Comment => {
|
||||
self.buf.push_back(token);
|
||||
break;
|
||||
}
|
||||
Kind::Newline => break,
|
||||
_ => value.push(token),
|
||||
}
|
||||
}
|
||||
self.defn.insert(key.lexeme, value);
|
||||
}
|
||||
}
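// A small usage sketch for the preprocessor above. It assumes `Token::lexeme` stays
// readable from the call site, and the exact token boundaries depend on the lexer, so
// the inline comment is indicative rather than exact.
fn dump_tokens(text: &str) {
    let mut pp = Preprocessor::new(text);
    while let Some(token) = pp.scan() {
        // For `.define LOOPS 400\n add LOOPS, r15\n`, the second line comes back with
        // the tokens recorded for `LOOPS` spliced in where the name appeared.
        print!("{} ", token.lexeme);
    }
    println!();
}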
|