v0.3.0: Total overhaul

- Everything has been rewritten
- Modularity is improved somewhat
  - No dependency injection in preprocessor/parser, though
- There are now early and late constant evaluation engines
  - The late engine allows by-value access to already-assembled code
  - Performs basic arithmetic, remainder, bitwise logic, bit shifts, negation, and bit inversion
  - Also allows indexing into already-generated code using pointer-arithmetic syntax: `*(&main + 10)` (see the sketch below). This is subject to change: it's clunky, and it only allows word-aligned access. However, this rewrite has already taken far too long, so I'll call the bikeshedding here.
  - Pretty sure this constant evaluation is computationally equivalent to Deadfish?
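As a rough sketch of the deref semantics (hypothetical label, address, and `.org` base; mirrors the `UnOp::Deref` arm of `Assembler::eval` in this diff):

```rust
// Sketch of how `*(&main + 10)` resolves; all concrete values here are made up.
fn deref_sketch(output: &[u16], org_base: u16, addrof_main: u16) -> Option<u16> {
    let byte_addr = addrof_main.wrapping_add(10);                   // &main + 10 (byte address)
    let word_idx = byte_addr.wrapping_sub(org_base) as usize >> 1;  // back to a word index
    output.get(word_idx).copied()                                   // `*` reads the assembled word
}

fn main() {
    // Pretend `main` is the first label of a program org'd at 0x4400.
    let code = [0x4031u16, 0x4400, 0x12b0, 0x4410, 0x4303, 0x3fff];
    assert_eq!(deref_sketch(&code, 0x4400, 0x4400), Some(0x3fff)); // word 5 of the output
}
```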
This commit is contained in:
2024-01-30 05:27:12 -06:00
parent e4a1b889c2
commit fc8f8b9622
44 changed files with 3119 additions and 3055 deletions

View File

@@ -1,197 +1,423 @@
// © 2023 John Breaux
//! Traverses an AST, assembling instructions.
//!
//! [Assembler] carries *some* state
//! Assembles a binary using the given [AST](crate::parser::ast)
use crate::parser::preamble::*;
use error::AssemblyError;
use error::{AResult, ErrorKind::*};
use std::collections::HashMap;
use std::path::Path;
pub mod error;
use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IdentType {
Word,
Jump,
}
use self::error::{Error, ErrorKind};
/// Takes in an AST's [Root], and outputs a sequence of bytes
/// Assembles a binary using the given [Assemble]-able item
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Assembler {
out: Vec<u16>,
/// A map from Labels' [Identifier]s to their location in the binary
labels: HashMap<Identifier, usize>,
/// A list of all referenced [Identifier]s in the binary, and their locations
identifiers: Vec<(usize, Identifier, IdentType)>,
pub struct Assembler<'t> {
/// The assembled output
output: Vec<u16>,
/// Table of labels, for backpatching
labels: HashMap<&'t str, usize>,
/// Backpatch table for jump instructions
jump_queue: Vec<(usize, &'t str)>,
/// Backpatch table for immediate values
expr_queue: Vec<(usize, Expr<'t>)>,
/// Base address from .org directives
org_base: usize,
/// Last seen index in input
loc: Span<usize>,
}
impl Assembler {
pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> {
let mut out = Self::default();
out.visit_root(r)?;
Ok(out.out)
impl<'t> Assembler<'t> {
pub fn new() -> Self {
Default::default()
}
pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) }
pub fn out(self) -> Vec<u16> { self.out }
fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) }
fn push_default(&mut self) -> usize {
self.out.push(Default::default());
self.out.len() - 1
pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> {
t.assemble_in(self)
}
}
impl Assembler {
/// Visits the [Root] node of a parse tree
fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> {
// Visit the entire tree
for (num, line) in r.lines() {
self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?;
/// Gets the address of a label
pub fn addrof(&self, name: &str) -> Option<u16> {
self.labels.get(name).map(|v| *v as u16)
}
/// Gets the value at a label
pub fn valueof(&self, name: &str) -> Option<u16> {
self.output.get(self.addrof(name)? as usize).copied()
}
fn push(&mut self, word: u16) {
self.output.push(word)
}
fn error(&self, kind: ErrorKind) -> Error {
Error { span: self.loc, kind }
}
/// Backpatches everything, and yoinks the output buffer.
pub fn out(&mut self) -> AResult<Vec<u16>> {
// Resolve jumps
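// A jump's 10-bit field is a signed word offset from the word *after* the jump
// (the PC has already advanced past it), hence the `value - 1` below.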
for (idx, key) in &self.jump_queue {
// eprintln!("Patching jump at {idx} with key {key}");
match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) {
None => Err(self.error(UndefinedLabel(key.to_string())))?,
Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff,
Some(value) => Err(self.error(LongJump(value)))?,
}
}
// Link identifiers
for (idx, id, id_type) in self.identifiers.iter() {
let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) };
let offset = (num as isize - *idx as isize) * 2;
*self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type {
IdentType::Word => offset as u16,
IdentType::Jump => JumpTarget::squish(offset)?,
};
// Resolve immediates through late expression evaluation.
for (idx, expr) in &self.expr_queue {
// eprintln!("Patching immediate at {idx} with expression {expr:?}");
self.output[*idx] = self.eval(expr)?;
}
let out = std::mem::take(&mut self.output);
*self = Default::default();
Ok(out)
}
pub fn add_label(&mut self, label: &'t str) -> AResult<()> {
if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() {
Err(self.error(RedefinedLabel(label.into())))?
}
Ok(())
}
/// visit a [Line]
fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> {
match line {
Line::Insn(insn) => self.visit_instruction(insn),
Line::Label(label) => self.visit_label(label),
Line::Directive(d) => self.visit_directive(d),
_ => Ok(()),
/// Appends an expr as an extword, deferring its calculation for later
pub fn defer_expr(&mut self, e: Expr<'t>) {
self.expr_queue.push((self.output.len(), e));
self.push(0);
}
/// Defers resolution of a jump label until output time
/// The jump label will be later resolved to the NEXT word.
pub fn defer_jump(&mut self, label: &'t str) {
self.jump_queue.push((self.output.len(), label))
}
}
pub trait Assemble<'t> {
fn assemble(&self) -> AResult<Vec<u16>> {
self.assemble_in(&mut Default::default())?.out()
}
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>;
}
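// Example (sketch; assumes a parser that yields `Statements<'t>`): the default `assemble()`
// builds a fresh Assembler, walks the tree via `assemble_in`, then backpatches and takes
// the output buffer with `out()`:
//
//     let words: Vec<u16> = statements.assemble()?;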
impl<'t> Assemble<'t> for Statements<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
for stmt in &self.stmts {
stmt.assemble_in(a)?;
}
Ok(a)
}
}
impl<'t> Assemble<'t> for Statement<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
match self {
Statement::Label(label) => a.add_label(label).map(|_| a),
Statement::Insn(i) => i.assemble_in(a),
Statement::Directive(d) => d.assemble_in(a),
Statement::Comment(_) => Ok(a),
}
}
/// Visits a [Directive]
fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> {
match node {
Directive::Org(_) => todo!("{node}"),
Directive::Define(..) => (),
Directive::Include(r) => self.visit_root(r)?,
Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()),
Directive::Bytes(words) | Directive::Words(words) => {
for word in words {
self.out.push((*word).into());
}
impl<'t> Assemble<'t> for Directive<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
match self {
Directive::Define(_) => {}
Directive::Org(base) => a.org_base = a.eval(base)? as usize,
Directive::Word(expr) => a.defer_expr(*expr.clone()),
Directive::Words(exprs) => {
for expr in exprs {
a.defer_expr(expr.clone())
}
}
Directive::String(s) => self.visit_string(s)?,
Directive::Strings(strs) => {
for s in strs {
self.visit_string(s)?;
}
}
};
Ok(())
}
/// Visits a [Label]
fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> {
// Register the label
match self.labels.insert(node.0.to_owned(), self.out.len()) {
Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())),
_ => Ok(()),
}
}
/// Visits an [Instruction]
fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> {
self.push_default();
self.visit_opcode(insn.opcode())?;
self.visit_encoding(insn.encoding())?;
Ok(())
}
/// Visits an [Opcode]
fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> {
*self.last_mut()? |= *node as u16;
Ok(())
}
/// Visits an [Encoding]
fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> {
*self.last_mut()? |= node.word();
match node {
Encoding::Single { dst, .. } => {
self.visit_primary_operand(dst)?;
}
Encoding::Jump { target } => {
self.visit_jump_target(target)?;
}
Encoding::Double { src, dst, .. } => {
self.visit_primary_operand(src)?;
self.visit_secondary_operand(dst)?;
Directive::String(str) => {
str.assemble_in(a)?;
}
}
Ok(a)
}
}
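// Strings are packed little-endian, two bytes per word; the terminating NUL lands either in
// the high byte of the final word or in a fresh zero word. Sketch: "AB" -> [0x4241, 0x0000],
// "ABC" -> [0x4241, 0x0043].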
impl<'t> Assemble<'t> for &'t str {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
for chunk in self.as_bytes().chunks(2) {
match chunk.len() {
0 => a.push(0),
1 => {
a.push(chunk[0] as u16);
return Ok(a);
}
2 => a.push((chunk[1] as u16) << 8 | chunk[0] as u16),
n => unreachable!("expected chunks of length 2, got length {n}"),
}
}
a.push(0);
Ok(a)
}
}
impl<'t> Assemble<'t> for Instruction<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { span, kind } = self;
a.loc = *span;
kind.assemble_in(a)
}
}
impl<'t> Assemble<'t> for InstructionKind<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
match self {
InstructionKind::NoEm(v) => v.assemble_in(a),
InstructionKind::OneEm(v) => v.assemble_in(a),
InstructionKind::OneArg(v) => v.assemble_in(a),
InstructionKind::TwoArg(v) => v.assemble_in(a),
InstructionKind::Jump(v) => v.assemble_in(a),
InstructionKind::Reti(v) => v.assemble_in(a),
InstructionKind::Br(v) => v.assemble_in(a),
}
}
}
impl<'t> Assemble<'t> for NoEm {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
impl<'t> Assemble<'t> for OneEm<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
impl<'t> Assemble<'t> for OneArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src } = self;
let (src_reg, src_mode, src_ext) = source(src);
a.push(
0b000100 << 10 | one_arg(*opcode) << 7 | (*width as u16) << 6 | src_mode << 4 | src_reg,
);
if let Some(expr) = src_ext {
a.defer_expr(expr)
}
Ok(a)
}
}
impl<'t> Assemble<'t> for TwoArg<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, width, src, dst } = self;
let (src_reg, src_mode, src_ext) = source(src);
let (dst_reg, dst_mode, dst_ext) = destination(dst);
a.push(
two_arg(*opcode) << 12
| src_reg << 8
| dst_mode << 7
| (*width as u16) << 6
| src_mode << 4
| dst_reg,
);
if let Some(expr) = src_ext {
a.defer_expr(expr)
}
if let Some(expr) = dst_ext {
a.defer_expr(expr)
}
Ok(a)
}
}
impl<'t> Assemble<'t> for Jump<'t> {
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
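/// `JumpDst::Rel` is a byte offset measured from the jump instruction itself; it is encoded
/// as a signed 10-bit word offset relative to the following word.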
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
let Self { opcode, dst } = self;
let word = 1 << 13
| jump(*opcode) << 10
| match *dst {
JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))),
JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => {
return Err(a.error(LongJump(value)))
}
JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff,
JumpDst::Label(label) => {
a.defer_jump(label);
0
}
} & 0x3ff;
a.push(word);
Ok(a)
}
}
impl<'t> Assemble<'t> for Reti {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
a.output.push(0b0001_0011_0000_0000);
Ok(a)
}
}
impl<'t> Assemble<'t> for Br<'t> {
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
eprintln!(
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
);
self.clone().to_canonical().assemble_in(a)
}
}
pub fn one_arg(opcode: token::OneArg) -> u16 {
opcode as u16
}
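// Two-operand opcodes start at 0x4 in the MSP430 encoding (MOV = 0x4xxx), hence the `+ 4`.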
pub fn two_arg(opcode: token::TwoArg) -> u16 {
opcode as u16 + 4
}
pub fn jump(opcode: token::Jump) -> u16 {
use token::Jump;
match opcode {
Jump::Jne | Jump::Jnz => 0,
Jump::Jeq | Jump::Jz => 1,
Jump::Jnc | Jump::Jlo => 2,
Jump::Jc | Jump::Jhs => 3,
Jump::Jn => 4,
Jump::Jge => 5,
Jump::Jl => 6,
Jump::Jmp => 7,
}
}
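// Operand lowering cheat-sheet (per the match arms below; r2/r3 double as the MSP430
// constant generator, so common immediates need no extension word):
//   #4, #8            -> (r2, mode 2/3)
//   #0, #1, #2, #-1   -> (r3, mode 0..=3)
//   #imm              -> (r0/PC, mode 3, extension word = imm)
//   &abs              -> (r2, mode 1, extension word = abs)
//   x(rn)             -> (rn, mode 1, extension word = x)
//   @rn / @rn+        -> (rn, mode 2 / 3)
//   rn                -> (rn, mode 0)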
/// Returns a tuple of (Reg, AddrMode, extword)
pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) {
use SrcSpecial::*;
match src {
Src::Special(Four) => (2, 2, None),
Src::Special(Eight) => (2, 3, None),
Src::Special(Zero) => (3, 0, None),
Src::Special(One) => (3, 1, None),
Src::Special(Two) => (3, 2, None),
Src::Special(NegOne) => (3, 3, None),
Src::Immediate(e) => (0, 3, Some(*e.clone())),
Src::Absolute(e) => (2, 1, Some(*e.clone())),
Src::Direct(r) => (*r as u16, 0, None),
Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Src::Indirect(r) => (*r as u16, 2, None),
Src::PostInc(r) => (*r as u16, 3, None),
Src::BareExpr(e) => (0, 3, Some(*e.clone())),
}
}
/// Returns a tuple of (Reg, AddrMode, Extword)
pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) {
use DstSpecial::*;
match dst {
Dst::Special(Zero) => (3, 0, None),
Dst::Special(One) => (3, 1, None),
Dst::Absolute(e) => (2, 1, Some(*e.clone())),
Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
Dst::Direct(r) => (*r as u16, 0, None),
}
}
impl<'t> Assembler<'t> {
/// Evaluates an [Expr] using labels and constants defined in the current program
fn eval(&self, expr: &Expr) -> AResult<u16> {
match expr {
Expr::Binary(head, tails) => {
let mut head = self.eval(head)?;
for (op, tail) in tails {
let tail = self.eval(tail)?;
head = match op {
BinOp::Mul => head.wrapping_mul(tail),
BinOp::Div => head.wrapping_div(tail),
BinOp::Rem => head.wrapping_rem(tail),
BinOp::Add => head.wrapping_add(tail),
BinOp::Sub => head.wrapping_sub(tail),
BinOp::Lsh => head.wrapping_shl(tail as u32),
BinOp::Rsh => head.wrapping_shr(tail as u32),
BinOp::And => head & tail,
BinOp::Xor => head ^ tail,
BinOp::Or => head | tail,
};
}
Ok(head)
}
Expr::Unary(ops, tail) => {
let mut tail = self.eval(tail)?;
for op in ops {
tail = match op {
UnOp::Not => !tail,
UnOp::Neg => 0u16.wrapping_sub(tail),
UnOp::Deref => *self
.output
.get(tail.wrapping_sub(self.org_base as u16) as usize >> 1)
.ok_or_else(|| self.error(OobRead(tail)))?,
}
}
Ok(tail)
}
Expr::Group(e) => self.eval(e),
Expr::Number(n) => Ok(*n),
Expr::Ident(name) => {
self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string())))
}
Expr::AddrOf(name) => self
.addrof(name)
.map(|p| (p << 1).wrapping_add(self.org_base as u16))
.ok_or_else(|| self.error(UndefinedLabel(name.to_string()))),
}
}
}
pub mod error {
use std::fmt::Display;
use crate::util::Span;
pub type AResult<T> = Result<T, Error>;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Error {
pub span: Span<usize>,
pub kind: ErrorKind,
}
impl std::error::Error for Error {}
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ErrorKind {
#[default]
Todo,
/// A label was used, but not defined
UndefinedLabel(String),
RedefinedLabel(String),
OobRead(u16),
OddJump(i16),
LongJump(i16),
/// A plethora of [Error]s
Errors(Vec<Error>),
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { kind, span } = self;
write!(f, "[{span}]: ")?;
write!(f, "Error: {kind}")
}
}
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::Todo => write!(f, "Not yet implemented"),
ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"),
ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"),
ErrorKind::OobRead(addr) => {
write!(f, "Out of bounds read in constant expression: {addr}")
}
ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"),
ErrorKind::LongJump(to) => {
write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe")
}
ErrorKind::Errors(errors) => {
writeln!(f, "Could not complete assembly:")?;
for error in errors {
writeln!(f, "{error}")?;
}
Ok(())
}
}
}
Ok(())
}
/// Visits a [JumpTarget]
fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> {
match node {
JumpTarget::Number(num) => self.visit_number(num),
JumpTarget::Identifier(id) => {
self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?;
Ok(())
}
}
}
/// Visits a [SecondaryOperand]
fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> {
use SecondaryOperand as O;
if let O::Indexed(_, num) | O::Absolute(num) = node {
self.push_default();
self.visit_number(num)?;
}
Ok(())
}
/// Visits a [PrimaryOperand]
fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> {
use PrimaryOperand as O;
match node {
O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => {
self.push_default();
self.visit_number(num)?;
}
O::Relative(id) => {
let addr = self.push_default();
self.visit_identifier(id, addr, IdentType::Word)?;
}
_ => (),
}
Ok(())
}
/// Visits a number and writes it into the last index
fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> {
*self.last_mut()? |= u16::from(*node);
Ok(())
}
/// Visits a number and appends it to the output buffer
fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> {
for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() {
if idx % 2 == 0 {
self.push_default();
}
*self.last_mut()? |= (byte as u16) << (8 * (idx % 2));
}
Ok(())
}
/// Visits an [Identifier], and registers it to the identifier list
fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> {
self.identifiers.push((addr, node.clone(), ty));
Ok(())
}
}

View File

@@ -1,56 +0,0 @@
// © 2023 John Breauxs
use crate::parser::{error::ParseError, preamble::*};
use std::{
fmt::Display,
path::{Path, PathBuf},
};
#[derive(Debug)]
pub enum AssemblyError {
UnresolvedIdentifier(Identifier),
RedefinedLabel(Identifier),
JumpedTooFar(Identifier, isize),
ParseError(ParseError),
// TODO: This, better'
Context(Box<AssemblyError>, PathBuf, usize),
EmptyBuffer,
}
impl AssemblyError {
pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self {
Self::Context(self.into(), file.as_ref().into(), line)
}
}
impl From<ParseError> for AssemblyError {
fn from(value: ParseError) -> Self { Self::ParseError(value) }
}
impl Display for AssemblyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnresolvedIdentifier(id) => {
write!(f, "Identifier {id} is undefined, but referenced anyway.")
}
Self::RedefinedLabel(id) => {
write!(f, "Redefined label '{id}'.")
}
Self::JumpedTooFar(id, num) => {
write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)")
}
Self::ParseError(e) => Display::fmt(e, f),
Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()),
Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f),
}
}
}
impl std::error::Error for AssemblyError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::ParseError(e) => Some(e),
Self::Context(e, ..) => Some(e),
_ => None,
}
}
}

View File

@@ -1,49 +0,0 @@
// © 2023 John Breauxs
//! Common error type for [msp430-asm](crate) errors
use super::*;
use std::fmt::Display;
#[derive(Debug)]
pub enum Error {
/// Produced by [lexer]
LexError(lexer::error::LexError),
/// Produced by [parser]
ParseError(parser::error::ParseError),
/// Produced by [assembler]
AssemblyError(assembler::error::AssemblyError),
}
impl Error {}
impl From<lexer::error::LexError> for Error {
fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) }
}
impl From<parser::error::ParseError> for Error {
fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) }
}
impl From<assembler::error::AssemblyError> for Error {
fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) }
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Error::LexError(e) => Display::fmt(e, f),
Error::ParseError(e) => Display::fmt(e, f),
Error::AssemblyError(e) => Display::fmt(e, f),
}
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::LexError(e) => Some(e),
Error::ParseError(e) => Some(e),
Error::AssemblyError(e) => Some(e),
}
}
}

View File

@@ -1,22 +0,0 @@
// © 2023 John Breaux
//! Convenience functions and traits for dealing with hashable data
pub type Hash = u64;
/// Calculates a hash using Rust hashmap's default hasher.
pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
use std::hash::Hasher;
let mut hasher = std::collections::hash_map::DefaultHasher::new();
hashable.hash(&mut hasher);
hasher.finish()
}
pub trait FromHash: From<Hash> {
/// Hashes anything that implements [type@Hash] using the
/// [DefaultHasher](std::collections::hash_map::DefaultHasher)
fn hash<T: std::hash::Hash>(hashable: T) -> Hash { hash(hashable) }
fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
where Self: Sized {
Self::from(Self::hash(hashable))
}
}
impl<T: From<Hash>> FromHash for T {}

View File

@@ -1,69 +1,268 @@
// © 2023 John Breaux
//! Iterates over [`&str`](str), producing [`Token`s](Token)
// © 2023-2024 John Breaux
//! The [Lexer] turns a [sequence of characters](str) into a stream of
//! [lexically-tagged tokens](token)
pub mod context;
pub mod error;
pub mod ignore;
pub mod preprocessed;
pub mod token;
pub mod token_stream;
use context::Context;
use error::LexError;
use token::{Token, Type};
use token_stream::TokenStream;
use self::token::{Special, TokenKind, *};
use crate::util::Span;
use std::{
iter::Peekable,
str::{CharIndices, FromStr},
};
use unicode_ident::*;
/// Iterates over &[str], producing [Token]s
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokenizer<'t> {
const DEFAULT_BASE: u32 = 10;
/// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token).
///
/// # Examples
/// ```rust
/// # use libmsp430::lexer::{Lexer, token::*};
/// let text = "mov r14, r15";
/// let mut lexer = Lexer::new(text);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma);
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15));
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof);
/// ```
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
/// Keeps track of the byte offset into the string
iter: Peekable<CharIndices<'t>>,
text: &'t str,
idx: usize,
context: Context,
start: usize,
index: usize,
}
impl<'t> Tokenizer<'t> {
/// Produces a new [Tokenizer] from a [str]ing slice
pub fn new<T>(text: &'t T) -> Self
where T: AsRef<str> + ?Sized {
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
impl<'t> Lexer<'t> {
/// Creates a new [Lexer] over some [text](str)
pub fn new(text: &'t str) -> Self {
Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 }
}
fn count(&mut self, token: &Token) {
// update the context
self.context.count(token);
// advance the index
self.idx += token.len();
/// Gets the current byte-position
pub fn location(&self) -> usize {
self.start
}
}
impl<'text> Iterator for Tokenizer<'text> {
type Item = Token<'text>;
fn next(&mut self) -> Option<Self::Item> {
if self.idx >= self.text.len() {
return None;
/// Internal: Emits a token with the provided [TokenKind], providing its extents.
fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> {
let out =
Some(Token::new(self.next_lexeme(), kind, Span { start: self.start, end: self.index }));
self.start = self.index;
out
}
fn next_lexeme(&self) -> &'t str {
&self.text[self.start..self.index]
}
fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self {
while let Some(&c) = self.peek() {
if !f(c) {
break;
}
self.next();
}
let token = Token::from(&self.text[self.idx..]);
// Process [Type::Directive]s
// Count the token
self.count(&token);
Some(token)
self
}
fn space(&mut self) -> Option<&mut Self> {
while self.peek()?.is_whitespace() && *self.peek()? != '\n' {
self.next();
}
self.start = self.index;
Some(self)
}
/// Consumes a [char] without checking, for ergonomic chaining
fn then(&mut self) -> &mut Self {
self.next();
self
}
fn peek(&mut self) -> Option<&char> {
self.iter.peek().map(|(_, c)| c)
}
fn next(&mut self) -> Option<char> {
let (index, c) = self.iter.next()?;
self.index = index + c.len_utf8();
Some(c)
}
/// Scans for the next [Token] in the stream
pub fn scan(&mut self) -> Option<Token<'t>> {
if self.space().is_none() {
return self.emit(TokenKind::Eof);
}
let Some(c) = self.peek() else {
return self.emit(TokenKind::Eof);
};
match c {
'\n' => self.then().emit(TokenKind::Newline),
'!' => self.then().emit(TokenKind::Bang),
'#' => self.then().emit(TokenKind::Hash),
'$' => self.then().emit(TokenKind::Dollar),
'%' => self.then().emit(TokenKind::Percent),
'&' => self.then().emit(TokenKind::Amp),
'\'' => self.then().char(),
'"' => self.then().string(),
'(' => self.then().emit(TokenKind::OpenParen),
')' => self.then().emit(TokenKind::CloseParen),
'*' => self.then().emit(TokenKind::Star),
'+' => self.then().emit(TokenKind::Plus),
',' => self.then().emit(TokenKind::Comma),
'-' => self.then().emit(TokenKind::Minus),
'.' => self.then().directive_or_bw(),
'/' => self.then().comment_or_slash(),
'0' => self.then().number_with_base(),
':' => self.then().emit(TokenKind::Colon),
';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
'<' => self.then().less(),
'>' => self.then().greater(),
'@' => self.then().emit(TokenKind::At),
'[' => self.then().emit(TokenKind::OpenBrace),
']' => self.then().emit(TokenKind::CloseBrace),
'^' => self.then().emit(TokenKind::Caret),
'_' => self.then().identifier(),
'{' => self.then().emit(TokenKind::OpenCurly),
'|' => self.then().emit(TokenKind::Bar),
'}' => self.then().emit(TokenKind::CloseCurly),
c if c.is_numeric() => self.number::<DEFAULT_BASE>(),
&c if is_xid_start(c) => self.then().identifier(),
c => todo!("Unrecognized character: {c}"),
}
}
fn number_with_base(&mut self) -> Option<Token<'t>> {
match self.peek() {
Some('x') => self.then().number::<16>(),
Some('d') => self.then().number::<10>(),
Some('o') => self.then().number::<8>(),
Some('b') => self.then().number::<2>(),
Some(c) if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(),
_ => self.emit(TokenKind::Number(0, 10)),
}
}
fn number<const B: u32>(&mut self) -> Option<Token<'t>> {
let mut num = self.digit::<B>()?;
while let Some(digit) = self.digit::<B>() {
num = num * B + digit;
}
if num > u16::MAX as u32 {
None
} else {
self.emit(TokenKind::Number(num as u16, B as u8))
}
}
fn digit<const B: u32>(&mut self) -> Option<u32> {
let digit = self.peek()?.to_digit(B)?;
self.then();
Some(digit)
}
fn comment_or_slash(&mut self) -> Option<Token<'t>> {
match self.peek() {
Some('/') => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
_ => self.emit(TokenKind::Slash),
}
}
fn less(&mut self) -> Option<Token<'t>> {
match self.peek() {
Some('<') => self.then().emit(TokenKind::Lsh),
_ => todo!("less"),
}
}
fn greater(&mut self) -> Option<Token<'t>> {
match self.peek() {
Some('>') => self.then().emit(TokenKind::Rsh),
_ => todo!("greater"),
}
}
fn identifier(&mut self) -> Option<Token<'t>> {
while let Some(&c) = self.peek() {
if !is_xid_continue(c) {
break;
}
self.then();
}
let lexeme = self.next_lexeme();
if let Ok(op) = Reg::from_str(lexeme) {
self.emit(TokenKind::Reg(op))
} else if let Ok(op) = NoEm::from_str(lexeme) {
self.emit(TokenKind::NoEm(op))
} else if let Ok(op) = OneEm::from_str(lexeme) {
self.emit(TokenKind::OneEm(op))
} else if let Ok(op) = Special::from_str(lexeme) {
self.emit(TokenKind::Special(op))
} else if let Ok(op) = OneArg::from_str(lexeme) {
self.emit(TokenKind::OneArg(op))
} else if let Ok(op) = TwoArg::from_str(lexeme) {
self.emit(TokenKind::TwoArg(op))
} else if let Ok(op) = Jump::from_str(lexeme) {
self.emit(TokenKind::Jump(op))
} else {
self.emit(TokenKind::Identifier)
}
}
fn directive_or_bw(&mut self) -> Option<Token<'t>> {
while let Some(&c) = self.peek() {
if !is_xid_continue(c) {
break;
}
self.then();
}
match self.next_lexeme() {
".b" => self.emit(TokenKind::Byte),
".w" => self.emit(TokenKind::Word),
_ => self.emit(TokenKind::Directive),
}
}
/// Todo: Character unescaping in Lexer::string
fn string(&mut self) -> Option<Token<'t>> {
while '"' != self.next()? {}
self.emit(TokenKind::String)
}
fn char(&mut self) -> Option<Token<'t>> {
let out = self.unescape()?;
self.next().filter(|c| *c == '\'').and_then(|_| self.emit(TokenKind::Char(out)))
}
/// Unescape a single character
fn unescape(&mut self) -> Option<char> {
match self.next() {
Some('\\') => (),
other => return other,
}
Some(match self.next()? {
'a' => '\x07',
'b' => '\x08',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'x' => self.hex_escape()?,
'u' => self.unicode_escape()?,
'0' => '\0',
chr => chr,
})
}
/// unescape a single 2-digit hex escape
fn hex_escape(&mut self) -> Option<char> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out) //.ok_or(Error::bad_unicode(out, self.line(), self.col()))
}
/// unescape a single \u{} unicode escape
fn unicode_escape(&mut self) -> Option<char> {
let mut out = 0;
let Some('{') = self.peek() else {
return None; //Err(Error::invalid_escape('u', self.line(), self.col()));
};
self.then();
while let Some(c) = self.peek() {
match c {
'}' => {
self.then();
return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col()));
}
_ => out = (out << 4) + self.digit::<16>()?,
}
}
None //Err(Error::invalid_escape('u', self.line(), self.col()))
}
}
impl<'text> TokenStream<'text> for Tokenizer<'text> {
fn context(&self) -> Context { self.context }
// Tokenizer has access to the source buffer, and can implement expect and peek without cloning
// itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
// registers are valid identifiers.
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
self.count(&token);
Ok(token)
}
fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))
}
}
#[cfg(test)]
mod tests;

View File

@@ -1,38 +0,0 @@
// © 2023 John Breaux
//! A [Context] stores contextual information about the current tokenizer state
//!
//! This data is trivially copyable and can be provided in error messages using the
//! [Error::Contextual] specialization)
use super::*;
/// Stores contextual information about the current tokenizer state, useful for printing errors
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Context {
line: usize,
position: usize,
tokens: usize,
}
impl Context {
pub fn new() -> Self { Default::default() }
pub fn line(&self) -> usize { self.line }
pub fn tokens(&self) -> usize { self.tokens }
pub fn position(&self) -> usize { self.position }
pub(super) fn count(&mut self, t: &Token) {
match t.variant() {
Type::EndOfFile => return,
Type::Endl => {
self.line += 1;
self.position = 1;
}
_ => self.position += t.len(),
}
self.tokens += 1;
}
}
impl Default for Context {
fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } }
}
impl std::fmt::Display for Context {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) }
}

View File

@@ -1,68 +0,0 @@
// © 2023 John Breauxs
use super::{
context::Context,
token::{OwnedToken, *},
};
use std::fmt::Display;
#[derive(Debug)]
pub enum LexError {
/// Any other error, tagged with [Context]. Created by [`Error::context()`]
Contextual(Context, Box<Self>),
/// Produced by [Token] when the input is entirely unexpected.
UnexpectedSymbol(String),
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
UnexpectedToken { expected: Type, got: OwnedToken },
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the
/// expected [Types](Type)
AllExpectationsFailed { expected: Types, got: OwnedToken },
}
impl LexError {
pub fn context(self, c: Context) -> Self {
match self {
Self::Contextual(..) => self,
_ => Self::Contextual(c, Box::new(self)),
}
}
// Extracts the root of the error tree
pub fn bare(self) -> Self {
match self {
Self::Contextual(_, bare) => bare.bare(),
_ => self,
}
}
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
match expected.as_ref().len() {
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
}
}
pub fn mask_expectation(mut self, expected: Type) -> Self {
match self {
LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got },
LexError::Contextual(context, err) => {
self = LexError::Contextual(context, Box::new(err.mask_expectation(expected)))
}
_ => (),
}
self
}
}
impl Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""),
LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."),
}
}
}
impl std::error::Error for LexError {}

View File

@@ -1,55 +0,0 @@
// © 2023 John Breaux
//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`]
use super::*;
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ignore<'t, T>
where T: TokenStream<'t>
{
ignore: Type,
inner: &'t mut T,
}
impl<'t, T> Ignore<'t, T>
where T: TokenStream<'t>
{
/// Creates a new [Ignore], which ignores the [ignore Type](Type)
pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } }
/// Gets a mutable reference to the inner [Iterator]
pub fn inner_mut(&mut self) -> &mut T { self.inner }
}
impl<'t, T> Iterator for Ignore<'t, T>
where T: TokenStream<'t>
{
type Item = Token<'t>;
fn next(&mut self) -> Option<Self::Item> {
let next = self.inner.next()?;
// Space tokens are greedy, so the next token shouldn't be a Space
match next.variant() {
Type::Space => self.next(),
_ => Some(next),
}
}
}
impl<'t, T> TokenStream<'t> for Ignore<'t, T>
where T: TokenStream<'t>
{
fn context(&self) -> Context { self.inner.context() }
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
self.inner.allow(self.ignore);
self.inner.expect(expected)
}
fn peek(&mut self) -> Self::Item {
self.inner.allow(self.ignore);
self.inner.peek()
}
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
self.inner.allow(self.ignore);
self.inner.peek_expect(expected)
}
}

View File

@@ -1,174 +0,0 @@
// © 2023 John Breaux
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
//! ".define" rules
use super::*;
use std::collections::{HashMap, VecDeque};
// TODO: Clean this spaghetti mess up
/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
/// rules
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(PartialEq, Eq)]
pub struct Preprocessed<'t, T>
where T: TokenStream<'t>
{
sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
sub_types: Vec<Type>,
queue: VecDeque<Token<'t>>,
inner: &'t mut T,
}
impl<'t, T> Iterator for Preprocessed<'t, T>
where T: TokenStream<'t>
{
type Item = Token<'t>;
fn next(&mut self) -> Option<Self::Item> {
match self.queue.pop_front() {
Some(token) => Some(token),
None => {
let next = self.inner.next()?;
if let Some(subs) = self.sub_table.get(&next) {
self.queue.extend(subs);
return self.next();
}
Some(next)
}
}
}
}
impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
/// Creates a new [Preprocessed] [TokenStream]
pub fn new(inner: &'t mut T) -> Self {
Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
}
/// Gets a mutable reference to the inner [TokenStream]
pub fn inner_mut(&mut self) -> &mut T { self.inner }
/// Preserve the next token in the queue
fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
self.queue.push_back(token);
token
}
/// Process .define directives in the preprocessor
fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
return Ok(());
}
// Tokenize the subdocument
self.allow(Type::Directive);
self.allow(Type::Space);
let Some(k) = self.inner.next() else { return Ok(()) };
if !self.sub_types.contains(&k.variant()) {
self.sub_types.push(k.variant());
};
self.allow(Type::Space);
let mut replacement = vec![];
loop {
match self.inner.peek().variant() {
Type::Endl | Type::EndOfFile => break,
Type::Comment | Type::Space => {
// ignore comments
self.inner.next();
}
_ => {
let next = self.inner.next().unwrap();
replacement.push(self.enqueue(next));
}
}
}
self.sub_table.insert(k, replacement);
Ok(())
}
/// Does the preprocessing step
fn preprocess(&mut self, token: Token<'t>) {
if let Some(subs) = self.sub_table.get(&token) {
self.queue.extend(subs);
self.inner.next();
}
}
}
impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
where T: TokenStream<'t>
{
fn context(&self) -> Context { self.inner.context() }
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
match self.queue.front() {
Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
None => {
// Only resolve defines when expecting, otherwise you'll run into issues.
if let Ok(next) = self.inner.expect(expected) {
self.define(next)?;
return Ok(next);
}
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
if let Some(subs) = self.sub_table.get(&next) {
self.inner.allow_any_of(&self.sub_types);
self.queue.extend(subs);
}
return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
}
Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
}
}
}
fn peek(&mut self) -> Self::Item {
match self.queue.front() {
Some(token) => *token,
None => {
// Only allow substitution when the next token is unexpected
let old = self.inner.peek();
self.preprocess(old);
match self.queue.front() {
Some(&new) => new,
None => old,
}
}
}
}
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
match self.queue.front() {
Some(&token) if token.is_variant(expected) => Ok(token),
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
None => {
if let Ok(next) = self.inner.peek_expect(expected) {
return Ok(next);
}
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
self.preprocess(next);
return if self.queue.is_empty() {
self.inner.peek_expect(expected)
} else {
self.peek_expect(expected)
};
}
Err(LexError::expected([expected], self.inner.peek()))
}
}
}
}
impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
where T: TokenStream<'t>
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Preprocessed")
.field("sub_table", &self.sub_table)
.field("sub_types", &self.sub_types)
.field("queue", &self.queue)
.field("context", &self.context())
.finish_non_exhaustive()
}
}

src/lexer/tests.rs (new file, 66 additions)
View File

@@ -0,0 +1,66 @@
use super::*;
macro_rules! lex {
(type ($t:tt), $expected:expr) => {
let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token));
assert_eq!(token.kind, $expected);
};
({ $($t:tt)* }) => {
Lexer::new(stringify!($($t)*))
};
}
#[test]
fn ascii_char() {
lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char
lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char
}
#[test]
fn unicode_escape_char() {
lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈
}
#[test]
fn number_with_base() {
lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number
lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number
lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number
lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number
lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number
lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number
}
#[test]
fn no_operand_emulated() {
lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm
lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm
lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm
lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm
lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm
lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm
lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm
lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm
lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm
lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
}
#[test]
fn registers() {
lex!(type(pc), TokenKind::Reg(Reg::PC));
lex!(type(sp), TokenKind::Reg(Reg::SP));
lex!(type(sr), TokenKind::Reg(Reg::SR));
lex!(type(cg), TokenKind::Reg(Reg::CG));
lex!(type(r0), TokenKind::Reg(Reg::PC));
lex!(type(r1), TokenKind::Reg(Reg::SP));
lex!(type(r2), TokenKind::Reg(Reg::SR));
lex!(type(r3), TokenKind::Reg(Reg::CG));
lex!(type(r4), TokenKind::Reg(Reg::R4));
lex!(type(r5), TokenKind::Reg(Reg::R5));
lex!(type(r6), TokenKind::Reg(Reg::R6));
lex!(type(r7), TokenKind::Reg(Reg::R7));
lex!(type(r8), TokenKind::Reg(Reg::R8));
lex!(type(r9), TokenKind::Reg(Reg::R9));
lex!(type(r10), TokenKind::Reg(Reg::R10));
lex!(type(r11), TokenKind::Reg(Reg::R11));
lex!(type(r12), TokenKind::Reg(Reg::R12));
lex!(type(r13), TokenKind::Reg(Reg::R13));
lex!(type(r14), TokenKind::Reg(Reg::R14));
lex!(type(r15), TokenKind::Reg(Reg::R15));
}
// TODO: opcode tests, misc. special character tests, etc.
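// A sketch of what the TODO'd opcode tests could look like (hypothetical, not in this commit):
//
//     #[test]
//     fn jumps() {
//         lex!(type (jmp), TokenKind::Jump(Jump::Jmp)); // jmp should be a valid Jump
//         lex!(type (jnz), TokenKind::Jump(Jump::Jnz)); // jnz should be a valid Jump
//     }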

View File

@@ -1,335 +1,479 @@
// © 2023 John Breaux
//! A [Token] is a [semantically tagged](Type) sequence of characters.
// © 2023-2024 John Breaux
//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span]
//!
//! Token, and the tokenizer, intend to copy as little as possible.
use super::error::LexError;
use regex::Regex;
use std::{
fmt::{Debug, Display},
sync::OnceLock,
};
/// Implements regex matching functions on [`Token`] for each [`Type`],
/// and implements [`From<&str>`] for [`Token`]
macro_rules! regex_impl {
(<$t:lifetime> $type:ty {$(
$(#[$meta:meta])*
pub fn $func:ident (text: &str) -> Option<Self> {
regex!($out:path = $re:literal)
//! [Tokens](Token) are borrowed, and cannot outlive their source slice (lifetime `'t`)
use crate::util::Span;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'t> {
pub lexeme: &'t str,
pub kind: TokenKind,
pub pos: Span<usize>,
}
impl<'t> Token<'t> {
pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self {
Self { lexeme, kind, pos }
}
)*}) => {
impl<$t> $type {
/// Lexes a token only for the expected `variant`
///
/// Warning: This bypasses precedence rules. Only use for specific patterns.
pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> {
match expected {$(
$out => Self::$func(text),
)*}.ok_or(LexError::UnexpectedToken {
expected,
got: Self::from(text).into(),
})
}
$(
$(#[$meta])*
/// Tries to read [`
#[doc = stringify!($out)]
/// `] from `text`
pub fn $func(text: &$t str) -> Option<Self> {
static RE: OnceLock<Regex> = OnceLock::new();
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
.find(text)?.into();
Some(Self { variant: $out, lexeme })
})*
}
impl<$t> From<&$t str> for $type {
fn from (value: &$t str) -> Self {
$(
if let Some(token) = Self::$func(value) {
token
} else
)*
{todo!("Unexpected input: {value:#?} (Tokenization failure)")}
}
}
};
}
/// A [Token] is a [semantically tagged](Type) sequence of characters
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'text> {
/// The type of this token
variant: Type,
/// The sub[str]ing corresponding to this token
lexeme: &'text str,
}
impl<'text> Token<'text> {
/// Returns the [Type] of this [Token]
pub fn variant(&self) -> Type { self.variant }
/// Returns the lexeme (originating string slice) of this token
pub fn lexeme(&self) -> &'text str { self.lexeme }
/// Parses this [Token] into another type
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
where F: std::str::FromStr {
self.lexeme.parse()
}
/// Returns whether the Lexeme is the expected [Type]
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
/// Returns the length of [Self::lexeme] in bytes.
pub fn len(&self) -> usize { self.lexeme.len() }
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
}
impl<'text> Debug for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
pub fn kind(&self) -> TokenKind {
self.kind
}
}
impl<'text> Display for Token<'text> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.variant {
Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
v => write!(f, "{v} \"{}\"", self.lexeme),
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenKind {
Eof,
Newline, // \n
OpenParen, // (
CloseParen, // )
OpenCurly, // {
CloseCurly, // }
OpenBrace, // [
CloseBrace, // ]
Comma, // ,
Colon, // :
Bang, // !
At, // @
Amp, // &
Bar, // |
Caret, // ^
Star, // *
Hash, // #
Dollar, // $
Percent, // %
Plus, // +
Minus, // -
Slash, // /
Lsh, // <<
Rsh, // >>
Comment, // (';' | '//') .* '\n' |
Directive, // '.' XID_CONTINUE*
Identifier, // XID_START XID_CONTINUE*
Number(u16, u8), // varies depending on base
Char(char), // '\'' ('\' Escape | .) '\''
String, // '"' .* '"'
Reg(Reg),
NoEm(NoEm),
OneEm(OneEm),
Special(Special),
OneArg(OneArg),
TwoArg(TwoArg),
Jump(Jump),
Byte, // .b
Word, // .w
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Reg {
PC,
SP,
SR,
CG,
R4,
R5,
R6,
R7,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
}
/// Fake instructions of the form `opcode`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NoEm {
Nop,
Ret,
Clrc,
Clrz,
Clrn,
Setc,
Setz,
Setn,
Dint,
Eint,
}
/// Fake instructions of the form `opcode dst`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneEm {
Pop,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
}
/// These opcodes have bespoke grammatical rules
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Special {
/// Br = "br" Src
Br,
}
/// Real instructions of the form `opcode src`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OneArg {
Rrc,
Swpb,
Rra,
Sxt,
Push,
Call,
Reti,
}
/// Real instructions of the form `opcode src, dst`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TwoArg {
Mov,
Add,
Addc,
Subc,
Sub,
Cmp,
Dadd,
Bit,
Bic,
Bis,
Xor,
And,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Jump {
Jne,
Jnz,
Jeq,
Jz,
Jnc,
Jlo,
Jc,
Jhs,
Jn,
Jge,
Jl,
Jmp,
}
mod convert {
//! Implementations of [FromStr] for [token](super) types.
use super::*;
use std::str::FromStr;
impl FromStr for Reg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pc" => Reg::PC,
"sp" => Reg::SP,
"sr" => Reg::SR,
"cg" => Reg::CG,
"r0" => Reg::PC,
"r1" => Reg::SP,
"r2" => Reg::SR,
"r3" => Reg::CG,
"r4" => Reg::R4,
"r5" => Reg::R5,
"r6" => Reg::R6,
"r7" => Reg::R7,
"r8" => Reg::R8,
"r9" => Reg::R9,
"r10" => Reg::R10,
"r11" => Reg::R11,
"r12" => Reg::R12,
"r13" => Reg::R13,
"r14" => Reg::R14,
"r15" => Reg::R15,
_ => Err(())?,
})
}
}
impl FromStr for NoEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"nop" => NoEm::Nop,
"ret" => NoEm::Ret,
"clrc" => NoEm::Clrc,
"clrz" => NoEm::Clrz,
"clrn" => NoEm::Clrn,
"setc" => NoEm::Setc,
"setz" => NoEm::Setz,
"setn" => NoEm::Setn,
"dint" => NoEm::Dint,
"eint" => NoEm::Eint,
_ => Err(())?,
})
}
}
impl FromStr for OneEm {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"pop" => OneEm::Pop,
"rla" => OneEm::Rla,
"rlc" => OneEm::Rlc,
"inv" => OneEm::Inv,
"clr" => OneEm::Clr,
"tst" => OneEm::Tst,
"dec" => OneEm::Dec,
"decd" => OneEm::Decd,
"inc" => OneEm::Inc,
"incd" => OneEm::Incd,
"adc" => OneEm::Adc,
"dadc" => OneEm::Dadc,
"sbc" => OneEm::Sbc,
_ => Err(())?,
})
}
}
impl FromStr for Special {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"br" => Special::Br,
_ => Err(())?,
})
}
}
impl FromStr for OneArg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"rrc" => OneArg::Rrc,
"swpb" => OneArg::Swpb,
"rra" => OneArg::Rra,
"sxt" => OneArg::Sxt,
"push" => OneArg::Push,
"call" => OneArg::Call,
"reti" => OneArg::Reti,
_ => Err(())?,
})
}
}
impl FromStr for TwoArg {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"mov" => TwoArg::Mov,
"add" => TwoArg::Add,
"addc" => TwoArg::Addc,
"subc" => TwoArg::Subc,
"sub" => TwoArg::Sub,
"cmp" => TwoArg::Cmp,
"dadd" => TwoArg::Dadd,
"bit" => TwoArg::Bit,
"bic" => TwoArg::Bic,
"bis" => TwoArg::Bis,
"xor" => TwoArg::Xor,
"and" => TwoArg::And,
_ => Err(())?,
})
}
}
impl FromStr for Jump {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"jne" => Jump::Jne,
"jnz" => Jump::Jnz,
"jeq" => Jump::Jeq,
"jz" => Jump::Jz,
"jnc" => Jump::Jnc,
"jlo" => Jump::Jlo,
"jc" => Jump::Jc,
"jhs" => Jump::Jhs,
"jn" => Jump::Jn,
"jge" => Jump::Jge,
"jl" => Jump::Jl,
"jmp" => Jump::Jmp,
_ => Err(())?,
})
}
}
}
/// A [token Type](Type) is a semantic tag for a sequence of characters
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
/// contiguous whitespace, excluding newline
Space,
/// newline and contiguous whitespace
Endl,
/// A line-comment
Comment,
/// Jump label *definition*
Label,
/// Instructions
Insn,
/// Operand width is byte
ByteWidth,
/// Operand width is word
WordWidth,
/// Register mnemonic (i.e. `pc`, `r14`)
Register,
/// Marker for base-10
RadixMarkerDec,
/// Marker for base-16
RadixMarkerHex,
/// Marker for base-8
RadixMarkerOct,
/// Marker for base-2
RadixMarkerBin,
/// 1-4 hexadigit numbers only
Number,
/// Negative number marker
Minus,
/// post-increment mode marker
Plus,
/// Open-Indexed-Mode marker
LParen,
/// Close-Indexed-Mode marker
RParen,
/// Open Square Bracket
LBracket,
/// Closed Square Bracket
RBracket,
/// Indirect mode marker
Indirect,
/// absolute address marker
Absolute,
/// immediate value marker
Immediate,
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underline
Identifier,
/// A string, encased in "quotes"
String,
/// Assembler directive
Directive,
/// Separator (comma)
Separator,
/// End of File marker
#[default]
EndOfFile,
/// Invalid token
Invalid,
}
regex_impl! {<'text> Token<'text> {
pub fn expect_space(text: &str) -> Option<Self> {
regex!(Type::Space = r"^[\s--\n]+")
}
pub fn expect_endl(text: &str) -> Option<Self> {
regex!(Type::Endl = r"^\n[\s--\n]*")
}
pub fn expect_comment(text: &str) -> Option<Self> {
regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
}
pub fn expect_label(text: &str) -> Option<Self> {
regex!(Type::Label = r"^:")
}
pub fn expect_insn(text: &str) -> Option<Self> {
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
}
pub fn expect_byte_width(text: &str) -> Option<Self> {
regex!(Type::ByteWidth = r"(?i)^\.b")
}
pub fn expect_word_width(text: &str) -> Option<Self> {
regex!(Type::WordWidth = r"(?i)^\.w")
}
pub fn expect_register(text: &str) -> Option<Self> {
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
}
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerDec = r"(?i)^0d")
}
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
}
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerOct = r"(?i)^0o")
}
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
regex!(Type::RadixMarkerBin = r"(?i)^0b")
}
pub fn expect_number(text: &str) -> Option<Self> {
regex!(Type::Number = r"^\+?[[:xdigit:]]+(?-u:\b)")
}
pub fn expect_minus(text: &str) -> Option<Self> {
regex!(Type::Minus = r"^-")
}
pub fn expect_plus(text: &str) -> Option<Self> {
regex!(Type::Plus = r"^\+")
}
pub fn expect_l_paren(text: &str) -> Option<Self> {
regex!(Type::LParen = r"^\(")
}
pub fn expect_r_paren(text: &str) -> Option<Self> {
regex!(Type::RParen = r"^\)")
}
pub fn expect_l_bracket(text: &str) -> Option<Self> {
regex!(Type::LBracket = r"^\[")
}
pub fn expect_r_bracket(text: &str) -> Option<Self> {
regex!(Type::RBracket = r"^]")
}
pub fn expect_indrect(text: &str) -> Option<Self> {
regex!(Type::Indirect = r"^@")
}
pub fn expect_absolute(text: &str) -> Option<Self> {
regex!(Type::Absolute = r"^&")
}
pub fn expect_immediate(text: &str) -> Option<Self> {
regex!(Type::Immediate = r"^#")
}
pub fn expect_string(text: &str) -> Option<Self> {
regex!(Type::String = r#"^"[^"]*""#)
}
pub fn expect_directive(text: &str) -> Option<Self> {
regex!(Type::Directive = r"^\.\w+")
}
pub fn expect_identifier(text: &str) -> Option<Self> {
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
}
pub fn expect_separator(text: &str) -> Option<Self> {
regex!(Type::Separator = r"^,")
}
pub fn expect_end_of_file(text: &str) -> Option<Self> {
regex!(Type::EndOfFile = r"^$")
}
pub fn expect_anything(text: &str) -> Option<Self> {
regex!(Type::Invalid = r"^.*")
}
}}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Space => Display::fmt("space", f),
Self::Endl => Display::fmt("newline", f),
Self::Comment => Display::fmt("comment", f),
Self::Label => Display::fmt("label definition", f),
Self::Insn => Display::fmt("opcode", f),
Self::ByteWidth => Display::fmt("byte-width", f),
Self::WordWidth => Display::fmt("word-width", f),
Self::Register => Display::fmt("register", f),
Self::RadixMarkerDec => Display::fmt("decimal marker", f),
Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f),
Self::RadixMarkerOct => Display::fmt("octal marker", f),
Self::RadixMarkerBin => Display::fmt("binary marker", f),
Self::Number => Display::fmt("number", f),
Self::Minus => Display::fmt("minus sign", f),
Self::Plus => Display::fmt("plus sign", f),
Self::LParen => Display::fmt("left parenthesis", f),
Self::RParen => Display::fmt("right parenthesis", f),
Self::LBracket => Display::fmt("left bracket", f),
Self::RBracket => Display::fmt("right bracket", f),
Self::Indirect => Display::fmt("indirect", f),
Self::Absolute => Display::fmt("absolute", f),
Self::Immediate => Display::fmt("immediate", f),
Self::Identifier => Display::fmt("identifier", f),
Self::String => Display::fmt("string", f),
Self::Directive => Display::fmt("directive", f),
Self::Separator => Display::fmt("comma", f),
Self::EndOfFile => Display::fmt("EOF", f),
Self::Invalid => Display::fmt("invalid token", f),
}
}
}
/// A [Token] which can outlive its parent buffer
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OwnedToken {
/// The type of this token
variant: Type,
/// The sub[String] corresponding to this token
lexeme: String,
}
impl Display for OwnedToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
}
impl<'t> From<&'t OwnedToken> for Token<'t> {
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
}
impl From<Token<'_>> for OwnedToken {
fn from(value: Token<'_>) -> Self {
let Token { variant, lexeme } = value;
OwnedToken { variant, lexeme: lexeme.to_owned() }
}
}
/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Types(Vec<Type>);
impl<T: AsRef<[Type]>> From<T> for Types {
// TODO: Possibly bad. Check out in rust playground.
fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
}
impl Display for Types {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (idx, t) in self.0.iter().enumerate() {
Display::fmt(t, f)?;
match idx {
i if i < self.0.len() - 2 => Display::fmt(", ", f)?,
i if i < self.0.len() - 1 => Display::fmt(" or ", f)?,
_ => (),
mod display {
//! Implementations of [Display] for [token](super) types.
use super::*;
use std::fmt::Display;
impl<'t> Display for Token<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { lexeme, kind, pos: _ } = self;
match kind {
TokenKind::Comment
| TokenKind::Directive
| TokenKind::Identifier
| TokenKind::String => {
write!(f, "{}", lexeme)
}
ty => ty.fmt(f),
}
}
}
impl Display for TokenKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenKind::Eof => write!(f, "[EOF]"),
TokenKind::Newline => writeln!(f),
TokenKind::OpenParen => write!(f, "("),
TokenKind::CloseParen => write!(f, ")"),
TokenKind::OpenCurly => write!(f, "{{"),
TokenKind::CloseCurly => write!(f, "}}"),
TokenKind::OpenBrace => write!(f, "["),
TokenKind::CloseBrace => write!(f, "]"),
TokenKind::Comma => write!(f, ","),
TokenKind::Colon => write!(f, ":"),
TokenKind::Bang => write!(f, "!"),
TokenKind::At => write!(f, "@"),
TokenKind::Amp => write!(f, "&"),
TokenKind::Bar => write!(f, "|"),
TokenKind::Caret => write!(f, "^"),
TokenKind::Star => write!(f, "*"),
TokenKind::Hash => write!(f, "#"),
TokenKind::Dollar => write!(f, "$"),
TokenKind::Percent => write!(f, "%"),
TokenKind::Plus => write!(f, "+"),
TokenKind::Minus => write!(f, "-"),
TokenKind::Slash => write!(f, "/"),
TokenKind::Lsh => write!(f, "<<"),
TokenKind::Rsh => write!(f, ">>"),
TokenKind::Comment => write!(f, "; "),
TokenKind::Directive => write!(f, "."),
TokenKind::Identifier => write!(f, "Identifier"),
TokenKind::Number(val, 2) => write!(f, "0b{val:b}"),
TokenKind::Number(val, 8) => write!(f, "0o{val:o}"),
TokenKind::Number(val, 16) => write!(f, "0x{val:x}"),
TokenKind::Number(val, _) => write!(f, "{val}"),
TokenKind::Char(c) => write!(f, "'{c}'"),
TokenKind::String => write!(f, "\"String\""),
TokenKind::Reg(kw) => write!(f, "{kw}"),
TokenKind::NoEm(kw) => write!(f, "{kw}"),
TokenKind::OneEm(kw) => write!(f, "{kw}"),
TokenKind::Special(kw) => write!(f, "{kw}"),
TokenKind::OneArg(kw) => write!(f, "{kw}"),
TokenKind::TwoArg(kw) => write!(f, "{kw}"),
TokenKind::Jump(kw) => write!(f, "{kw}"),
TokenKind::Byte => write!(f, ".b"),
TokenKind::Word => write!(f, ".w"),
}
}
}
impl Display for Reg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Reg::PC => "pc".fmt(f),
Reg::SP => "sp".fmt(f),
Reg::SR => "sr".fmt(f),
Reg::CG => "cg".fmt(f),
Reg::R4 => "r4".fmt(f),
Reg::R5 => "r5".fmt(f),
Reg::R6 => "r6".fmt(f),
Reg::R7 => "r7".fmt(f),
Reg::R8 => "r8".fmt(f),
Reg::R9 => "r9".fmt(f),
Reg::R10 => "r10".fmt(f),
Reg::R11 => "r11".fmt(f),
Reg::R12 => "r12".fmt(f),
Reg::R13 => "r13".fmt(f),
Reg::R14 => "r14".fmt(f),
Reg::R15 => "r15".fmt(f),
}
}
}
impl Display for NoEm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
NoEm::Nop => "nop".fmt(f),
NoEm::Ret => "ret".fmt(f),
NoEm::Clrc => "clrc".fmt(f),
NoEm::Clrz => "clrz".fmt(f),
NoEm::Clrn => "clrn".fmt(f),
NoEm::Setc => "setc".fmt(f),
NoEm::Setz => "setz".fmt(f),
NoEm::Setn => "setn".fmt(f),
NoEm::Dint => "dint".fmt(f),
NoEm::Eint => "eint".fmt(f),
}
}
}
impl Display for OneEm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OneEm::Pop => "pop".fmt(f),
OneEm::Rla => "rla".fmt(f),
OneEm::Rlc => "rlc".fmt(f),
OneEm::Inv => "inv".fmt(f),
OneEm::Clr => "clr".fmt(f),
OneEm::Tst => "tst".fmt(f),
OneEm::Dec => "dec".fmt(f),
OneEm::Decd => "decd".fmt(f),
OneEm::Inc => "inc".fmt(f),
OneEm::Incd => "incd".fmt(f),
OneEm::Adc => "adc".fmt(f),
OneEm::Dadc => "dadc".fmt(f),
OneEm::Sbc => "sbc".fmt(f),
}
}
}
impl Display for Special {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Special::Br => "br".fmt(f),
}
}
}
impl Display for OneArg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OneArg::Rrc => "rrc".fmt(f),
OneArg::Swpb => "swpb".fmt(f),
OneArg::Rra => "rra".fmt(f),
OneArg::Sxt => "sxt".fmt(f),
OneArg::Push => "push".fmt(f),
OneArg::Call => "call".fmt(f),
OneArg::Reti => "reti".fmt(f),
}
}
}
impl Display for TwoArg {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TwoArg::Mov => "mov".fmt(f),
TwoArg::Add => "add".fmt(f),
TwoArg::Addc => "addc".fmt(f),
TwoArg::Subc => "subc".fmt(f),
TwoArg::Sub => "sub".fmt(f),
TwoArg::Cmp => "cmp".fmt(f),
TwoArg::Dadd => "dadd".fmt(f),
TwoArg::Bit => "bit".fmt(f),
TwoArg::Bic => "bic".fmt(f),
TwoArg::Bis => "bis".fmt(f),
TwoArg::Xor => "xor".fmt(f),
TwoArg::And => "and".fmt(f),
}
}
}
impl Display for Jump {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Jump::Jne => "jne".fmt(f),
Jump::Jnz => "jnz".fmt(f),
Jump::Jeq => "jeq".fmt(f),
Jump::Jz => "jz".fmt(f),
Jump::Jnc => "jnc".fmt(f),
Jump::Jlo => "jlo".fmt(f),
Jump::Jc => "jc".fmt(f),
Jump::Jhs => "jhs".fmt(f),
Jump::Jn => "jn".fmt(f),
Jump::Jge => "jge".fmt(f),
Jump::Jl => "jl".fmt(f),
Jump::Jmp => "jmp".fmt(f),
}
}
}
}

View File

@@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
use super::*;
use super::ignore::Ignore;
use super::preprocessed::Preprocessed;
/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
/// Gets this stream's [Context]
fn context(&self) -> Context;
/// Creates an iterator that skips [Type::Space] in the input
#[inline]
fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
where Self: Sized {
Ignore::new(variant, self)
}
/// Creates a [TokenStream] that performs live substitution of the input
#[inline]
fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
where Self: Sized {
Preprocessed::new(self)
}
/// Returns the next [Token] without advancing
fn peek(&mut self) -> Self::Item;
/// Returns the next [Token] if it is of the expected [Type], without advancing
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Consumes and returns a [Token] if it is the expected [Type]
///
/// Otherwise, does not consume a [Token]
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
/// Ignores a [Token] of the expected [Type], propagating errors.
#[inline]
fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) }
/// Ignores a [Token] of the expected [Type], discarding errors.
#[inline]
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
/// Runs a function on each expected [Type], returning the first success
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError>
where T: AsRef<[Type]> {
for &expected in expected.as_ref() {
match f(self, expected).map_err(|e| e.bare()) {
Ok(t) => return Ok(t),
Err(LexError::UnexpectedToken { .. }) => continue,
Err(e) => return Err(e.context(self.context())),
}
}
Err(LexError::expected(expected, self.peek()).context(self.context()))
}
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
#[inline]
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::peek_expect, expected)
}
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
///
/// Otherwise, does not consume a [Token]
#[inline]
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
where T: AsRef<[Type]> {
self.any_of(Self::expect, expected)
}
/// Ignores a [Token] of any expected [Type], discarding errors.
#[inline]
fn allow_any_of<T>(&mut self, expected: T)
where T: AsRef<[Type]> {
let _ = self.expect_any_of(expected);
}
/// Ignores a [Token] of any expected [Type], propagating errors.
#[inline]
fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError>
where T: AsRef<[Type]> {
self.any_of(Self::require, expected)
}
}

View File

@@ -54,23 +54,55 @@
//! └─ EndOfFile
//! ```
pub mod preamble {
//! Common imports for msp430-asm
use super::*;
pub use assembler::Assembler;
pub use error::Error;
pub use lexer::{
context::Context,
token::{Token, Type},
token_stream::TokenStream,
Tokenizer,
pub mod util {
use std::{
fmt::{Debug, Display},
ops::{Index, Range},
};
pub use parser::Parser;
/// A <code>[Clone] + [Copy] + [!Iterator](Iterator)</code> version of a [Range]
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span<Idx> {
pub start: Idx,
pub end: Idx,
}
impl<Idx> From<Span<Idx>> for Range<Idx> {
fn from(value: Span<Idx>) -> Self {
value.start..value.end
}
}
impl<Idx> From<Range<Idx>> for Span<Idx> {
fn from(value: Range<Idx>) -> Self {
Self { start: value.start, end: value.end }
}
}
impl<T> Index<Span<usize>> for [T] {
type Output = [T];
fn index(&self, index: Span<usize>) -> &Self::Output {
self.index(Range::from(index))
}
}
impl Index<Span<usize>> for str {
type Output = str;
fn index(&self, index: Span<usize>) -> &Self::Output {
self.index(Range::from(index))
}
}
impl<Idx: Debug> Debug for Span<Idx> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}..{:?}", self.start, self.end)
}
}
impl<Idx: Display> Display for Span<Idx> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}..{}", self.start, self.end)
}
}
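// Quick sketch of Span-based slicing through the Index impls above.
#[test]
fn span_slices_like_a_range() {
    let span = Span { start: 2, end: 5 };
    assert_eq!(&"msp430"[span], "p43");
}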
}
use preamble::*;
pub mod error;
pub mod lexer;
pub mod preprocessor;
pub mod parser;
pub mod assembler;
pub mod lexer;
pub mod parser;

View File

@@ -1,81 +1,591 @@
// © 2023 John Breaux
//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root)
// © 2023-2024 John Breaux
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
pub mod ast;
use crate::{TokenStream, Type};
use error::ParseError;
use preamble::*;
use std::{
fmt::{Debug, Display},
path::Path,
use self::error::{
Error,
ErrorKind::{self, *},
PResult, Parsing,
};
use crate::{
lexer::{
token::{Reg, Special, Token, TokenKind as Kind},
Lexer,
},
preprocessor::Preprocessor,
util::Span,
};
use ast::*;
pub mod preamble {
//! All the different AST node types
use super::*;
// Traits
pub use parsable::Parsable;
// Nodes
pub use comment::Comment;
pub use directive::Directive;
pub use identifier::Identifier;
pub use instruction::{
encoding::{
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
},
opcode::Opcode,
#[derive(Clone, Debug)]
pub struct Parser<'t> {
lexer: Preprocessor<'t>,
next: Option<Token<'t>>,
loc: Span<usize>,
}
impl<'t> Parser<'t> {
/// Creates a new [Parser]
pub fn new(text: &'t str) -> Self {
let lexer = Preprocessor::new(text);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
/// Creates a new [Parser] from an existing [Lexer]
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
let lexer = Preprocessor::with_lexer(lexer);
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
}
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
Parsable::parse(self)
}
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
Error { parsing, kind, loc: self.loc }
}
/// Peek a token out of the lexer
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
if self.next.is_none() {
self.next = self.lexer.scan();
}
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
}
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
Ok(match self.take() {
Some(token) => token,
None => {
self.peek(p)?;
self.take().expect("should have been populated by peek")
}
})
}
/// Consumes the next token if it is of the expected [Kind], erroring otherwise
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
match self.peek(p)?.kind {
kind if kind == expect => {
self.take();
Ok(self)
}
kind => Err(self.error(Unexpected(kind), p)),
}
}
/// Consumes the next token without checking it
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
self.next(p)?;
Ok(self)
}
/// Take the last peeked token
pub fn take(&mut self) -> Option<Token<'t>> {
self.next.take()
}
}
// Expressions
impl<'t> Parser<'t> {
/// Parses an expression
pub fn expr(&mut self) -> PResult<Expr<'t>> {
self.term()
}
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
pub fn term(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.factor()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)),
Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)),
Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a factor expression (binary `+`add, `-`sub)
pub fn factor(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.shift()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)),
Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
pub fn shift(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.bin()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)),
Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
pub fn bin(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let a = self.unary()?;
let mut other = vec![];
loop {
match self.peek(p)?.kind {
Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)),
Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)),
Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)),
_ if other.is_empty() => break Ok(a),
_ => break Ok(Expr::Binary(a.into(), other)),
}
}
}
/// Parses a unary expression (`!`invert, `-`negate)
pub fn unary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let mut ops = vec![];
loop {
match self.peek(p)?.kind {
Kind::Star => ops.push(UnOp::Deref),
Kind::Minus => ops.push(UnOp::Neg),
Kind::Bang => ops.push(UnOp::Not),
_ if ops.is_empty() => break Ok(self.primary()?),
_ => break Ok(Expr::Unary(ops, self.primary()?.into())),
}
self.take();
}
}
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
pub fn primary(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let Token { lexeme, kind, .. } = *self.peek(p)?;
Ok(match kind {
Kind::OpenParen => {
let out = Expr::Group(self.then(p)?.parse()?);
self.assert(Kind::CloseParen, p)?;
out
}
Kind::Number(n, _) => {
self.take();
Expr::Number(n)
}
Kind::Identifier => {
self.take();
Expr::Ident(lexeme)
}
Kind::Amp => self.then(p)?.addrof()?,
ty => Err(self.error(NonNumeric(ty), p))?,
})
}
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
let p = Parsing::Expr;
let token = self.peek(p)?;
let out = match token.kind {
Kind::Identifier => Expr::AddrOf(token.lexeme),
Kind::Number(n, _) => Expr::Number(n),
ty => Err(self.error(Unexpected(ty), p))?,
};
self.take();
Ok(out)
}
}
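// Minimal usage sketch for the expression chain above, assuming the
// preprocessor passes tokens through unchanged when no `.define` is active.
#[test]
fn expr_sketch() {
    let mut p = Parser::new("*(&start + 2)\n");
    // Deref of an address-of expression plus a constant offset.
    let e = p.expr().expect("expression should parse");
    println!("{e}");
}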
pub trait Parsable<'t>: Sized {
fn parse(p: &mut Parser<'t>) -> PResult<Self>;
}
impl<'t> Parsable<'t> for Statements<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let mut stmts = vec![];
while p.peek(Parsing::File)?.kind != Kind::Eof {
stmts.push(p.parse()?)
}
Ok(Self { stmts })
}
}
impl<'t> Parsable<'t> for Statement<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let token = *p.peek(Parsing::Stmt)?;
Ok(match token.kind {
Kind::Comment => {
p.take();
Statement::Comment(token.lexeme)
}
Kind::Directive => Statement::Directive(p.parse()?),
Kind::Identifier => Statement::Label(p.label()?),
_ => Statement::Insn(p.parse()?),
})
}
}
impl<'t> Parsable<'t> for Directive<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
p.take();
Ok(match lexeme {
".define" => Directive::Define(p.parse()?),
".org" => Directive::Org(p.expr()?.into()),
".word" => Directive::Word(p.parse()?),
".words" => Directive::Words(p.parse()?),
".string" => Directive::String(p.string()?),
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
})
}
}
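// Directive reference for the match above:
//   .define NAME tokens...    -> Directive::Define
//   .org    expr              -> Directive::Org
//   .word   expr              -> Directive::Word
//   .words  [ expr expr ... ] -> Directive::Words
//   .string "text"            -> Directive::String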
impl<'t> Parsable<'t> for Vec<Token<'t>> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Directive;
let mut tokens = vec![];
loop {
if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind {
break;
}
tokens.push(p.next(parsing)?)
}
p.take();
Ok(tokens)
}
}
impl<'t> Parsable<'t> for Instruction<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let start = p.peek(Parsing::Instruction)?.pos.start;
Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } })
}
}
impl<'t> Parsable<'t> for InstructionKind<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
// an instruction starts with an opcode
Ok(match p.peek(Parsing::Instruction)?.kind() {
Kind::NoEm(_) => Self::NoEm(p.parse()?),
Kind::OneEm(_) => Self::OneEm(p.parse()?),
Kind::Special(Special::Br) => Self::Br(p.parse()?),
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
Kind::OneArg(_) => Self::OneArg(p.parse()?),
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
Kind::Jump(_) => Self::Jump(p.parse()?),
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
})
}
}
impl<'t> Parsable<'t> for NoEm {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
match p.next(Parsing::NoEm)?.kind {
Kind::NoEm(opcode) => Ok(Self { opcode }),
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
}
}
}
impl<'t> Parsable<'t> for OneEm<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneEm)?.kind {
Kind::OneEm(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
},
width: p.parse()?,
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for OneArg<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Self {
opcode: match p.next(Parsing::OneArg)?.kind {
Kind::OneArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
},
width: p.parse()?,
src: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for TwoArg<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::TwoArg;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::TwoArg(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
width: p.parse()?,
src: p.parse()?,
dst: p.assert(Kind::Comma, parsing)?.parse()?,
})
}
}
impl<'t> Parsable<'t> for Jump<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
Ok(Self {
opcode: match p.next(parsing)?.kind {
Kind::Jump(opcode) => opcode,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
dst: p.parse()?,
})
}
}
impl<'t> Parsable<'t> for Reti {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
use crate::lexer::token::OneArg;
p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?;
Ok(Reti)
}
}
impl<'t> Parsable<'t> for Br<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
p.assert(Kind::Special(Special::Br), Parsing::Br)?;
Ok(Self { src: p.parse()? })
}
}
impl<'t> Parsable<'t> for Src<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Src;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
Kind::At => {
let reg = match p.then(parsing)?.next(parsing)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), parsing))?,
};
if let Kind::Plus = p.peek(parsing)?.kind {
p.take();
Src::PostInc(reg)
} else {
Src::Indirect(reg)
}
} // @reg+, @reg
Kind::Reg(_) => Src::Direct(p.parse()?),
_ => {
let expr = p.parse()?;
match p.peek(parsing)?.kind {
Kind::OpenParen => Src::Indexed(expr, {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
_ => Src::BareExpr(expr),
}
}
})
}
}
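// Source addressing-mode reference for the match above:
//   r10       direct          -> Src::Direct
//   2(r10)    indexed         -> Src::Indexed
//   @r10      indirect        -> Src::Indirect
//   @r10+     post-increment  -> Src::PostInc
//   &addr     absolute        -> Src::Absolute
//   #expr     immediate       -> Src::Immediate
//   label     bare expression -> Src::BareExpr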
impl<'t> Parsable<'t> for Dst<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Dst;
Ok(match p.peek(parsing)?.kind {
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
ty => Err(p.error(Unexpected(ty), parsing))?,
},
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
Kind::Reg(_) => Dst::Direct(p.parse()?),
_ => Dst::Indexed(p.expr()?.into(), {
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
p.assert(Kind::CloseParen, parsing)?;
reg
}),
})
}
}
impl<'t> Parsable<'t> for JumpDst<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Jump;
let mut neg = false;
let out = loop {
let token = p.peek(parsing)?;
match token.kind {
Kind::Minus => {
neg = !neg;
}
Kind::Plus => {}
Kind::Identifier => break Self::Label(token.lexeme),
Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }),
ty => Err(p.error(Unexpected(ty), parsing))?,
}
p.take();
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Width {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let out = match p.peek(Parsing::Width)?.kind() {
Kind::Byte => Width::Byte,
Kind::Word => Width::Word,
_ => return Ok(Width::Word),
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Reg {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let out = match p.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => r,
ty => Err(p.error(Unexpected(ty), Parsing::Reg))?,
};
p.take();
Ok(out)
}
}
impl<'t> Parsable<'t> for Expr<'t> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
p.expr()
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
Ok(Box::new(p.parse()?))
}
}
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
let parsing = Parsing::Vec;
p.assert(Kind::OpenBrace, parsing)?;
let mut out = vec![];
while Kind::CloseBrace != p.peek(parsing)?.kind {
out.push(p.parse()?)
}
p.assert(Kind::CloseBrace, parsing)?;
Ok(out)
}
}
/// Context-sensitive parsing rules
impl<'t> Parser<'t> {
pub fn string(&mut self) -> PResult<&'t str> {
let token = *self.peek(Parsing::Directive)?;
match token.kind {
Kind::String => {
self.take();
Ok(&token.lexeme[1..token.lexeme.len() - 1])
}
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
}
}
pub fn label(&mut self) -> PResult<&'t str> {
let p = Parsing::Label;
let token = self.next(p)?;
assert_eq!(Kind::Identifier, token.kind);
self.assert(Kind::Colon, p)?;
Ok(token.lexeme)
}
pub fn reg(&mut self) -> PResult<Reg> {
match self.peek(Parsing::Reg)?.kind {
Kind::Reg(r) => {
self.take();
Ok(r)
}
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
}
}
}
pub mod error {
use super::Kind;
use crate::util::Span;
use std::{fmt::Display, num::TryFromIntError};
pub type PResult<T> = Result<T, Error>;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
pub parsing: Parsing,
pub kind: ErrorKind,
pub loc: Span<usize>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
LexError,
/// Returned when [Parsing::Expr] fails without consuming
NotExpr,
DivZero,
NonNumeric(Kind),
BadIntForDst(u16),
TryFromIntError(TryFromIntError),
Unexpected(Kind),
BufEmpty,
Todo,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Parsing {
File,
Stmt,
Label,
Directive,
Instruction,
};
pub use label::Label;
pub use line::Line;
pub use root::Root;
// Error
pub use error::ParseError;
}
pub mod parsable;
NoEm,
OneEm,
Reti,
Br,
OneArg,
TwoArg,
Jump,
pub mod comment;
pub mod directive;
pub mod error;
pub mod identifier;
pub mod instruction;
pub mod label;
pub mod line;
pub mod root;
Width,
Src,
Dst,
Reg,
pub struct Parser {
radix: u32,
}
impl Parser {
pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> {
Root::parse(&self, &mut stream.ignore(Type::Space))
Expr,
Vec,
}
pub fn parse<T>(self, input: &T) -> Result<Root, ParseError>
where T: AsRef<str> + ?Sized {
Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing)
}
}
pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError>
where P: AsRef<Path> + ?Sized {
self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into()))
impl Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::LexError => write!(f, "lexical error"),
ErrorKind::TryFromIntError(e) => write!(f, "{e}"),
ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"),
ErrorKind::DivZero => write!(f, "Division by zero"),
ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"),
ErrorKind::BufEmpty => write!(f, "Peek buffer empty"),
ErrorKind::Todo => write!(f, "Not yet implemented"),
}
}
}
pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError>
where T: AsRef<str> + ?Sized {
Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
}
/// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
/// conversion
pub fn radix(mut self, radix: u32) -> Self { self.radix = radix; self }
}
impl Default for Parser {
fn default() -> Self { Self { radix: 16 } }
}
impl Debug for Parser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
impl Display for Parsing {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Parsing::File => "a file".fmt(f),
Parsing::Stmt => "a line".fmt(f),
Parsing::Label => "a label".fmt(f),
Parsing::Directive => "a directive".fmt(f),
Parsing::Instruction => "an instruction".fmt(f),
Parsing::NoEm => "a no-operand emulated instruction".fmt(f),
Parsing::OneEm => "a one-operand emulated instruction".fmt(f),
Parsing::Reti => "a `reti` instruction".fmt(f),
Parsing::Br => "a `br` instruction".fmt(f),
Parsing::OneArg => "a one-operand instruction".fmt(f),
Parsing::TwoArg => "a two-operand instruction".fmt(f),
Parsing::Jump => "a jump instruction".fmt(f),
Parsing::Width => "an instruction width".fmt(f),
Parsing::Src => "a source".fmt(f),
Parsing::Dst => "a destination".fmt(f),
Parsing::Reg => "a register".fmt(f),
Parsing::Expr => "a constant expression".fmt(f),
Parsing::Vec => "a list".fmt(f),
}
}
}
impl std::error::Error for Error {}
}

679
src/parser/ast.rs Normal file
View File

@@ -0,0 +1,679 @@
// © 2023-2024 John Breaux
//! Represents MSP430 instructions, directives, and constant expressions as AST nodes
use crate::{
lexer::token::{self, Reg, Token},
util::Span,
};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Statements<'t> {
pub stmts: Vec<Statement<'t>>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Statement<'t> {
Label(&'t str),
Insn(Instruction<'t>),
Directive(Directive<'t>),
Comment(&'t str),
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Directive<'t> {
/// TODO: Store define as a vec of tokens. This will require help from the
/// [preprocessor](crate::preprocessor)
Define(Vec<Token<'t>>),
Org(Box<Expr<'t>>),
Word(Box<Expr<'t>>),
Words(Vec<Expr<'t>>),
String(&'t str),
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'t> {
pub span: Span<usize>,
pub kind: InstructionKind<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InstructionKind<'t> {
NoEm(NoEm),
OneEm(OneEm<'t>),
OneArg(OneArg<'t>),
TwoArg(TwoArg<'t>),
Jump(Jump<'t>),
Reti(Reti),
Br(Br<'t>),
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct NoEm {
pub opcode: token::NoEm,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneEm<'t> {
pub opcode: token::OneEm,
pub width: Width,
pub dst: Dst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneArg<'t> {
pub opcode: token::OneArg,
pub width: Width,
pub src: Src<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TwoArg<'t> {
pub opcode: token::TwoArg,
pub width: Width,
pub src: Src<'t>,
pub dst: Dst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Jump<'t> {
pub opcode: token::Jump,
pub dst: JumpDst<'t>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Reti;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Br<'t> {
pub src: Src<'t>,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Width {
#[default]
Word,
Byte,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Src<'t> {
Direct(Reg),
Indexed(Box<Expr<'t>>, Reg),
Indirect(Reg),
PostInc(Reg),
Absolute(Box<Expr<'t>>),
Immediate(Box<Expr<'t>>),
Special(SrcSpecial),
BareExpr(Box<Expr<'t>>),
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum SrcSpecial {
Zero,
One,
Four,
Two,
Eight,
NegOne,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Dst<'t> {
Direct(Reg),
Indexed(Box<Expr<'t>>, Reg),
Absolute(Box<Expr<'t>>),
Special(DstSpecial),
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DstSpecial {
Zero,
One,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum JumpDst<'t> {
/// A relative offset, nominally an even number from -0x400..=0x3fe
Rel(i16),
Label(&'t str),
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Expr<'t> {
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
Unary(Vec<UnOp>, Box<Expr<'t>>),
Group(Box<Expr<'t>>),
Number(u16),
Ident(&'t str),
AddrOf(&'t str),
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum BinOp {
Mul,
Div,
Rem,
Add,
Sub,
Lsh,
Rsh,
And,
Xor,
Or,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum UnOp {
Deref,
Not,
Neg,
}
pub mod conv {
//! Conversions between [ast](super) types, via [From], or via `new` constructor
use super::{InstructionKind as Ik, *};
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
impl<'t> From<$src> for $dst {
fn from(value: $src) -> Self {
$expr(value)
}
}
)*}}
// sure am glad macros aren't hygienic over lifetimes
impl_from! { Ik<'t> {
NoEm => Ik::NoEm,
OneEm<'t> => Ik::OneEm,
OneArg<'t> => Ik::OneArg,
TwoArg<'t> => Ik::TwoArg,
Jump<'t> => Ik::Jump,
Reti => Ik::Reti,
Br<'t> => Ik::Br,
}}
impl_from! { Expr<'t> {
u16 => Expr::Number
}}
impl<'t> From<Dst<'t>> for Src<'t> {
fn from(value: Dst<'t>) -> Self {
match value {
Dst::Special(v) => Src::Special(v.into()),
Dst::Absolute(v) => Src::Absolute(v),
Dst::Indexed(i, r) => Src::Indexed(i, r),
Dst::Direct(r) => Src::Direct(r),
}
}
}
impl From<DstSpecial> for SrcSpecial {
fn from(value: DstSpecial) -> Self {
match value {
DstSpecial::Zero => SrcSpecial::Zero,
DstSpecial::One => SrcSpecial::One,
}
}
}
impl<'t> TwoArg<'t> {
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
Self { opcode, width, src, dst }
}
}
}
pub mod display {
use super::*;
use std::fmt::Display;
impl<'t> Display for Statements<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for stmt in &self.stmts {
writeln!(f, "{stmt}")?;
}
Ok(())
}
}
impl<'t> Display for Statement<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Statement::Label(v) => write!(f, "{v}:"),
Statement::Insn(v) => write!(f, "{v}"),
Statement::Directive(v) => write!(f, "{v}"),
Statement::Comment(v) => write!(f, "{v}"),
}
}
}
impl<'t> Display for Directive<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Directive::Define(_) => write!(f, ".directive"),
Directive::Org(e) => write!(f, ".org {e}"),
Directive::Word(w) => write!(f, ".word {w}"),
Directive::Words(words) => {
write!(f, ".words [ ")?;
for word in words {
write!(f, "{word} ")?;
}
write!(f, "]")
}
Directive::String(s) => write!(f, ".string \"{s}\""),
}
}
}
impl<'t> Display for Instruction<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { span: _, kind } = self;
write!(f, "{kind}")
}
}
impl<'t> Display for InstructionKind<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InstructionKind::NoEm(v) => v.fmt(f),
InstructionKind::OneEm(v) => v.fmt(f),
InstructionKind::OneArg(v) => v.fmt(f),
InstructionKind::TwoArg(v) => v.fmt(f),
InstructionKind::Jump(v) => v.fmt(f),
InstructionKind::Reti(v) => v.fmt(f),
InstructionKind::Br(v) => v.fmt(f),
}
}
}
impl Display for NoEm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { opcode } = self;
write!(f, "{opcode}")
}
}
impl<'t> Display for OneEm<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { opcode, width, dst } = self;
write!(f, "{opcode}{width}\t{dst}")
}
}
impl<'t> Display for OneArg<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { opcode, width, src } = self;
write!(f, "{opcode}{width}\t{src}")
}
}
impl<'t> Display for TwoArg<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { opcode, width, src, dst } = self;
write!(f, "{opcode}{width}\t{src}, {dst}")
}
}
impl<'t> Display for Jump<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { opcode, dst } = self;
write!(f, "{opcode}\t{dst}")
}
}
impl Display for Reti {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "reti")
}
}
impl<'t> Display for Br<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { src } = self;
write!(f, "br\t{src}")
}
}
impl<'t> Display for Src<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Src::Direct(r) => write!(f, "{r}"),
Src::Indexed(e, r) => write!(f, "{e}({r})"),
Src::Indirect(r) => write!(f, "@{r}"),
Src::PostInc(r) => write!(f, "@{r}+"),
Src::Absolute(e) => write!(f, "&{e}"),
Src::Immediate(e) => write!(f, "#{e}"),
Src::Special(i) => write!(f, "#{i}"),
Src::BareExpr(id) => write!(f, "{id}"),
}
}
}
impl Display for SrcSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SrcSpecial::Zero => write!(f, "0"),
SrcSpecial::One => write!(f, "1"),
SrcSpecial::Four => write!(f, "4"),
SrcSpecial::Two => write!(f, "2"),
SrcSpecial::Eight => write!(f, "8"),
SrcSpecial::NegOne => write!(f, "-1"),
}
}
}
impl<'t> Display for Dst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Dst::Direct(r) => write!(f, "{r}"),
Dst::Indexed(e, r) => write!(f, "{e}({r})"),
Dst::Absolute(e) => write!(f, "&{e}"),
Dst::Special(i) => write!(f, "#{i}"),
}
}
}
impl Display for DstSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DstSpecial::Zero => write!(f, "0"),
DstSpecial::One => write!(f, "1"),
}
}
}
impl<'t> Display for JumpDst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
JumpDst::Rel(i) => write!(f, "{i}"),
JumpDst::Label(l) => write!(f, "{l}"),
}
}
}
impl<'t> Display for Expr<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expr::Binary(head, tail) => {
write!(f, "{head}")?;
for (op, tail) in tail {
write!(f, "{op}{tail}")?;
}
Ok(())
}
Expr::Unary(ops, tail) => {
for op in ops {
write!(f, "{op}")?
}
write!(f, "{tail}")
}
Expr::Group(e) => write!(f, "({e})"),
Expr::Number(n) => write!(f, "{n:x}"),
Expr::Ident(n) => write!(f, "{n}"),
Expr::AddrOf(n) => write!(f, "&{n}"),
}
}
}
impl Display for BinOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BinOp::Mul => write!(f, "*"),
BinOp::Div => write!(f, "/"),
BinOp::Rem => write!(f, "%"),
BinOp::Add => write!(f, "+"),
BinOp::Sub => write!(f, "-"),
BinOp::Lsh => write!(f, "<<"),
BinOp::Rsh => write!(f, ">>"),
BinOp::And => write!(f, "&"),
BinOp::Xor => write!(f, "^"),
BinOp::Or => write!(f, "|"),
}
}
}
impl Display for UnOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
UnOp::Deref => write!(f, "*"),
UnOp::Not => write!(f, "!"),
UnOp::Neg => write!(f, "-"),
}
}
}
impl Display for Width {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Width::Word => Ok(()),
Width::Byte => write!(f, ".b"),
}
}
}
}
pub mod canonical {
use std::iter;
use super::*;
use token::TwoArg::*;
pub trait Canonicalize {
/// The output after canonicalization
type Output;
/// Converts `Self` into its "canonical" form. "Emulated" instructions are lowered
/// into their respective non-emulated forms.
fn to_canonical(self) -> Self::Output;
}
impl<'t> Canonicalize for Statements<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() }
}
}
impl<'t> Canonicalize for Statement<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
match self {
Statement::Insn(i) => Self::Insn(i.to_canonical()),
_ => self,
}
}
}
impl<'t> Canonicalize for Instruction<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
Self { kind: self.kind.to_canonical(), ..self }
}
}
impl<'t> Canonicalize for InstructionKind<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
match self {
Self::NoEm(v) => Self::TwoArg(v.to_canonical()),
Self::OneEm(v) => Self::TwoArg(v.to_canonical()),
Self::Reti(v) => Self::Reti(v.to_canonical()),
Self::Br(v) => Self::TwoArg(v.to_canonical()),
Self::OneArg(v) => Self::OneArg(v.to_canonical()),
Self::TwoArg(v) => Self::TwoArg(v.to_canonical()),
Self::Jump(v) => Self::Jump(v.to_canonical()),
}
}
}
impl Canonicalize for NoEm {
type Output = TwoArg<'static>;
fn to_canonical(self) -> Self::Output {
let Self { opcode } = self;
use SrcSpecial::*;
use Width::*;
match opcode {
token::NoEm::Nop => {
TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
}
token::NoEm::Ret => {
TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
}
token::NoEm::Clrc => {
TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Clrz => {
TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Clrn => {
TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Setc => {
TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Setz => {
TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Setn => {
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Dint => {
TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
token::NoEm::Eint => {
TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
}
}
}
impl<'t> Canonicalize for OneEm<'t> {
type Output = TwoArg<'t>;
fn to_canonical(self) -> Self::Output {
use SrcSpecial::*;
let Self { opcode, width, dst } = self;
match opcode {
token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst),
token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst),
token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst),
token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst),
token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst),
token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst),
token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst),
token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst),
token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst),
token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst),
token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst),
token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst),
token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst),
}
}
}
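// Minimal lowering sketch: the emulated `clr r15` becomes the canonical
// `mov #0, r15`, per the match above.
#[test]
fn clr_lowers_to_mov_zero() {
    let clr = OneEm { opcode: token::OneEm::Clr, width: Width::Word, dst: Dst::Direct(Reg::R15) };
    let expected = TwoArg::new(Mov, Width::Word, Src::Special(SrcSpecial::Zero), Dst::Direct(Reg::R15));
    assert_eq!(clr.to_canonical(), expected);
}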
impl<'t> Canonicalize for OneArg<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, width, src } = self;
Self {
opcode,
width: match opcode {
token::OneArg::Call => Width::Word,
_ => width,
},
src: src.to_canonical(),
}
}
}
impl<'t> Canonicalize for TwoArg<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, width, src, dst } = self;
Self { opcode, width, src: src.to_canonical(), dst: dst.to_canonical() }
}
}
impl<'t> Canonicalize for Jump<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, dst } = self;
Self {
opcode: match opcode {
token::Jump::Jnz => token::Jump::Jne,
token::Jump::Jz => token::Jump::Jeq,
token::Jump::Jnc => token::Jump::Jlo,
token::Jump::Jc => token::Jump::Jhs,
t => t,
},
dst: dst.to_canonical(),
}
}
}
impl Canonicalize for Reti {
type Output = Self;
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Br<'t> {
type Output = TwoArg<'t>;
fn to_canonical(self) -> Self::Output {
let Self { src } = self;
TwoArg::new(Mov, Width::Word, src, Dst::Direct(Reg::PC))
}
}
impl<'t> Canonicalize for Src<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
use SrcSpecial::*;
match self {
Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self,
Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r),
Src::Absolute(e) => Src::Absolute(e.to_canonical().into()),
Src::Immediate(e) => match e.to_canonical() {
Expr::Number(0) => Src::Special(Zero),
Expr::Number(1) => Src::Special(One),
Expr::Number(2) => Src::Special(Two),
Expr::Number(4) => Src::Special(Four),
Expr::Number(8) => Src::Special(Eight),
Expr::Number(0xffff) => Src::Special(NegOne),
expr => Src::Immediate(expr.into()),
},
Src::BareExpr(_) => self,
}
}
}
impl<'t> Canonicalize for Dst<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
match self {
Dst::Direct(_) | Dst::Special(_) => self,
Dst::Indexed(e, r) => Dst::Indexed(e.to_canonical().into(), r),
Dst::Absolute(e) => Dst::Absolute(e.to_canonical().into()),
}
}
}
impl<'t> Canonicalize for JumpDst<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Expr<'t> {
type Output = Self;
/// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
/// this returns a single [Expr::Number]. If not, it evaluates until
/// it runs into an unevaluatable leaf.
fn to_canonical(self) -> Self::Output {
match self {
Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
Expr::Group(e) => e.to_canonical(),
Expr::Unary(ops, tail) => {
let mut tail = match tail.to_canonical() {
Expr::Number(n) => n,
other => return other,
};
// If the tail is dereferenced, canonicalization must halt,
// since we have no knowledge of memory layout
let mut ops = ops.into_iter();
for op in ops.by_ref() {
tail = match op {
UnOp::Deref => {
return Expr::Unary(
iter::once(op).chain(ops).collect(),
Box::new(tail.into()),
)
}
UnOp::Not => !tail,
UnOp::Neg => 0u16.wrapping_sub(tail),
}
}
Expr::Number(tail)
}
Expr::Binary(head, tails) => {
let mut head = match head.to_canonical() {
Expr::Number(n) => n,
head => return Expr::Binary(head.into(), tails),
};
let mut tails = tails.into_iter();
for (op, tail) in &mut tails {
let tail = tail.to_canonical();
// If the canonical tail isn't a number, rebuild and return
let Expr::Number(tail) = tail else {
return Expr::Binary(
Box::new(head.into()),
iter::once((op, tail)).chain(tails).collect(),
);
};
head = match op {
BinOp::Mul => head.wrapping_mul(tail),
BinOp::Div => head.wrapping_div(tail),
BinOp::Rem => head.wrapping_rem(tail),
BinOp::Add => head.wrapping_add(tail),
BinOp::Sub => head.wrapping_sub(tail),
BinOp::Lsh => head.wrapping_shl(tail as u32),
BinOp::Rsh => head.wrapping_shr(tail as u32),
BinOp::And => head & tail,
BinOp::Xor => head ^ tail,
BinOp::Or => head | tail,
};
}
Expr::Number(head)
}
}
}
}
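// Minimal folding sketch: an all-numeric tree collapses to a single
// Expr::Number, while a symbolic leaf leaves the tree unevaluated.
#[test]
fn fold_sketch() {
    let sum = Expr::Binary(Box::new(Expr::Number(2)), vec![(BinOp::Add, Expr::Number(3))]);
    assert_eq!(sum.to_canonical(), Expr::Number(5));

    let sym = Expr::Binary(Box::new(Expr::Ident("start")), vec![(BinOp::Add, Expr::Number(2))]);
    // Head isn't numeric, so the tree comes back unchanged.
    assert!(matches!(sym.to_canonical(), Expr::Binary(..)));
}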
}

View File

@@ -1,15 +0,0 @@
// © 2023 John Breaux
//! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//`
use super::*;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Comment(pub String);
impl Parsable for Comment {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
Ok(Self(stream.expect(Type::Comment)?.lexeme().to_string()))
}
}
impl Display for Comment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
}

View File

@@ -1,90 +0,0 @@
// © 2023 John Breaux
//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and
//! [Linker](crate::Linker)
use std::path::PathBuf;
use super::*;
use crate::lexer::token::OwnedToken;
// TODO: Parse each kind of *postprocessor* directive into an AST node
// - .org 8000: Directive::Org { base: Number }
// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this
// in the AST?
// - .include "<filename>" Directive::Include { Root } ; should this include an entire AST in
// the AST?
// - .word 8000 Directive::Word(Number)
// - .words dead beef Directive::Words(Vec<u16>|Vec<Number>)
// - .byte ff Directive::Byte(Number)
// - .bytes de, ad, be, ef Directive::Bytes(Vec<u8>)
// - .string "string" Directive::String(String)
// - .ascii "string" Directive::Ascii(Vec<u8>)
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Directive {
Org(Number),
Define(Vec<OwnedToken>),
Include(Root),
Byte(Number),
Bytes(Vec<Number>),
Word(Number),
Words(Vec<Number>),
String(String),
Strings(Vec<String>),
}
impl Directive {}
impl Parsable for Directive {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
let d = stream.expect(Type::Directive)?;
// match on the directive
Ok(match d.lexeme() {
".org" => Self::Org(Number::parse(p, stream)?),
".define" => {
let mut tokens = vec![];
loop {
match stream.peek().variant() {
Type::Endl | Type::EndOfFile => break,
_ => tokens.push(stream.next().unwrap_or_default().into()),
}
}
Self::Define(tokens)
}
".include" => {
// Try to get path
Self::Include(Parser::default().parse_file(&PathBuf::parse(p, stream)?)?)
}
".byte" => Self::Byte(Number::parse(p, stream)?),
".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?),
".word" => Self::Word(Number::parse(p, stream)?),
".words" => Self::Words(Vec::<Number>::parse(p, stream)?),
".string" => Self::String(String::parse(p, stream)?),
".strings" => Self::Strings(Vec::<String>::parse(p, stream)?),
e => Err(ParseError::UnrecognizedDirective(e.into()))?,
})
}
}
impl Display for Directive {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Directive::Org(num) => write!(f, ".org {num}"),
Directive::Define(rep) => {
write!(f, ".define")?;
for t in rep {
write!(f, " {t}")?;
}
Ok(())
}
Directive::Include(r) => Display::fmt(r, f),
Directive::Byte(num) => write!(f, ".org {num}"),
Directive::Bytes(v) => write!(f, ".bytes {v:?}"),
Directive::Word(num) => write!(f, ".org {num}"),
Directive::Words(v) => write!(f, ".bytes {v:?}"),
Directive::String(s) => write!(f, ".string \"{s}\""),
Directive::Strings(s) => write!(f, ".string \"{s:?}\""),
}
}
}

View File

@@ -1,74 +0,0 @@
// © 2023 John Breaux
use super::*;
use crate::lexer::error::LexError;
#[derive(Debug)]
pub enum ParseError {
/// Produced by [lexer](crate::lexer)
LexError(LexError),
/// Produced by [std::io]
IoError(std::io::Error),
/// Produced by [Number](Number)[::parse()](Parsable::parse())
/// when the parsed number contains digits too high for the specified radix
UnexpectedDigits(String, u32),
/// Produced by [Opcode](Opcode)[::parse()](Parsable::parse())
/// when the opcode passed lexing but did not match recognized opcodes.
///
/// This is always a lexer bug.
UnrecognizedOpcode(String),
/// Produced by [Directive](Directive)[::parse()](Parsable::parse())
/// when an unknown or unimplemented directive is used
UnrecognizedDirective(String),
/// Produced by [Register] when attempting to convert from a [str]
/// that isn't a register (pc, sp, sr, cg, or r{number})
NotARegister(String),
/// Produced by [Register] when the r{number} is outside the range 0-15
RegisterTooHigh(u16),
/// Produced by [SecondaryOperand] when the joke "secondary immediate" form
/// is out of range 0..=1
FatSecondaryImmediate(isize),
/// Produced by a [Number] too wide to fit in 16 bits
/// (outside the range `(-2^15) .. (2^16-1)` )
NumberTooWide(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
/// when the jump offset is outside the range (`-1022..=1024`)
JumpedTooFar(isize),
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
JumpedOdd(isize),
}
impl From<LexError> for ParseError {
fn from(value: LexError) -> Self { Self::LexError(value) }
}
impl From<std::io::Error> for ParseError {
fn from(value: std::io::Error) -> Self { Self::IoError(value) }
}
impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::LexError(error) => Display::fmt(error, f),
Self::IoError(error) => Display::fmt(error, f),
Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."),
Self::NotARegister(reg) => write!(f, "{reg} is not a register"),
Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range (`-1022..=1024`.)"),
Self::JumpedOdd(num) => {
write!(f, "Jump targets only encode even numbers: {num} must not be odd.")
}
}
}
}
impl std::error::Error for ParseError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::LexError(e) => Some(e),
Self::IoError(e) => Some(e),
_ => None,
}
}
}

View File

@@ -1,26 +0,0 @@
// © 2023 John Breaux
//! An [Identifier] stores the hash of an identifier
use super::*;
use std::rc::Rc;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Identifier {
str: Rc<str>,
}
impl Identifier {
fn str<T: AsRef<str>>(s: T) -> Self { Self { str: s.as_ref().to_owned().into() } }
}
impl Parsable for Identifier {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
let token = stream.expect(Type::Identifier)?;
match token.variant() {
Type::Identifier => Ok(Self::str(token.lexeme())),
_ => unreachable!("Expected identifier, got {token:?}"),
}
}
}
impl Display for Identifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.str, f) }
}

View File

@@ -1,52 +0,0 @@
// © 2023 John Breaux
//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430
//! instruction
//!
//!
//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent
//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an
//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on
//! my own. Really, there's about 9 valid classes of instruction, some of which are only used for
//! one or two of the MSP430's instructions.
use super::*;
pub mod encoding;
pub mod opcode;
/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Instruction(Opcode, Encoding);
impl Instruction {
pub fn opcode(&self) -> &Opcode { &self.0 }
pub fn encoding(&self) -> &Encoding { &self.1 }
/// Gets the Instruction as a [u16]
pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() }
/// Gets the [extension words]
pub fn ext_words(&self) -> [Option<u16>; 2] { self.1.extwords() }
}
impl Parsable for Instruction {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where
Self: Sized,
T: crate::TokenStream<'text>,
{
// parse an opcode
let opcode: Opcode = Opcode::parse(p, stream)?;
// resolve the opcode to a final opcode and an encoding
let (opcode, encoding) = opcode.resolve();
// parse the encoding
let encoding = encoding.parse(p, stream)?;
Ok(Self(opcode, encoding))
}
}
impl From<Instruction> for u16 {
fn from(value: Instruction) -> Self { value.word() }
}
impl Display for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
}

View File

@@ -1,81 +0,0 @@
// © 2023 John Breaux
//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode)
use super::*;
pub mod number;
pub mod register;
pub mod width;
pub mod jump_target;
pub mod primary_operand;
pub mod secondary_operand;
mod builder;
pub mod encoding_parser;
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
use encoding_parser::EncodingParser;
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
///
/// # Examples
/// ```rust
/// use msp430_asm::{preamble::*, parser::preamble::*};
/// // Create a token sequence
/// let asm_file = r".b 8000(r15)";
/// // Create a single-operand encoding parser
/// let single: EncodingParser = Encoding::single().end();
/// // Parse an Encoding from it
/// let encoding: Encoding = single
/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
/// .unwrap();
/// // Print the Encoding
/// println!("{encoding}");
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
Single { width: Width, dst: PrimaryOperand },
Jump { target: JumpTarget },
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
}
impl Encoding {
/// Returns a builder for [Encoding::Single]
pub fn single() -> SingleBuilder { Default::default() }
/// Returns a builder for [Encoding::Jump]
pub fn jump() -> JumpBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
pub fn double() -> DoubleBuilder { Default::default() }
/// Returns a builder for [Encoding::Double]
///
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
/// dst are the same
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
/// Returns this encoding's contribution to the instruction word (everything but the opcode bits)
pub fn word(&self) -> u16 {
match self {
Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16,
Encoding::Jump { target } => target.word().unwrap_or_default(),
Encoding::Double { width, src, dst } => {
u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
}
}
}
/// Returns extwords for instruction
pub fn extwords(&self) -> [Option<u16>; 2] {
match self {
Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()],
Encoding::Single { dst, .. } => [dst.ext_word(), None],
Encoding::Jump { .. } => [None, None],
}
}
}
impl Display for Encoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
Encoding::Jump { target } => write!(f, " {target}"),
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
}
}
}

View File

@@ -1,76 +0,0 @@
// © 2023 John Breaux
//! Builder API for [`EncodingParser`]
use super::*;
#[derive(Debug, Default)]
pub struct SingleBuilder {
width: Option<Width>,
dst: Option<PrimaryOperand>,
}
impl SingleBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn operand(mut self, dst: PrimaryOperand) -> Self {
self.dst = Some(dst);
self
}
/// Builds the final [EncodingParser]
pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
}
#[derive(Debug, Default)]
pub struct JumpBuilder {
target: Option<JumpTarget>,
}
impl JumpBuilder {
pub fn target(mut self, target: JumpTarget) -> Self {
self.target = Some(target);
self
}
pub fn end(self) -> EncodingParser { EncodingParser::Jump { target: self.target } }
}
#[derive(Debug, Default)]
pub struct DoubleBuilder {
width: Option<Width>,
src: Option<PrimaryOperand>,
dst: Option<SecondaryOperand>,
}
impl DoubleBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
/// Sets the [PrimaryOperand] field
pub fn src(mut self, src: PrimaryOperand) -> Self {
self.src = Some(src);
self
}
/// Sets the [SecondaryOperand] field
pub fn dst(mut self, dst: SecondaryOperand) -> Self {
self.dst = Some(dst);
self
}
pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
}
#[derive(Debug, Default)]
pub struct ReflexiveBuilder {
width: Option<Width>,
reg: Option<SecondaryOperand>,
}
impl ReflexiveBuilder {
/// Sets the [Width] field
pub fn width(mut self, width: bool) -> Self {
self.width = Some(width.into());
self
}
pub fn reg(mut self, reg: SecondaryOperand) -> Self {
self.reg = Some(reg);
self
}
pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
}

View File

@@ -1,37 +0,0 @@
// © 2023 John Breaux
//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`]
use super::*;
#[derive(Clone, Debug)]
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
pub enum EncodingParser {
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
Jump { target: Option<JumpTarget> },
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
}
impl EncodingParser {
/// Constructs an [Encoding] from this [EncodingParser], filling holes
/// with the tokenstream
pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError>
where T: crate::TokenStream<'text> {
Ok(match self {
Self::Single { width, dst } => Encoding::Single {
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? },
},
Self::Jump { target } => Encoding::Jump { target: target.unwrap_or(JumpTarget::parse(p, stream)?) },
Self::Double { width, src, dst } => Encoding::Double {
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? },
dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? },
},
Self::Reflexive { width, reg } => {
let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? };
Encoding::Double { width, src: reg.clone().into(), dst: reg }
}
})
}
}

View File

@@ -1,58 +0,0 @@
// © 2023 John Breaux
//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier)
//! for a [Jump](Encoding::Jump) [instruction]
use super::*;
/// Contains the [pc-relative offset](Number) or [label](Identifier)
/// for a [Jump](Encoding::Jump) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JumpTarget {
Number(Number),
Identifier(Identifier),
}
impl JumpTarget {
pub fn word(&self) -> Option<u16> {
match self {
JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff),
JumpTarget::Identifier(_) => None,
}
}
pub fn squish(value: isize) -> Result<u16, ParseError> {
match value {
i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?,
i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff),
i => Err(ParseError::JumpedTooFar(i))?,
}
}
pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 }
}
impl Parsable for JumpTarget {
// - Identifier
// - Number
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
// Try to parse a number
if let Some(num) = Number::try_parse(p, stream)? {
Self::try_from(num)
} else {
// if that fails, try to parse an identifier instead
Ok(Self::Identifier(Identifier::parse(p, stream)?))
}
}
}
impl TryFrom<Number> for JumpTarget {
type Error = ParseError;
fn try_from(value: Number) -> Result<Self, Self::Error> { Ok(Self::Number(Self::squish(value.into())?.into())) }
}
impl Display for JumpTarget {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))),
Self::Identifier(id) => write!(f, "{id}"),
}
}
}
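
A standalone sketch of the squish/unsquish round trip above for a legal, positive, even offset (the stored field is only 10 bits wide, so negative offsets wrap on the way back out rather than sign-extending):

```rust
fn squish(offset: isize) -> u16 {
    ((offset >> 1) - 1) as u16 & 0x3ff
}

fn unsquish(field: u16) -> isize {
    (field as isize + 1) << 1
}

fn main() {
    let field = squish(8);          // a jump 8 bytes forward
    assert_eq!(field, 3);           // (8 >> 1) - 1
    assert_eq!(unsquish(field), 8); // round-trips for positive offsets
}
```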

View File

@@ -1,81 +0,0 @@
// © 2023 John Breaux
//! A [`Number`] represents a 16-bit signed or unsigned word
use super::*;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Number(isize, u32); // (value, radix)
impl Parsable for Number {
// A number is:
// [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
use Type as Ty;
// The number is negative when it begins with a Minus, but Plus is also acceptable.
let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus));
let radix = match stream
.expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin])
.ok()
.map(|t| t.variant())
{
Some(Ty::RadixMarkerHex) => 16,
Some(Ty::RadixMarkerDec) => 10,
Some(Ty::RadixMarkerOct) => 8,
Some(Ty::RadixMarkerBin) => 2,
_ => p.radix,
};
let number = stream.expect(Ty::Number)?;
// TODO: Reintroduce error context
let number = isize::from_str_radix(number.lexeme(), radix)
.map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))?
* if negative { -1 } else { 1 };
// Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
Ok(Self(
if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? },
radix,
))
}
}
impl From<isize> for Number {
fn from(value: isize) -> Self { Self(value, 16) }
}
impl From<Number> for isize {
fn from(value: Number) -> Self { value.0 as Self }
}
impl From<u16> for Number {
fn from(value: u16) -> Self { Self(value as isize, 16) }
}
impl From<Number> for u16 {
fn from(value: Number) -> Self { value.0 as Self }
}
impl std::ops::Sub<isize> for Number {
type Output = Self;
fn sub(mut self, rhs: isize) -> Self::Output {
self.0 -= rhs;
self
}
}
impl std::ops::Shr<usize> for Number {
type Output = Self;
fn shr(mut self, rhs: usize) -> Self::Output {
self.0 >>= rhs;
self
}
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.1 {
2 => std::fmt::Binary::fmt(&self.0, f),
8 => std::fmt::Octal::fmt(&self.0, f),
16 => std::fmt::LowerHex::fmt(&self.0, f),
_ => std::fmt::Display::fmt(&self.0, f),
}
}
}
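
The parse above reduces to `isize::from_str_radix` plus a range check that admits anything representable as either a signed or an unsigned 16-bit value; a minimal standalone sketch:

```rust
fn parse_number(digits: &str, radix: u32, negative: bool) -> Option<isize> {
    let n = isize::from_str_radix(digits, radix).ok()? * if negative { -1 } else { 1 };
    // Accept anything that fits in an i16 *or* a u16; it is truncated to 16 bits on use.
    (-0x8000..0x10000).contains(&n).then_some(n)
}

fn main() {
    assert_eq!(parse_number("ffff", 16, false), Some(0xffff)); // u16::MAX
    assert_eq!(parse_number("8000", 16, true), Some(-0x8000)); // i16::MIN
    assert_eq!(parse_number("10000", 16, false), None);        // too wide
}
```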

View File

@@ -1,146 +0,0 @@
// © 2023 John Breaux
//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension
//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`]
use super::*;
/// Contains the first [Register], addressing mode, and Extension Word for a
/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrimaryOperand {
Direct(Register),
Indirect(Register),
PostInc(Register),
Indexed(Register, Number),
Relative(Identifier),
Absolute(Number),
Immediate(Number),
Four,
Eight,
Zero,
One,
Two,
MinusOne,
}
impl PrimaryOperand {
/// Returns the mode bits
pub fn mode(&self) -> u16 {
use PrimaryOperand::*;
match self {
Direct(_) | Zero => 0,
Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4,
Indirect(_) | Two | Four => 2 << 4,
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
}
}
/// Gets the register
pub fn register(&self) -> Register {
use PrimaryOperand::*;
match self {
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
Immediate(_) | Relative(_) => Register::pc,
Absolute(_) | Four | Eight => Register::sr,
Zero | One | Two | MinusOne => Register::cg,
}
}
/// Gets the extension word, if present
pub fn ext_word(&self) -> Option<u16> {
use PrimaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for PrimaryOperand {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
// Try parsing as Register (Direct)
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number (Indexed)
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Try parsing as Identifier (Relative, label mode)
if let Some(id) = Identifier::try_parse(p, stream)? {
return Ok(Self::Relative(id));
}
// Or directly match any of the valid prefix markers
// Register, Number, and Identifier are included here to make error messages clearer.
// Their inclusion causes a negligible slowdown when the next token is not a prefix marker
// (a failure condition).
let token = stream.expect_any_of([
Type::Indirect,
Type::Absolute,
Type::Immediate,
Type::Register,
Type::Number,
Type::Identifier,
])?;
Ok(match token.variant() {
Type::Indirect => {
let reg = Register::parse(p, stream)?;
match stream.expect(Type::Plus) {
Ok(_) => Self::PostInc(reg),
Err(_) => Self::Indirect(reg),
}
}
Type::Absolute => Self::Absolute(Number::parse(p, stream)?),
Type::Immediate => {
let number = Number::parse(p, stream)?;
match number.into() {
// There are two representations for the all-ones constant, since Number preserves
// signedness.
-1_isize | 0xffff => Self::MinusOne,
0 => Self::Zero,
1 => Self::One,
2 => Self::Two,
4 => Self::Four,
8 => Self::Eight,
_ => Self::Immediate(number),
}
}
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl From<SecondaryOperand> for PrimaryOperand {
fn from(value: SecondaryOperand) -> Self {
match value {
SecondaryOperand::Direct(r) => Self::Direct(r),
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
SecondaryOperand::Absolute(n) => Self::Absolute(n),
SecondaryOperand::Relative(id) => Self::Relative(id),
SecondaryOperand::Zero => Self::Zero,
SecondaryOperand::One => Self::One,
}
}
}
impl Display for PrimaryOperand {
// Turn the operand back into a form which parses into the same type
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Direct(r) => Display::fmt(r, f),
Self::Indirect(r) => write!(f, "@{r}"),
Self::PostInc(r) => write!(f, "@{r}+"),
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
Self::Relative(id) => Display::fmt(id, f),
Self::Absolute(n) => write!(f, "&{n}"),
Self::Immediate(n) => write!(f, "#{n}"),
Self::Four => Display::fmt("#4", f),
Self::Eight => Display::fmt("#8", f),
Self::Zero => Display::fmt("#0", f),
Self::One => Display::fmt("#1", f),
Self::Two => Display::fmt("#2", f),
Self::MinusOne => Display::fmt("#-1", f),
}
}
}
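
The immediate special cases above are the MSP430 constant generator: those six values are encoded entirely in the register and mode fields, so they need no extension word. Restating the `register()` and `mode()` results as one table (register numbers: sr = 2, cg = 3):

```rust
/// (immediate, register field, source-mode bits), transcribed from
/// PrimaryOperand::{register, mode} above.
const CONSTANT_GENERATOR: [(i32, u16, u16); 6] = [
    (0, 3, 0 << 4),  // PrimaryOperand::Zero     -> cg, As = 0
    (1, 3, 1 << 4),  // PrimaryOperand::One      -> cg, As = 1
    (2, 3, 2 << 4),  // PrimaryOperand::Two      -> cg, As = 2
    (-1, 3, 3 << 4), // PrimaryOperand::MinusOne -> cg, As = 3
    (4, 2, 2 << 4),  // PrimaryOperand::Four     -> sr, As = 2
    (8, 2, 3 << 4),  // PrimaryOperand::Eight    -> sr, As = 3
];

fn main() {
    for (imm, reg, mode) in CONSTANT_GENERATOR {
        println!("#{imm:2}: register field {reg}, mode bits {mode:#06x}");
    }
}
```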

View File

@@ -1,112 +0,0 @@
// © 2023 John Breaux
//! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
use super::*;
use std::str::FromStr;
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Register {
/// Program Counter
pc,
/// Stack Pointer
sp,
/// Status Register
sr,
/// Constant Generator
cg,
r4,
r5,
r6,
r7,
r8,
r9,
r10,
r11,
r12,
r13,
r14,
r15,
}
impl Parsable for Register {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
stream.expect(Type::Register)?.lexeme().parse()
}
}
impl From<Register> for u16 {
fn from(value: Register) -> Self { value as u16 }
}
impl TryFrom<u16> for Register {
type Error = ParseError;
fn try_from(value: u16) -> Result<Self, Self::Error> {
use Register::*;
Ok(match value {
0 => pc,
1 => sp,
2 => sr,
3 => cg,
4 => r4,
5 => r5,
6 => r6,
7 => r7,
8 => r8,
9 => r9,
10 => r10,
11 => r11,
12 => r12,
13 => r13,
14 => r14,
15 => r15,
_ => return Err(ParseError::RegisterTooHigh(value)),
})
}
}
impl FromStr for Register {
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use Register::*;
match s {
"pc" => Ok(pc),
"sp" => Ok(sp),
"sr" => Ok(sr),
"cg" => Ok(cg),
_ => {
str::parse::<u16>(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into()
}
}
}
}
impl From<Register> for &str {
fn from(value: Register) -> Self {
use Register::*;
match value {
pc => "pc",
sp => "sp",
sr => "sr",
cg => "cg",
r4 => "r4",
r5 => "r5",
r6 => "r6",
r7 => "r7",
r8 => "r8",
r9 => "r9",
r10 => "r10",
r11 => "r11",
r12 => "r12",
r13 => "r13",
r14 => "r14",
r15 => "r15",
}
}
}
impl std::fmt::Display for Register {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) }
}
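
A tiny standalone illustration of the numbered-register path in `FromStr` above (the named forms `pc`, `sp`, `sr`, and `cg` are matched separately):

```rust
fn register_index(s: &str) -> Option<u16> {
    // Mirrors the numeric branch above: skip the leading 'r', then bounds-check the index.
    let n: u16 = s.strip_prefix('r')?.parse().ok()?;
    (n <= 15).then_some(n)
}

fn main() {
    assert_eq!(register_index("r12"), Some(12));
    assert_eq!(register_index("r16"), None); // RegisterTooHigh in the real parser
}
```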

View File

@@ -1,105 +0,0 @@
// © 2023 John Breaux
//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension
//! Word for a [two-operand](Encoding::Double) [instruction]
use super::*;
/// The destination operand of a [Double](Encoding::Double) instruction
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SecondaryOperand {
Direct(Register),
Indexed(Register, Number),
Relative(Identifier),
Absolute(Number),
// Joke encodings?
Zero,
One,
}
use SecondaryOperand as So;
impl SecondaryOperand {
pub fn mode(&self) -> u16 {
match self {
So::Direct(_) | So::Zero => 0,
So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7,
}
}
pub fn register(&self) -> Register {
use SecondaryOperand::*;
match self {
Direct(r) | Indexed(r, _) => *r,
Relative(_) => Register::pc,
Absolute(_) => Register::sr,
Zero | One => Register::cg,
}
}
/// Returns the extension word, if any. Only Indexed and Absolute operands carry one.
pub fn ext_word(&self) -> Option<u16> {
use SecondaryOperand::*;
match self {
Indexed(_, w) | Absolute(w) => Some((*w).into()),
_ => None,
}
}
}
impl Parsable for SecondaryOperand {
// Separator
// - Register => Direct
// - Number => Indexed
// - OpenIdx
// - Register
// - CloseIdx
// - Absolute
// - Number
// - Immediate
// - Number == 0, 1
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: crate::TokenStream<'text> {
use SecondaryOperand::*;
stream.allow(Type::Separator);
// Try parsing as Register (Direct)
if let Some(r) = Register::try_parse(p, stream)? {
return Ok(Self::Direct(r));
}
// Try parsing as Number (Indexed)
if let Some(idx) = Number::try_parse(p, stream)? {
stream.expect(Type::LParen)?;
let reg = Register::parse(p, stream)?;
stream.expect(Type::RParen)?;
return Ok(Self::Indexed(reg, idx));
}
// Try parsing as Identifier (Relative, label mode)
if let Some(id) = Identifier::try_parse(p, stream)? {
return Ok(Self::Relative(id));
}
// Register, Number, and Identifier are included here to make error messages clearer.
// Their inclusion causes a negligible slowdown when the next token is not a prefix marker
// (a failure condition), but they should never actually match here.
let token =
stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?;
Ok(match token.variant() {
Type::Absolute => Absolute(Number::parse(p, stream)?),
// TODO: Reintroduce error context
Type::Immediate => match Number::parse(p, stream)?.into() {
0 => Zero,
1 => One,
n => Err(ParseError::FatSecondaryImmediate(n))?,
},
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
})
}
}
impl Display for SecondaryOperand {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Direct(r) => Display::fmt(r, f),
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
Self::Relative(id) => Display::fmt(id, f),
Self::Absolute(n) => write!(f, "&{n}"),
Self::Zero => Display::fmt("#0", f),
Self::One => Display::fmt("#1", f),
}
}
}

View File

@@ -1,32 +0,0 @@
// © 2023 John Breaux
//! A [`Width`] represents whether an instruction operates on whole words or bytes
use super::*;
/// Represents an instruction's operand width.
///
/// Evaluates to false when the instruction takes word-sized operands, or true when
/// it takes byte-sized operands
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Width(bool);
impl Parsable for Width {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else {
return Ok(Self(false));
};
Ok(Self(token.is_variant(Type::ByteWidth)))
}
}
impl From<Width> for u16 {
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
}
impl From<Width> for bool {
fn from(value: Width) -> Self { value.0 }
}
impl From<bool> for Width {
fn from(value: bool) -> Self { Width(value) }
}
impl std::fmt::Display for Width {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) }
}
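
Since the byte/word flag occupies bit 6 of the instruction word, a `.b` instruction simply ORs in 0x0040; a one-line check of the conversion above:

```rust
fn main() {
    // Mirrors `From<Width> for u16`: the byte/word flag is bit 6 of the instruction word.
    assert_eq!((true as u16) << 6, 0x0040);  // `.b` (byte-sized operands)
    assert_eq!((false as u16) << 6, 0x0000); // default (word-sized operands)
}
```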

View File

@@ -1,261 +0,0 @@
// © 2023 John Breaux
//! An [`Opcode`] encodes an msp430 operation
use super::*;
use std::str::FromStr;
/// Opcode from the [MSPGCC Manual][1]
///
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
/// extract from a [TokenStream] only the required arguments for that call.
///
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
#[allow(clippy::identity_op)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
// "Emulated" opcodes
Nop,
Pop,
Br,
Ret,
Clrc,
Setc,
Clrz,
Setz,
Clrn,
Setn,
Dint,
Eint,
Rla,
Rlc,
Inv,
Clr,
Tst,
Dec,
Decd,
Inc,
Incd,
Adc,
Dadc,
Sbc,
// Single
Rrc = 0x1000 | 0 << 7,
Swpb = 0x1000 | 1 << 7,
Rra = 0x1000 | 2 << 7,
Sxt = 0x1000 | 3 << 7,
Push = 0x1000 | 4 << 7,
Call = 0x1000 | 5 << 7,
Reti = 0x1000 | 6 << 7,
// Jump
Jnz = 0x2000 | 0 << 10,
Jz = 0x2000 | 1 << 10,
Jnc = 0x2000 | 2 << 10,
Jc = 0x2000 | 3 << 10,
Jn = 0x2000 | 4 << 10,
Jge = 0x2000 | 5 << 10,
Jl = 0x2000 | 6 << 10,
Jmp = 0x2000 | 7 << 10,
// Double
Mov = 0x4000,
Add = 0x5000,
Addc = 0x6000,
Subc = 0x7000,
Sub = 0x8000,
Cmp = 0x9000,
Dadd = 0xa000,
Bit = 0xb000,
Bic = 0xc000,
Bis = 0xd000,
Xor = 0xe000,
And = 0xf000,
}
impl Opcode {
/// Resolves this [Opcode] into a concrete [Opcode] and an [EncodingParser]
pub fn resolve(self) -> (Opcode, EncodingParser) {
use super::Encoding as Enc;
use Register as Reg;
use {PrimaryOperand as Src, SecondaryOperand as Dst};
match self {
Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()),
// these instructions do not take a width specifier (though they may still behave properly)
Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()),
// `reti` does not take any operands.
Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()),
Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => {
(self, Enc::jump().end())
}
Self::Mov
| Self::Add
| Self::Addc
| Self::Subc
| Self::Sub
| Self::Cmp
| Self::Dadd
| Self::Bit
| Self::Bic
| Self::Bis
| Self::Xor
| Self::And => (self, Enc::double().end()),
Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()),
Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()),
Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()),
Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
Self::Rla => (Self::Add, Enc::reflexive().end()),
Self::Rlc => (Self::Addc, Enc::reflexive().end()),
Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()),
Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()),
Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()),
Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()),
Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()),
Self::Inc => (Self::Add, Enc::double().src(Src::One).end()),
Self::Incd => (Self::Add, Enc::double().src(Src::Two).end()),
Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()),
Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()),
Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()),
}
}
}
impl Parsable for Opcode {
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
// TODO: Reintroduce error context
stream.expect(Type::Insn)?.parse()
}
}
impl FromStr for Opcode {
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
//TODO: Reduce allocations here?
let s = s.to_ascii_lowercase();
Ok(match s.as_str() {
"rrc" => Self::Rrc,
"swpb" => Self::Swpb,
"rra" => Self::Rra,
"sxt" => Self::Sxt,
"push" => Self::Push,
"call" => Self::Call,
"reti" => Self::Reti,
"jne" | "jnz" => Self::Jnz,
"jeq" | "jz" => Self::Jz,
"jnc" | "jlo" => Self::Jnc,
"jc" | "jhs" => Self::Jc,
"jn" => Self::Jn,
"jge" => Self::Jge,
"jl" => Self::Jl,
"jmp" => Self::Jmp,
"mov" => Self::Mov,
"add" => Self::Add,
"addc" => Self::Addc,
"subc" => Self::Subc,
"sub" => Self::Sub,
"cmp" => Self::Cmp,
"dadd" => Self::Dadd,
"bit" => Self::Bit,
"bic" => Self::Bic,
"bis" => Self::Bis,
"xor" => Self::Xor,
"and" => Self::And,
"nop" => Self::Nop,
"pop" => Self::Pop,
"br" => Self::Br,
"ret" => Self::Ret,
"clrc" => Self::Clrc,
"setc" => Self::Setc,
"clrz" => Self::Clrz,
"setz" => Self::Setz,
"clrn" => Self::Clrn,
"setn" => Self::Setn,
"dint" => Self::Dint,
"eint" => Self::Eint,
"rla" => Self::Rla,
"rlc" => Self::Rlc,
"inv" => Self::Inv,
"clr" => Self::Clr,
"tst" => Self::Tst,
"dec" => Self::Dec,
"decd" => Self::Decd,
"inc" => Self::Inc,
"incd" => Self::Incd,
"adc" => Self::Adc,
"dadc" => Self::Dadc,
"sbc" => Self::Sbc,
_ => Err(ParseError::UnrecognizedOpcode(s))?,
})
}
}
impl Display for Opcode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
match self {
Self::Nop => "nop",
Self::Pop => "pop",
Self::Br => "br",
Self::Ret => "ret",
Self::Clrc => "clrc",
Self::Setc => "setc",
Self::Clrz => "clrz",
Self::Setz => "setz",
Self::Clrn => "clrn",
Self::Setn => "setn",
Self::Dint => "dint",
Self::Eint => "eint",
Self::Rla => "rla",
Self::Rlc => "rlc",
Self::Inv => "inv",
Self::Clr => "clr",
Self::Tst => "tst",
Self::Dec => "dec",
Self::Decd => "decd",
Self::Inc => "inc",
Self::Incd => "incd",
Self::Adc => "adc",
Self::Dadc => "dadc",
Self::Sbc => "sbc",
Self::Rrc => "rrc",
Self::Swpb => "swpb",
Self::Rra => "rra",
Self::Sxt => "sxt",
Self::Push => "push",
Self::Call => "call",
Self::Reti => "reti",
Self::Jnz => "jnz",
Self::Jz => "jz",
Self::Jnc => "jnc",
Self::Jc => "jc",
Self::Jn => "jn",
Self::Jge => "jge",
Self::Jl => "jl",
Self::Jmp => "jmp",
Self::Mov => "mov",
Self::Add => "add",
Self::Addc => "addc",
Self::Subc => "subc",
Self::Sub => "sub",
Self::Cmp => "cmp",
Self::Dadd => "dadd",
Self::Bit => "bit",
Self::Bic => "bic",
Self::Bis => "bis",
Self::Xor => "xor",
Self::And => "and",
}
)
}
}
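
For reference, a few of the emulated-opcode resolutions above restated as the concrete instructions they assemble to (transcribed by hand from the `resolve()` arms, not generated by the crate):

```rust
/// (emulated form, concrete form) pairs, transcribed from Opcode::resolve() above.
const EMULATED: &[(&str, &str)] = &[
    ("nop", "mov #0, #0"),
    ("ret", "mov @sp+, pc"),
    ("pop dst", "mov @sp+, dst"),
    ("br dst", "mov dst, pc"),
    ("clrc", "bic #1, sr"),
    ("inc dst", "add #1, dst"),
    ("inv dst", "xor #-1, dst"),
];

fn main() {
    for (alias, concrete) in EMULATED {
        println!("{alias:10} => {concrete}");
    }
}
```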

View File

@@ -1,21 +0,0 @@
// © 2023 John Breaux
//! The definition of a label
use super::*;
/// The definition of a label
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label(pub Identifier);
impl Parsable for Label {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
Ok(Self(Identifier::parse(p, stream).and_then(|t| {
stream.require(Type::Label)?;
Ok(t)
})?))
}
}
impl Display for Label {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
}

View File

@@ -1,72 +0,0 @@
// © 2023 John Breaux
//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same
//! document line will be treated as if they took up multiple [`Line`s](Line).
//!
//! A line contains one of:
//! - [`Label`]
//! - [`Instruction`]
//! - [`Directive`]
//! - [`Comment`]
//! - [Nothing](Line::Empty)
use super::*;
/// A line contains any one of:
/// - [`Label`] (definition)
/// - [`Instruction`]
/// - [`Directive`]
/// - [`Comment`]
/// - Nothing at all
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Line {
Empty,
Insn(Instruction),
Comment(Comment),
Directive(Directive),
Label(Label),
EndOfFile, // Expected end of file
}
impl Parsable for Line {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
Ok(
match stream
.peek_expect_any_of([
Type::Endl,
Type::Insn,
Type::Comment,
Type::Directive,
Type::Identifier,
Type::EndOfFile,
])?
.variant()
{
Type::Endl => {
stream.next();
Self::Empty
}
Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
Type::Comment => Self::Comment(Comment::parse(p, stream)?),
Type::Directive => Self::Directive(Directive::parse(p, stream)?),
Type::Identifier => Self::Label(Label::parse(p, stream)?),
Type::EndOfFile => {
stream.next();
Self::EndOfFile
}
_ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
},
)
}
}
impl Display for Line {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Empty => writeln!(f, "\n"),
Self::Label(arg0) => Display::fmt(arg0, f),
Self::Insn(arg0) => Display::fmt(arg0, f),
Self::Directive(arg0) => Display::fmt(arg0, f),
Self::Comment(arg0) => Display::fmt(arg0, f),
Self::EndOfFile => write!(f, "; End of file."),
}
}
}

View File

@@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
use super::*;
/// Parses tokens from a [stream](TokenStream) into a Self node
pub trait Parsable {
/// Parses tokens from a [TokenStream](TokenStream) into a Self node
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where
Self: Sized,
T: TokenStream<'text>;
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
///
/// Masks failed expectations.
fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
where
Self: Sized,
T: TokenStream<'text>,
{
match Self::parse(p, stream) {
Ok(some) => Ok(Some(some)),
Err(ParseError::LexError(_)) => Ok(None),
Err(e) => Err(e),
}
}
/// Parses a Self node, then calls `f` on the parser and stream, returning both results
fn parse_and<'text, T, R>(
p: &Parser,
stream: &mut T,
f: fn(p: &Parser, &mut T) -> R,
) -> Result<(Self, R), ParseError>
where
Self: Sized,
T: TokenStream<'text>,
{
Ok((Self::parse(p, stream)?, f(p, stream)))
}
/// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
///
/// Returns [`Self::default()`](Default::default()) on error
fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
where
Self: Sized + Default,
T: TokenStream<'text>,
{
Self::parse(p, stream).unwrap_or_default()
}
}
macro_rules! parsable_str_types {
($($t:ty),*$(,)?) => {$(
impl Parsable for $t {
fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into())
}
}
)*};
}
use std::{path::PathBuf, rc::Rc};
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];
/// Vectors of arbitrary parsables are cool
impl<P: Parsable> Parsable for Vec<P> {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
// [dead beef]
// [A, B,]
// [c d e f]
// [ something
// else ]
stream.require(Type::LBracket)?;
stream.allow(Type::Endl);
let mut out = vec![];
while let Some(t) = P::try_parse(p, stream)? {
out.push(t);
stream.allow(Type::Separator);
stream.allow(Type::Endl);
}
stream.require(Type::RBracket)?;
Ok(out)
}
}

View File

@@ -1,51 +0,0 @@
// © 2023 John Breaux
use super::*;
use std::path::{Path, PathBuf};
/// Contains the entire AST
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Root(Option<PathBuf>, Vec<(usize, Line)>);
// pub struct Root { pub path: PathBuf, pub lines: Vec<Line> }
impl Root {
pub fn file(&self) -> Option<&Path> { self.0.as_deref() }
pub(crate) fn set_file(mut self, path: PathBuf) -> Self {
self.0 = Some(path);
self
}
pub fn lines(&self) -> &[(usize, Line)] { &self.1 }
}
impl Parsable for Root {
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
where T: TokenStream<'text> {
let mut lines = vec![];
loop {
let number = stream.context().line();
match Line::parse(p, stream)? {
Line::EndOfFile => break,
line => lines.push((number, line)),
}
}
Ok(Root(None, lines))
}
}
impl Display for Root {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (num, line) in &self.1 {
f.pad(&format!("{num:3}: {line} "))?;
}
Ok(())
}
}
impl Debug for Root {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for line in self.1.iter() {
Debug::fmt(line, f)?;
}
Ok(())
}
}

87
src/preprocessor.rs Normal file
View File

@@ -0,0 +1,87 @@
// © 2023-2024 John Breaux
use crate::{
lexer::{
token::{Token, TokenKind as Kind},
Lexer,
},
util::Span,
};
use std::collections::{HashMap, VecDeque};
#[derive(Clone, Debug)]
pub struct Preprocessor<'t> {
lexer: Lexer<'t>,
buf: VecDeque<Token<'t>>,
defn: HashMap<&'t str, Vec<Token<'t>>>,
/// Location for injected tokens
pos: Span<usize>,
}
impl<'t> Preprocessor<'t> {
pub fn new(text: &'t str) -> Self {
Self {
lexer: Lexer::new(text),
buf: Default::default(),
defn: Default::default(),
pos: Default::default(),
}
}
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() }
}
/// Returns the next token, preferring any queued expansion tokens over the lexer
pub fn scan(&mut self) -> Option<Token<'t>> {
self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos)
}
pub fn start(&self) -> usize {
self.lexer.location()
}
/// Grabs a token from the lexer. If its lexeme names a `.define`d macro, queues the
/// expansion and rescans; otherwise handles directives and skips newlines.
fn next(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
if let Some(tokens) = self.defn.get(token.lexeme) {
self.buf.extend(tokens.iter().copied().map(|mut t| {
t.pos = self.pos;
t
}));
return self.scan();
} else {
match token.kind {
Kind::Directive => self.directive(token),
Kind::Newline => return self.scan(),
_ => {}
}
Some(token)
}
}
/// Scans a token and passes it through to the output buffer while parsing a directive
fn tee(&mut self) -> Option<Token<'t>> {
let token = self.lexer.scan()?;
self.buf.push_back(token);
Some(token)
}
/// Parses and executes a directive
pub fn directive(&mut self, token: Token<'t>) {
if ".define" == token.lexeme {
self.define()
}
}
/// Handles a `.define` directive: records the tokens following the name for later substitution
pub fn define(&mut self) {
let Some(key) = self.tee() else {
return;
};
let mut value = vec![];
while let Some(token) = self.tee() {
match token.kind {
Kind::Comment => {
self.buf.push_back(token);
break;
}
Kind::Newline => break,
_ => value.push(token),
}
}
self.defn.insert(key.lexeme, value);
}
}
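
The `.define` handling above is a plain lexeme-keyed token substitution. Below is a minimal standalone model of that behavior on whitespace-separated words; the real Preprocessor operates on `Token`s and rewrites their source positions, and the `INTERRUPT_ENABLE` name is only an example:

```rust
use std::collections::HashMap;

/// Expands words through a `.define`-style substitution table.
fn expand<'t>(defn: &HashMap<&'t str, Vec<&'t str>>, line: &'t str) -> Vec<&'t str> {
    line.split_whitespace()
        .flat_map(|word| defn.get(word).cloned().unwrap_or_else(|| vec![word]))
        .collect()
}

fn main() {
    // .define INTERRUPT_ENABLE #8
    let defn = HashMap::from([("INTERRUPT_ENABLE", vec!["#8"])]);
    assert_eq!(
        expand(&defn, "bis INTERRUPT_ENABLE , sr"),
        vec!["bis", "#8", ",", "sr"]
    );
}
```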