v0.3.0: Total overhaul
- Everything has been rewritten
- Modularity is improved somewhat
  - No dependency injection in the preprocessor/parser, though
- There are now early and late constant evaluation engines
  - The late engine allows for by-value access to already-assembled code
  - It performs basic math operations, remainder, bitwise logic, bit shifts, negation, and bit inversion
  - It also allows indexing into already-generated code using pointer-arithmetic syntax: `*(&main + 10)` (see the sketch below). This is subject to change? It's clunky, and only allows word-aligned access. However, this rewrite is taking far too long, so I'll call the bikeshedding here.
- Pretty sure this constant evaluation is computationally equivalent to Deadfish?
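A rough sketch of what that looks like in assembler source — the label, operands, and offset below are made up for illustration, not taken from the repo's test suite:

```asm
main:
    mov     #((1 << 8) | 0x0F), r15 ; constant folding: shifts, bitwise logic, etc.
    .word   *(&main + 10)           ; late evaluation: word-aligned, by-value read of assembled code
```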
574 src/assembler.rs
@@ -1,197 +1,423 @@
|
||||
// © 2023 John Breaux
|
||||
//! Traverses an AST, assembling instructions.
|
||||
//!
|
||||
//! [Assembler] carries *some* state
|
||||
//! Assembles a binary using the given [AST](crate::parser::ast)
|
||||
|
||||
use crate::parser::preamble::*;
|
||||
use error::AssemblyError;
|
||||
use error::{AResult, ErrorKind::*};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod error;
|
||||
use crate::{assembler::canonical::Canonicalize, lexer::token, parser::ast::*, util::Span};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum IdentType {
|
||||
Word,
|
||||
Jump,
|
||||
}
|
||||
use self::error::{Error, ErrorKind};
|
||||
|
||||
/// Takes in an AST's [Root], and outputs a sequence of bytes
|
||||
/// Assembles a binary using the given [Assemble]-able item
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct Assembler {
|
||||
out: Vec<u16>,
|
||||
/// A map from Labels' [Identifier]s to their location in the binary
|
||||
labels: HashMap<Identifier, usize>,
|
||||
/// A list of all referenced [Identifier]s in the binary, and their locations
|
||||
identifiers: Vec<(usize, Identifier, IdentType)>,
|
||||
pub struct Assembler<'t> {
|
||||
/// The assembled output
|
||||
output: Vec<u16>,
|
||||
/// Table of labels, for backpatching
|
||||
labels: HashMap<&'t str, usize>,
|
||||
/// Backpatch table for jump instructions
|
||||
jump_queue: Vec<(usize, &'t str)>,
|
||||
/// Backpatch table for immediate values
|
||||
expr_queue: Vec<(usize, Expr<'t>)>,
|
||||
/// Base address from .org directives
|
||||
org_base: usize,
|
||||
/// Last seen index in input
|
||||
loc: Span<usize>,
|
||||
}
|
||||
|
||||
impl Assembler {
|
||||
pub fn assemble(r: &Root) -> Result<Vec<u16>, AssemblyError> {
|
||||
let mut out = Self::default();
|
||||
out.visit_root(r)?;
|
||||
Ok(out.out)
|
||||
impl<'t> Assembler<'t> {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
pub fn load(&mut self, r: &Root) -> Result<(), AssemblyError> { self.visit_root(r) }
|
||||
pub fn out(self) -> Vec<u16> { self.out }
|
||||
|
||||
fn last_mut(&mut self) -> Result<&mut u16, AssemblyError> { self.out.last_mut().ok_or(AssemblyError::EmptyBuffer) }
|
||||
fn push_default(&mut self) -> usize {
|
||||
self.out.push(Default::default());
|
||||
self.out.len() - 1
|
||||
pub fn assemble<T: Assemble<'t>>(&mut self, t: &T) -> AResult<&mut Self> {
|
||||
t.assemble_in(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Assembler {
|
||||
/// Visits the [Root] node of a parse tree
|
||||
fn visit_root(&mut self, r: &Root) -> Result<(), AssemblyError> {
|
||||
// Visit the entire tree
|
||||
for (num, line) in r.lines() {
|
||||
self.visit_line(line).map_err(|e| e.ctx(r.file().unwrap_or(Path::new("stdin")), *num))?;
|
||||
/// Gets the address of a label
|
||||
pub fn addrof(&self, name: &str) -> Option<u16> {
|
||||
self.labels.get(name).map(|v| *v as u16)
|
||||
}
|
||||
/// Gets the value at a label
|
||||
pub fn valueof(&self, name: &str) -> Option<u16> {
|
||||
self.output.get(self.addrof(name)? as usize).copied()
|
||||
}
|
||||
fn push(&mut self, word: u16) {
|
||||
self.output.push(word)
|
||||
}
|
||||
fn error(&self, kind: ErrorKind) -> Error {
|
||||
Error { span: self.loc, kind }
|
||||
}
|
||||
/// Backpatches everything, and yoinks the output buffer.
|
||||
pub fn out(&mut self) -> AResult<Vec<u16>> {
|
||||
// Resolve jumps
|
||||
for (idx, key) in &self.jump_queue {
|
||||
// eprintln!("Patching jump at {idx} with key {key}");
|
||||
match self.labels.get(key).map(|addr| addr.wrapping_sub(*idx as _) as i16) {
|
||||
None => Err(self.error(UndefinedLabel(key.to_string())))?,
|
||||
Some(value @ -0x3ff..=0x3fc) => self.output[*idx] |= (value - 1) as u16 & 0x3ff,
|
||||
Some(value) => Err(self.error(LongJump(value)))?,
|
||||
}
|
||||
}
|
||||
// Link identifiers
|
||||
for (idx, id, id_type) in self.identifiers.iter() {
|
||||
let Some(&num) = self.labels.get(id) else { return Err(AssemblyError::UnresolvedIdentifier(id.clone())) };
|
||||
let offset = (num as isize - *idx as isize) * 2;
|
||||
*self.out.get_mut(*idx).expect("idx should be a valid index into out") |= match id_type {
|
||||
IdentType::Word => offset as u16,
|
||||
IdentType::Jump => JumpTarget::squish(offset)?,
|
||||
};
|
||||
// Resolve immediates through late expression evaluation.
|
||||
for (idx, expr) in &self.expr_queue {
|
||||
// eprintln!("Patching immediate at {idx} with expression {expr:?}");
|
||||
self.output[*idx] = self.eval(expr)?;
|
||||
}
|
||||
let out = std::mem::take(&mut self.output);
|
||||
*self = Default::default();
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn add_label(&mut self, label: &'t str) -> AResult<()> {
|
||||
if *self.labels.entry(label).or_insert(self.output.len()) != self.output.len() {
|
||||
Err(self.error(RedefinedLabel(label.into())))?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// visit a [Line]
|
||||
fn visit_line(&mut self, line: &Line) -> Result<(), AssemblyError> {
|
||||
match line {
|
||||
Line::Insn(insn) => self.visit_instruction(insn),
|
||||
Line::Label(label) => self.visit_label(label),
|
||||
Line::Directive(d) => self.visit_directive(d),
|
||||
_ => Ok(()),
|
||||
/// Appends an expr as an extword, deferring its calculation for later
|
||||
pub fn defer_expr(&mut self, e: Expr<'t>) {
|
||||
self.expr_queue.push((self.output.len(), e));
|
||||
self.push(0);
|
||||
}
|
||||
/// Defers resolution of a jump label until output time
|
||||
/// The jump label will be later resolved to the NEXT word.
|
||||
pub fn defer_jump(&mut self, label: &'t str) {
|
||||
self.jump_queue.push((self.output.len(), label))
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Assemble<'t> {
|
||||
fn assemble(&self) -> AResult<Vec<u16>> {
|
||||
self.assemble_in(&mut Default::default())?.out()
|
||||
}
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>>;
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for Statements<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
for stmt in &self.stmts {
|
||||
stmt.assemble_in(a)?;
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Statement<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
Statement::Label(label) => a.add_label(label).map(|_| a),
|
||||
Statement::Insn(i) => i.assemble_in(a),
|
||||
Statement::Directive(d) => d.assemble_in(a),
|
||||
Statement::Comment(_) => Ok(a),
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits a [Directive]
|
||||
fn visit_directive(&mut self, node: &Directive) -> Result<(), AssemblyError> {
|
||||
match node {
|
||||
Directive::Org(_) => todo!("{node}"),
|
||||
Directive::Define(..) => (),
|
||||
Directive::Include(r) => self.visit_root(r)?,
|
||||
Directive::Byte(word) | Directive::Word(word) => self.out.push((*word).into()),
|
||||
Directive::Bytes(words) | Directive::Words(words) => {
|
||||
for word in words {
|
||||
self.out.push((*word).into());
|
||||
}
|
||||
impl<'t> Assemble<'t> for Directive<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
Directive::Define(_) => {}
|
||||
Directive::Org(base) => a.org_base = a.eval(base)? as usize,
|
||||
Directive::Word(expr) => a.defer_expr(*expr.clone()),
|
||||
Directive::Words(exprs) => {
|
||||
for expr in exprs {
|
||||
a.defer_expr(expr.clone())
|
||||
}
|
||||
}
|
||||
Directive::String(s) => self.visit_string(s)?,
|
||||
Directive::Strings(strs) => {
|
||||
for s in strs {
|
||||
self.visit_string(s)?;
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [Label]
|
||||
fn visit_label(&mut self, node: &Label) -> Result<(), AssemblyError> {
|
||||
// Register the label
|
||||
match self.labels.insert(node.0.to_owned(), self.out.len()) {
|
||||
Some(_) => Err(AssemblyError::RedefinedLabel(node.0.to_owned())),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits an [Instruction]
|
||||
fn visit_instruction(&mut self, insn: &Instruction) -> Result<(), AssemblyError> {
|
||||
self.push_default();
|
||||
self.visit_opcode(insn.opcode())?;
|
||||
self.visit_encoding(insn.encoding())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Opcode]
|
||||
fn visit_opcode(&mut self, node: &Opcode) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= *node as u16;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Encoding]
|
||||
fn visit_encoding(&mut self, node: &Encoding) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= node.word();
|
||||
match node {
|
||||
Encoding::Single { dst, .. } => {
|
||||
self.visit_primary_operand(dst)?;
|
||||
}
|
||||
Encoding::Jump { target } => {
|
||||
self.visit_jump_target(target)?;
|
||||
}
|
||||
Encoding::Double { src, dst, .. } => {
|
||||
self.visit_primary_operand(src)?;
|
||||
self.visit_secondary_operand(dst)?;
|
||||
Directive::String(str) => {
|
||||
str.assemble_in(a)?;
|
||||
}
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for &'t str {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
for chunk in self.as_bytes().chunks(2) {
|
||||
match chunk.len() {
|
||||
0 => a.push(0),
|
||||
1 => {
|
||||
a.push(chunk[0] as u16);
|
||||
return Ok(a);
|
||||
}
|
||||
2 => a.push((chunk[1] as u16) << 8 | chunk[0] as u16),
|
||||
n => unreachable!("expected chunks of length 2, got length {n}"),
|
||||
}
|
||||
}
|
||||
a.push(0);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Assemble<'t> for Instruction<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { span, kind } = self;
|
||||
a.loc = *span;
|
||||
kind.assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for InstructionKind<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
match self {
|
||||
InstructionKind::NoEm(v) => v.assemble_in(a),
|
||||
InstructionKind::OneEm(v) => v.assemble_in(a),
|
||||
InstructionKind::OneArg(v) => v.assemble_in(a),
|
||||
InstructionKind::TwoArg(v) => v.assemble_in(a),
|
||||
InstructionKind::Jump(v) => v.assemble_in(a),
|
||||
InstructionKind::Reti(v) => v.assemble_in(a),
|
||||
InstructionKind::Br(v) => v.assemble_in(a),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for NoEm {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for OneEm<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for OneArg<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ 0 0 0 1 0 0 [op:3 ] bw [Ad ] [dst_reg:4] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, width, src } = self;
|
||||
let (src_reg, src_mode, src_ext) = source(src);
|
||||
a.push(
|
||||
0b000100 << 10 | one_arg(*opcode) << 7 | (*width as u16) << 6 | src_mode << 4 | src_reg,
|
||||
);
|
||||
if let Some(expr) = src_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for TwoArg<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ [opcode:4 ] [src_reg:4] Ad bw [As ] [dst_reg:4] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
let (src_reg, src_mode, src_ext) = source(src);
|
||||
let (dst_reg, dst_mode, dst_ext) = destination(dst);
|
||||
a.push(
|
||||
two_arg(*opcode) << 12
|
||||
| src_reg << 8
|
||||
| dst_mode << 7
|
||||
| (*width as u16) << 6
|
||||
| src_mode << 4
|
||||
| dst_reg,
|
||||
);
|
||||
|
||||
if let Some(expr) = src_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
if let Some(expr) = dst_ext {
|
||||
a.defer_expr(expr)
|
||||
}
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Jump<'t> {
|
||||
/// [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
|
||||
/// [ 0 0 1 [cond:3] +- [word_offset:10 ] ]
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
let Self { opcode, dst } = self;
|
||||
let word = 1 << 13
|
||||
| jump(*opcode) << 10
|
||||
| match *dst {
|
||||
JumpDst::Rel(value) if value & 1 == 1 => return Err(a.error(OddJump(value))),
|
||||
JumpDst::Rel(value) if !(-0x3fe..=0x400).contains(&value) => {
|
||||
return Err(a.error(LongJump(value)))
|
||||
}
|
||||
JumpDst::Rel(value) => (value - 1) as u16 >> 1 & 0x3ff,
|
||||
JumpDst::Label(label) => {
|
||||
a.defer_jump(label);
|
||||
0
|
||||
}
|
||||
} & 0x3ff;
|
||||
a.push(word);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Reti {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
a.output.push(0b0001_0011_0000_0000);
|
||||
Ok(a)
|
||||
}
|
||||
}
|
||||
impl<'t> Assemble<'t> for Br<'t> {
|
||||
fn assemble_in<'a>(&self, a: &'a mut Assembler<'t>) -> AResult<&'a mut Assembler<'t>> {
|
||||
eprintln!(
|
||||
"Warning: directly assembling a noncanonical instruction may lead to unwanted overhead"
|
||||
);
|
||||
self.clone().to_canonical().assemble_in(a)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one_arg(opcode: token::OneArg) -> u16 {
|
||||
opcode as u16
|
||||
}
|
||||
|
||||
pub fn two_arg(opcode: token::TwoArg) -> u16 {
|
||||
opcode as u16 + 4
|
||||
}
|
||||
|
||||
pub fn jump(opcode: token::Jump) -> u16 {
|
||||
use token::Jump;
|
||||
match opcode {
|
||||
Jump::Jne | Jump::Jnz => 0,
|
||||
Jump::Jeq | Jump::Jz => 1,
|
||||
Jump::Jnc | Jump::Jlo => 2,
|
||||
Jump::Jc | Jump::Jhs => 3,
|
||||
Jump::Jn => 4,
|
||||
Jump::Jge => 5,
|
||||
Jump::Jl => 6,
|
||||
Jump::Jmp => 7,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a tuple of (Reg, AddrMode, extword)
|
||||
pub fn source<'t>(src: &Src<'t>) -> (u16, u16, Option<Expr<'t>>) {
|
||||
use SrcSpecial::*;
|
||||
match src {
|
||||
Src::Special(Four) => (2, 2, None),
|
||||
Src::Special(Eight) => (2, 3, None),
|
||||
Src::Special(Zero) => (3, 0, None),
|
||||
Src::Special(One) => (3, 1, None),
|
||||
Src::Special(Two) => (3, 2, None),
|
||||
Src::Special(NegOne) => (3, 3, None),
|
||||
Src::Immediate(e) => (0, 3, Some(*e.clone())),
|
||||
Src::Absolute(e) => (2, 1, Some(*e.clone())),
|
||||
Src::Direct(r) => (*r as u16, 0, None),
|
||||
Src::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
|
||||
Src::Indirect(r) => (*r as u16, 2, None),
|
||||
Src::PostInc(r) => (*r as u16, 3, None),
|
||||
Src::BareExpr(e) => (0, 3, Some(*e.clone())),
|
||||
}
|
||||
}
|
||||
/// Returns a tuple of (Reg, AddrMode, Extword)
|
||||
pub fn destination<'t>(dst: &Dst<'t>) -> (u16, u16, Option<Expr<'t>>) {
|
||||
use DstSpecial::*;
|
||||
match dst {
|
||||
Dst::Special(Zero) => (3, 0, None),
|
||||
Dst::Special(One) => (3, 1, None),
|
||||
Dst::Absolute(e) => (2, 1, Some(*e.clone())),
|
||||
Dst::Indexed(e, r) => (*r as u16, 1, Some(*e.clone())),
|
||||
Dst::Direct(r) => (*r as u16, 0, None),
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Assembler<'t> {
|
||||
/// Evaluates an [Expr] using labels and constants defined in the current program
|
||||
fn eval(&self, expr: &Expr) -> AResult<u16> {
|
||||
match expr {
|
||||
Expr::Binary(head, tails) => {
|
||||
let mut head = self.eval(head)?;
|
||||
for (op, tail) in tails {
|
||||
let tail = self.eval(tail)?;
|
||||
head = match op {
|
||||
BinOp::Mul => head.wrapping_mul(tail),
|
||||
BinOp::Div => head.wrapping_div(tail),
|
||||
BinOp::Rem => head.wrapping_rem(tail),
|
||||
BinOp::Add => head.wrapping_add(tail),
|
||||
BinOp::Sub => head.wrapping_sub(tail),
|
||||
BinOp::Lsh => head.wrapping_shl(tail as u32),
|
||||
BinOp::Rsh => head.wrapping_shr(tail as u32),
|
||||
BinOp::And => head & tail,
|
||||
BinOp::Xor => head ^ tail,
|
||||
BinOp::Or => head | tail,
|
||||
};
|
||||
}
|
||||
Ok(head)
|
||||
}
|
||||
Expr::Unary(ops, tail) => {
|
||||
let mut tail = self.eval(tail)?;
|
||||
for op in ops {
|
||||
tail = match op {
|
||||
UnOp::Not => !tail,
|
||||
UnOp::Neg => 0u16.wrapping_sub(tail),
|
||||
UnOp::Deref => *self
|
||||
.output
|
||||
.get(tail.wrapping_sub(self.org_base as u16) as usize >> 1)
|
||||
.ok_or_else(|| self.error(OobRead(tail)))?,
|
||||
}
|
||||
}
|
||||
Ok(tail)
|
||||
}
|
||||
Expr::Group(e) => self.eval(e),
|
||||
Expr::Number(n) => Ok(*n),
|
||||
Expr::Ident(name) => {
|
||||
self.valueof(name).ok_or_else(|| self.error(UndefinedLabel(name.to_string())))
|
||||
}
|
||||
Expr::AddrOf(name) => self
|
||||
.addrof(name)
|
||||
.map(|p| (p << 1).wrapping_add(self.org_base as u16))
|
||||
.ok_or_else(|| self.error(UndefinedLabel(name.to_string()))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod error {
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::util::Span;
|
||||
|
||||
pub type AResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Error {
|
||||
pub span: Span<usize>,
|
||||
pub kind: ErrorKind,
|
||||
}
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum ErrorKind {
|
||||
#[default]
|
||||
Todo,
|
||||
/// A label was used, but not defined
|
||||
UndefinedLabel(String),
|
||||
RedefinedLabel(String),
|
||||
OobRead(u16),
|
||||
OddJump(i16),
|
||||
LongJump(i16),
|
||||
/// A plethora of [Error]s
|
||||
Errors(Vec<Error>),
|
||||
}
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { kind, span } = self;
|
||||
write!(f, "[{span}]: ")?;
|
||||
write!(f, "Error: {kind}")
|
||||
}
|
||||
}
|
||||
impl Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ErrorKind::Todo => write!(f, "Not yet implemented"),
|
||||
ErrorKind::UndefinedLabel(label) => write!(f, "Label '{label}' not defined"),
|
||||
ErrorKind::RedefinedLabel(label) => write!(f, "Label '{label}' already defined"),
|
||||
ErrorKind::OobRead(addr) => {
|
||||
write!(f, "Out of bounds read in constant expression: {addr}")
|
||||
}
|
||||
ErrorKind::OddJump(to) => write!(f, "Cannot jump to odd location: {to}"),
|
||||
ErrorKind::LongJump(to) => {
|
||||
write!(f, "Jump target ({to}) outside of range -0x400..=0x3fe")
|
||||
}
|
||||
ErrorKind::Errors(errors) => {
|
||||
writeln!(f, "Could not complete assembly:")?;
|
||||
for error in errors {
|
||||
writeln!(f, "{error}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [JumpTarget]
|
||||
fn visit_jump_target(&mut self, node: &JumpTarget) -> Result<(), AssemblyError> {
|
||||
match node {
|
||||
JumpTarget::Number(num) => self.visit_number(num),
|
||||
JumpTarget::Identifier(id) => {
|
||||
self.visit_identifier(id, self.out.len() - 1, IdentType::Jump)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visits a [SecondaryOperand]
|
||||
fn visit_secondary_operand(&mut self, node: &SecondaryOperand) -> Result<(), AssemblyError> {
|
||||
use SecondaryOperand as O;
|
||||
if let O::Indexed(_, num) | O::Absolute(num) = node {
|
||||
self.push_default();
|
||||
self.visit_number(num)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a [PrimaryOperand]
|
||||
fn visit_primary_operand(&mut self, node: &PrimaryOperand) -> Result<(), AssemblyError> {
|
||||
use PrimaryOperand as O;
|
||||
match node {
|
||||
O::Indexed(_, num) | O::Absolute(num) | O::Immediate(num) => {
|
||||
self.push_default();
|
||||
self.visit_number(num)?;
|
||||
}
|
||||
O::Relative(id) => {
|
||||
let addr = self.push_default();
|
||||
self.visit_identifier(id, addr, IdentType::Word)?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a number and writes it into the last index
|
||||
fn visit_number(&mut self, node: &Number) -> Result<(), AssemblyError> {
|
||||
*self.last_mut()? |= u16::from(*node);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits a number and appends it to the output buffer
|
||||
fn visit_string(&mut self, node: &str) -> Result<(), AssemblyError> {
|
||||
for (idx, byte) in node.bytes().chain([0u8].into_iter()).enumerate() {
|
||||
if idx % 2 == 0 {
|
||||
self.push_default();
|
||||
}
|
||||
*self.last_mut()? |= (byte as u16) << (8 * (idx % 2));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Visits an [Identifier], and registers it to the identifier list
|
||||
fn visit_identifier(&mut self, node: &Identifier, addr: usize, ty: IdentType) -> Result<(), AssemblyError> {
|
||||
self.identifiers.push((addr, node.clone(), ty));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
use crate::parser::{error::ParseError, preamble::*};
|
||||
use std::{
|
||||
fmt::Display,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AssemblyError {
|
||||
UnresolvedIdentifier(Identifier),
|
||||
RedefinedLabel(Identifier),
|
||||
JumpedTooFar(Identifier, isize),
|
||||
ParseError(ParseError),
|
||||
// TODO: This, better
|
||||
Context(Box<AssemblyError>, PathBuf, usize),
|
||||
EmptyBuffer,
|
||||
}
|
||||
|
||||
impl AssemblyError {
|
||||
pub(super) fn ctx<P: AsRef<Path> + ?Sized>(self, file: &P, line: usize) -> Self {
|
||||
Self::Context(self.into(), file.as_ref().into(), line)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseError> for AssemblyError {
|
||||
fn from(value: ParseError) -> Self { Self::ParseError(value) }
|
||||
}
|
||||
|
||||
impl Display for AssemblyError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnresolvedIdentifier(id) => {
|
||||
write!(f, "Identifier {id} is undefined, but referenced anyway.")
|
||||
}
|
||||
Self::RedefinedLabel(id) => {
|
||||
write!(f, "Redefined label '{id}'.")
|
||||
}
|
||||
Self::JumpedTooFar(id, num) => {
|
||||
write!(f, "Label '{id}' is too far away. ({num} is outside range -0x400..=0x3fe)")
|
||||
}
|
||||
Self::ParseError(e) => Display::fmt(e, f),
|
||||
Self::Context(e, file, line) => write!(f, "{}:{line}:\n\t{e}", file.display()),
|
||||
Self::EmptyBuffer => Display::fmt("Tried to get last element of output buffer, but buffer was empty", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for AssemblyError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::ParseError(e) => Some(e),
|
||||
Self::Context(e, ..) => Some(e),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
49 src/error.rs
@@ -1,49 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Common error type for [msp430-asm](crate) errors
|
||||
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Produced by [lexer]
|
||||
LexError(lexer::error::LexError),
|
||||
/// Produced by [parser]
|
||||
ParseError(parser::error::ParseError),
|
||||
/// Produced by [assembler]
|
||||
AssemblyError(assembler::error::AssemblyError),
|
||||
}
|
||||
|
||||
impl Error {}
|
||||
|
||||
impl From<lexer::error::LexError> for Error {
|
||||
fn from(value: lexer::error::LexError) -> Self { Self::LexError(value) }
|
||||
}
|
||||
|
||||
impl From<parser::error::ParseError> for Error {
|
||||
fn from(value: parser::error::ParseError) -> Self { Self::ParseError(value) }
|
||||
}
|
||||
|
||||
impl From<assembler::error::AssemblyError> for Error {
|
||||
fn from(value: assembler::error::AssemblyError) -> Self { Self::AssemblyError(value) }
|
||||
}
|
||||
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Error::LexError(e) => Display::fmt(e, f),
|
||||
Error::ParseError(e) => Display::fmt(e, f),
|
||||
Error::AssemblyError(e) => Display::fmt(e, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Error::LexError(e) => Some(e),
|
||||
Error::ParseError(e) => Some(e),
|
||||
Error::AssemblyError(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
22 src/hash.rs
@@ -1,22 +0,0 @@
// © 2023 John Breaux
//! Convenience functions and traits for dealing with hashable data
pub type Hash = u64;

/// Calculates a hash using Rust hashmap's default hasher.
pub fn hash<T: std::hash::Hash>(hashable: T) -> Hash {
    use std::hash::Hasher;
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    hashable.hash(&mut hasher);
    hasher.finish()
}

pub trait FromHash: From<Hash> {
    /// Hashes anything that implements [type@Hash] using the
    /// [DefaultHasher](std::collections::hash_map::DefaultHasher)
    fn hash<T: std::hash::Hash>(hashable: T) -> Hash { hash(hashable) }
    fn from_hash<T: std::hash::Hash>(hashable: T) -> Self
    where Self: Sized {
        Self::from(Self::hash(hashable))
    }
}
impl<T: From<Hash>> FromHash for T {}
311 src/lexer.rs
@@ -1,69 +1,268 @@
|
||||
// © 2023 John Breaux
|
||||
//! Iterates over [`&str`](str), producing [`Token`s](Token)
|
||||
// © 2023-2024 John Breaux
|
||||
//! The [Lexer] turns a [sequence of characters](str) into a stream of
|
||||
//! [lexically-tagged tokens](token)
|
||||
|
||||
pub mod context;
|
||||
pub mod error;
|
||||
pub mod ignore;
|
||||
pub mod preprocessed;
|
||||
pub mod token;
|
||||
pub mod token_stream;
|
||||
|
||||
use context::Context;
|
||||
use error::LexError;
|
||||
use token::{Token, Type};
|
||||
use token_stream::TokenStream;
|
||||
use self::token::{Special, TokenKind, *};
|
||||
use crate::util::Span;
|
||||
use std::{
|
||||
iter::Peekable,
|
||||
str::{CharIndices, FromStr},
|
||||
};
|
||||
use unicode_ident::*;
|
||||
|
||||
/// Iterates over &[str], producing [Token]s
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Tokenizer<'t> {
|
||||
const DEFAULT_BASE: u32 = 10;
|
||||
|
||||
/// Turns a [sequence of characters](str) into a stream of [lexically identified tokens](token).
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// # use libmsp430::lexer::{Lexer, token::*};
|
||||
/// let text = "mov r14, r15";
|
||||
/// let mut lexer = Lexer::new(text);
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::TwoArg(TwoArg::Mov));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R14));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Comma);
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Reg(Reg::R15));
|
||||
/// assert_eq!(lexer.scan().unwrap().kind, TokenKind::Eof);
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Lexer<'t> {
|
||||
/// Keeps track of the byte offset into the string
|
||||
iter: Peekable<CharIndices<'t>>,
|
||||
text: &'t str,
|
||||
idx: usize,
|
||||
context: Context,
|
||||
start: usize,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl<'t> Tokenizer<'t> {
|
||||
/// Produces a new [Tokenizer] from a [str]ing slice
|
||||
pub fn new<T>(text: &'t T) -> Self
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Tokenizer { text: text.as_ref(), idx: 0, context: Default::default() }
|
||||
impl<'t> Lexer<'t> {
|
||||
/// Creates a new [Lexer] over some [text](str)
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
Self { iter: text.char_indices().peekable(), text, start: 0, index: 0 }
|
||||
}
|
||||
|
||||
fn count(&mut self, token: &Token) {
|
||||
// update the context
|
||||
self.context.count(token);
|
||||
// advance the index
|
||||
self.idx += token.len();
|
||||
/// Gets the current byte-position
|
||||
pub fn location(&self) -> usize {
|
||||
self.start
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> Iterator for Tokenizer<'text> {
|
||||
type Item = Token<'text>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.idx >= self.text.len() {
|
||||
return None;
|
||||
/// Internal: Emits a token with the provided [TokenKind], providing its extents.
|
||||
fn emit(&mut self, kind: TokenKind) -> Option<Token<'t>> {
|
||||
let out =
|
||||
Some(Token::new(self.next_lexeme(), kind, Span { start: self.start, end: self.index }));
|
||||
self.start = self.index;
|
||||
out
|
||||
}
|
||||
fn next_lexeme(&self) -> &'t str {
|
||||
&self.text[self.start..self.index]
|
||||
}
|
||||
fn repeat(&mut self, f: impl Fn(char) -> bool) -> &mut Self {
|
||||
while let Some(&c) = self.peek() {
|
||||
if !f(c) {
|
||||
break;
|
||||
}
|
||||
self.next();
|
||||
}
|
||||
let token = Token::from(&self.text[self.idx..]);
|
||||
// Process [Type::Directive]s
|
||||
// Count the token
|
||||
self.count(&token);
|
||||
Some(token)
|
||||
self
|
||||
}
|
||||
fn space(&mut self) -> Option<&mut Self> {
|
||||
while self.peek()?.is_whitespace() && *self.peek()? != '\n' {
|
||||
self.next();
|
||||
}
|
||||
self.start = self.index;
|
||||
Some(self)
|
||||
}
|
||||
/// Consumes a [char] without checking, for ergonomic chaining
|
||||
fn then(&mut self) -> &mut Self {
|
||||
self.next();
|
||||
self
|
||||
}
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.iter.peek().map(|(_, c)| c)
|
||||
}
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let (index, c) = self.iter.next()?;
|
||||
self.index = index + c.len_utf8();
|
||||
Some(c)
|
||||
}
|
||||
|
||||
/// Scans for the next [Token] in the stream
|
||||
pub fn scan(&mut self) -> Option<Token<'t>> {
|
||||
if self.space().is_none() {
|
||||
return self.emit(TokenKind::Eof);
|
||||
}
|
||||
let Some(c) = self.peek() else {
|
||||
return self.emit(TokenKind::Eof);
|
||||
};
|
||||
match c {
|
||||
'\n' => self.then().emit(TokenKind::Newline),
|
||||
'!' => self.then().emit(TokenKind::Bang),
|
||||
'#' => self.then().emit(TokenKind::Hash),
|
||||
'$' => self.then().emit(TokenKind::Dollar),
|
||||
'%' => self.then().emit(TokenKind::Percent),
|
||||
'&' => self.then().emit(TokenKind::Amp),
|
||||
'\'' => self.then().char(),
|
||||
'"' => self.then().string(),
|
||||
'(' => self.then().emit(TokenKind::OpenParen),
|
||||
')' => self.then().emit(TokenKind::CloseParen),
|
||||
'*' => self.then().emit(TokenKind::Star),
|
||||
'+' => self.then().emit(TokenKind::Plus),
|
||||
',' => self.then().emit(TokenKind::Comma),
|
||||
'-' => self.then().emit(TokenKind::Minus),
|
||||
'.' => self.then().directive_or_bw(),
|
||||
'/' => self.then().comment_or_slash(),
|
||||
'0' => self.then().number_with_base(),
|
||||
':' => self.then().emit(TokenKind::Colon),
|
||||
';' => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
|
||||
'<' => self.then().less(),
|
||||
'>' => self.then().greater(),
|
||||
'@' => self.then().emit(TokenKind::At),
|
||||
'[' => self.then().emit(TokenKind::OpenBrace),
|
||||
']' => self.then().emit(TokenKind::CloseBrace),
|
||||
'^' => self.then().emit(TokenKind::Caret),
|
||||
'_' => self.then().identifier(),
|
||||
'{' => self.then().emit(TokenKind::OpenCurly),
|
||||
'|' => self.then().emit(TokenKind::Bar),
|
||||
'}' => self.then().emit(TokenKind::CloseCurly),
|
||||
c if c.is_numeric() => self.number::<DEFAULT_BASE>(),
|
||||
&c if is_xid_start(c) => self.then().identifier(),
|
||||
c => todo!("Unrecognized character: {c}"),
|
||||
}
|
||||
}
|
||||
fn number_with_base(&mut self) -> Option<Token<'t>> {
|
||||
match self.peek() {
|
||||
Some('x') => self.then().number::<16>(),
|
||||
Some('d') => self.then().number::<10>(),
|
||||
Some('o') => self.then().number::<8>(),
|
||||
Some('b') => self.then().number::<2>(),
|
||||
Some(c) if c.is_ascii_digit() => self.number::<DEFAULT_BASE>(),
|
||||
_ => self.emit(TokenKind::Number(0, 10)),
|
||||
}
|
||||
}
|
||||
fn number<const B: u32>(&mut self) -> Option<Token<'t>> {
|
||||
let mut num = self.digit::<B>()?;
|
||||
while let Some(digit) = self.digit::<B>() {
|
||||
num = num * B + digit;
|
||||
}
|
||||
if num > u16::MAX as u32 {
|
||||
None
|
||||
} else {
|
||||
self.emit(TokenKind::Number(num as u16, B as u8))
|
||||
}
|
||||
}
|
||||
fn digit<const B: u32>(&mut self) -> Option<u32> {
|
||||
let digit = self.peek()?.to_digit(B)?;
|
||||
self.then();
|
||||
Some(digit)
|
||||
}
|
||||
|
||||
fn comment_or_slash(&mut self) -> Option<Token<'t>> {
|
||||
match self.peek() {
|
||||
Some('/') => self.repeat(|c| c != '\n').emit(TokenKind::Comment),
|
||||
_ => self.emit(TokenKind::Slash),
|
||||
}
|
||||
}
|
||||
fn less(&mut self) -> Option<Token<'t>> {
|
||||
match self.peek() {
|
||||
Some('<') => self.then().emit(TokenKind::Lsh),
|
||||
_ => todo!("less"),
|
||||
}
|
||||
}
|
||||
fn greater(&mut self) -> Option<Token<'t>> {
|
||||
match self.peek() {
|
||||
Some('>') => self.then().emit(TokenKind::Rsh),
|
||||
_ => todo!("greater"),
|
||||
}
|
||||
}
|
||||
fn identifier(&mut self) -> Option<Token<'t>> {
|
||||
while let Some(c) = self.then().peek() {
|
||||
if !is_xid_continue(*c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let lexeme = self.next_lexeme();
|
||||
if let Ok(op) = Reg::from_str(lexeme) {
|
||||
self.emit(TokenKind::Reg(op))
|
||||
} else if let Ok(op) = NoEm::from_str(lexeme) {
|
||||
self.emit(TokenKind::NoEm(op))
|
||||
} else if let Ok(op) = OneEm::from_str(lexeme) {
|
||||
self.emit(TokenKind::OneEm(op))
|
||||
} else if let Ok(op) = Special::from_str(lexeme) {
|
||||
self.emit(TokenKind::Special(op))
|
||||
} else if let Ok(op) = OneArg::from_str(lexeme) {
|
||||
self.emit(TokenKind::OneArg(op))
|
||||
} else if let Ok(op) = TwoArg::from_str(lexeme) {
|
||||
self.emit(TokenKind::TwoArg(op))
|
||||
} else if let Ok(op) = Jump::from_str(lexeme) {
|
||||
self.emit(TokenKind::Jump(op))
|
||||
} else {
|
||||
self.emit(TokenKind::Identifier)
|
||||
}
|
||||
}
|
||||
fn directive_or_bw(&mut self) -> Option<Token<'t>> {
|
||||
while let Some(c) = self.then().peek() {
|
||||
if !is_xid_continue(*c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
match self.next_lexeme() {
|
||||
".b" => self.emit(TokenKind::Byte),
|
||||
".w" => self.emit(TokenKind::Word),
|
||||
_ => self.emit(TokenKind::Directive),
|
||||
}
|
||||
}
|
||||
|
||||
/// Todo: Character unescaping in Lexer::string
|
||||
fn string(&mut self) -> Option<Token<'t>> {
|
||||
while '"' != self.next()? {}
|
||||
self.emit(TokenKind::String)
|
||||
}
|
||||
fn char(&mut self) -> Option<Token<'t>> {
|
||||
let out = self.unescape()?;
|
||||
self.next().filter(|c| *c == '\'').and_then(|_| self.emit(TokenKind::Char(out)))
|
||||
}
|
||||
/// Unescape a single character
|
||||
fn unescape(&mut self) -> Option<char> {
|
||||
match self.next() {
|
||||
Some('\\') => (),
|
||||
other => return other,
|
||||
}
|
||||
Some(match self.next()? {
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'f' => '\x0c',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'x' => self.hex_escape()?,
|
||||
'u' => self.unicode_escape()?,
|
||||
'0' => '\0',
|
||||
chr => chr,
|
||||
})
|
||||
}
|
||||
/// unescape a single 2-digit hex escape
|
||||
fn hex_escape(&mut self) -> Option<char> {
|
||||
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
|
||||
char::from_u32(out) //.ok_or(Error::bad_unicode(out, self.line(), self.col()))
|
||||
}
|
||||
/// unescape a single \u{} unicode escape
|
||||
fn unicode_escape(&mut self) -> Option<char> {
|
||||
let mut out = 0;
|
||||
let Some('{') = self.peek() else {
|
||||
return None; //Err(Error::invalid_escape('u', self.line(), self.col()));
|
||||
};
|
||||
self.then();
|
||||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'}' => {
|
||||
self.then();
|
||||
return char::from_u32(out); //.ok_or(Error::bad_unicode(out, self.line(), self.col()));
|
||||
}
|
||||
_ => out = (out << 4) + self.digit::<16>()?,
|
||||
}
|
||||
}
|
||||
None //Err(Error::invalid_escape('u', self.line(), self.col()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> TokenStream<'text> for Tokenizer<'text> {
|
||||
fn context(&self) -> Context { self.context }
|
||||
// Tokenizer has access to the source buffer, and can implement expect and peek without cloning
|
||||
// itself. This can go wrong, of course, if an [Identifier] is expected, since all instructions and
|
||||
// registers are valid identifiers.
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
let token = Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))?;
|
||||
self.count(&token);
|
||||
Ok(token)
|
||||
}
|
||||
fn peek(&mut self) -> Self::Item { Token::from(&self.text[self.idx..]) }
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
Token::expect(&self.text[self.idx..], expected).map_err(|e| e.context(self.context()))
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [Context] stores contextual information about the current tokenizer state
|
||||
//!
|
||||
//! This data is trivially copyable and can be provided in error messages using the
|
||||
//! [Error::Contextual] specialization)
|
||||
use super::*;
|
||||
/// Stores contextual information about the current tokenizer state, useful for printing errors
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Context {
|
||||
line: usize,
|
||||
position: usize,
|
||||
tokens: usize,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new() -> Self { Default::default() }
|
||||
pub fn line(&self) -> usize { self.line }
|
||||
pub fn tokens(&self) -> usize { self.tokens }
|
||||
pub fn position(&self) -> usize { self.position }
|
||||
pub(super) fn count(&mut self, t: &Token) {
|
||||
match t.variant() {
|
||||
Type::EndOfFile => return,
|
||||
Type::Endl => {
|
||||
self.line += 1;
|
||||
self.position = 1;
|
||||
}
|
||||
_ => self.position += t.len(),
|
||||
}
|
||||
self.tokens += 1;
|
||||
}
|
||||
}
|
||||
impl Default for Context {
|
||||
fn default() -> Self { Self { line: 1, position: 1, tokens: 0 } }
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Context {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{}", self.line, self.position) }
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
use super::{
|
||||
context::Context,
|
||||
token::{OwnedToken, *},
|
||||
};
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LexError {
|
||||
/// Any other error, tagged with [Context]. Created by [`Error::context()`]
|
||||
Contextual(Context, Box<Self>),
|
||||
/// Produced by [Token] when the input is entirely unexpected.
|
||||
UnexpectedSymbol(String),
|
||||
/// Produced by [`TokenStream::expect`] when the next [Token] isn't the expected [Type]
|
||||
UnexpectedToken { expected: Type, got: OwnedToken },
|
||||
/// Produced by [`TokenStream::expect_any_of`] when the next [Token] isn't any of the
|
||||
/// expected [Types](Type)
|
||||
AllExpectationsFailed { expected: Types, got: OwnedToken },
|
||||
}
|
||||
|
||||
impl LexError {
|
||||
pub fn context(self, c: Context) -> Self {
|
||||
match self {
|
||||
Self::Contextual(..) => self,
|
||||
_ => Self::Contextual(c, Box::new(self)),
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts the root of the error tree
|
||||
pub fn bare(self) -> Self {
|
||||
match self {
|
||||
Self::Contextual(_, bare) => bare.bare(),
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expected<E: AsRef<[Type]>, T: Into<OwnedToken>>(expected: E, got: T) -> Self {
|
||||
match expected.as_ref().len() {
|
||||
1 => Self::UnexpectedToken { expected: expected.as_ref()[0], got: got.into() },
|
||||
_ => Self::AllExpectationsFailed { expected: expected.as_ref().into(), got: got.into() },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mask_expectation(mut self, expected: Type) -> Self {
|
||||
match self {
|
||||
LexError::UnexpectedToken { got, .. } => self = LexError::UnexpectedToken { expected, got },
|
||||
LexError::AllExpectationsFailed { got, .. } => self = LexError::UnexpectedToken { expected, got },
|
||||
LexError::Contextual(context, err) => {
|
||||
self = LexError::Contextual(context, Box::new(err.mask_expectation(expected)))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for LexError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LexError::Contextual(ctx, error) => write!(f, "{ctx}: {error}"),
|
||||
LexError::UnexpectedSymbol(sym) => write!(f, "Unexpected item in bagging area: \"{sym}\""),
|
||||
LexError::UnexpectedToken { expected, got } => write!(f, "Expected {expected}, got {got}."),
|
||||
LexError::AllExpectationsFailed { expected, got } => write!(f, "Expected {expected}, got {got}."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LexError {}
|
||||
@@ -1,55 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Removes a single [kind](Type) of [`Token`] from a [`TokenStream`]
|
||||
use super::*;
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
ignore: Type,
|
||||
inner: &'t mut T,
|
||||
}
|
||||
|
||||
impl<'t, T> Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
/// Creates a new [Ignore], which ignores the [ignore Type](Type)
|
||||
pub fn new(ignore: Type, t: &'t mut T) -> Self { Ignore { ignore, inner: t } }
|
||||
|
||||
/// Gets a mutable reference to the inner [Iterator]
|
||||
pub fn inner_mut(&mut self) -> &mut T { self.inner }
|
||||
}
|
||||
|
||||
impl<'t, T> Iterator for Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
type Item = Token<'t>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let next = self.inner.next()?;
|
||||
// Space tokens are greedy, so the next token shouldn't be a Space
|
||||
match next.variant() {
|
||||
Type::Space => self.next(),
|
||||
_ => Some(next),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> TokenStream<'t> for Ignore<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn context(&self) -> Context { self.inner.context() }
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.expect(expected)
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Self::Item {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.peek()
|
||||
}
|
||||
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
self.inner.allow(self.ignore);
|
||||
self.inner.peek_expect(expected)
|
||||
}
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Preprocesses a [`TokenStream`], substituting tokens for earlier tokens based on in-band
|
||||
//! ".define" rules
|
||||
use super::*;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
// TODO: Clean this spaghetti mess up
|
||||
|
||||
/// Preprocesses a [TokenStream], substituting tokens for earlier tokens based on in-band ".define"
|
||||
/// rules
|
||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub struct Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
sub_table: HashMap<Token<'t>, Vec<Token<'t>>>,
|
||||
sub_types: Vec<Type>,
|
||||
queue: VecDeque<Token<'t>>,
|
||||
inner: &'t mut T,
|
||||
}
|
||||
|
||||
impl<'t, T> Iterator for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
type Item = Token<'t>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.queue.pop_front() {
|
||||
Some(token) => Some(token),
|
||||
None => {
|
||||
let next = self.inner.next()?;
|
||||
if let Some(subs) = self.sub_table.get(&next) {
|
||||
self.queue.extend(subs);
|
||||
return self.next();
|
||||
}
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T: TokenStream<'t>> Preprocessed<'t, T> {
|
||||
/// Creates a new [Preprocessed] [TokenStream]
|
||||
pub fn new(inner: &'t mut T) -> Self {
|
||||
Self { sub_table: Default::default(), sub_types: Default::default(), queue: Default::default(), inner }
|
||||
}
|
||||
|
||||
/// Gets a mutable reference to the inner [TokenStream]
|
||||
pub fn inner_mut(&mut self) -> &mut T { self.inner }
|
||||
|
||||
/// Preserve the next token in the queue
|
||||
fn enqueue(&mut self, token: Token<'t>) -> Token<'t> {
|
||||
self.queue.push_back(token);
|
||||
token
|
||||
}
|
||||
|
||||
/// Process .define directives in the preprocessor
|
||||
fn define(&mut self, token: Token<'t>) -> Result<(), LexError> {
|
||||
if !(token.is_variant(Type::Directive) && token.lexeme().starts_with(".define")) {
|
||||
return Ok(());
|
||||
}
|
||||
// Tokenize the subdocument
|
||||
self.allow(Type::Directive);
|
||||
self.allow(Type::Space);
|
||||
|
||||
let Some(k) = self.inner.next() else { return Ok(()) };
|
||||
if !self.sub_types.contains(&k.variant()) {
|
||||
self.sub_types.push(k.variant());
|
||||
};
|
||||
|
||||
self.allow(Type::Space);
|
||||
|
||||
let mut replacement = vec![];
|
||||
loop {
|
||||
match self.inner.peek().variant() {
|
||||
Type::Endl | Type::EndOfFile => break,
|
||||
Type::Comment | Type::Space => {
|
||||
// ignore comments
|
||||
self.inner.next();
|
||||
}
|
||||
_ => {
|
||||
let next = self.inner.next().unwrap();
|
||||
replacement.push(self.enqueue(next));
|
||||
}
|
||||
}
|
||||
}
|
||||
self.sub_table.insert(k, replacement);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Does the preprocessing step
|
||||
fn preprocess(&mut self, token: Token<'t>) {
|
||||
if let Some(subs) = self.sub_table.get(&token) {
|
||||
self.queue.extend(subs);
|
||||
self.inner.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> TokenStream<'t> for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn context(&self) -> Context { self.inner.context() }
|
||||
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
match self.queue.front() {
|
||||
Some(&token) if token.is_variant(expected) => Ok(self.queue.pop_front().unwrap_or_default()),
|
||||
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
|
||||
None => {
|
||||
// Only resolve defines when expecting, otherwise you'll run into issues.
|
||||
if let Ok(next) = self.inner.expect(expected) {
|
||||
self.define(next)?;
|
||||
return Ok(next);
|
||||
}
|
||||
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
|
||||
if let Some(subs) = self.sub_table.get(&next) {
|
||||
self.inner.allow_any_of(&self.sub_types);
|
||||
self.queue.extend(subs);
|
||||
}
|
||||
return if self.queue.is_empty() { self.inner.expect(expected) } else { self.expect(expected) };
|
||||
}
|
||||
Err(LexError::expected([expected], self.inner.peek()).context(self.context()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Self::Item {
|
||||
match self.queue.front() {
|
||||
Some(token) => *token,
|
||||
None => {
|
||||
// Only allow substitution when the next token is unexpected
|
||||
let old = self.inner.peek();
|
||||
self.preprocess(old);
|
||||
match self.queue.front() {
|
||||
Some(&new) => new,
|
||||
None => old,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError> {
|
||||
match self.queue.front() {
|
||||
Some(&token) if token.is_variant(expected) => Ok(token),
|
||||
Some(&token) => Err(LexError::expected([expected], token).context(self.context())),
|
||||
None => {
|
||||
if let Ok(next) = self.inner.peek_expect(expected) {
|
||||
return Ok(next);
|
||||
}
|
||||
if let Ok(next) = self.inner.peek_expect_any_of(&self.sub_types) {
|
||||
self.preprocess(next);
|
||||
return if self.queue.is_empty() {
|
||||
self.inner.peek_expect(expected)
|
||||
} else {
|
||||
self.peek_expect(expected)
|
||||
};
|
||||
}
|
||||
Err(LexError::expected([expected], self.inner.peek()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T> std::fmt::Debug for Preprocessed<'t, T>
|
||||
where T: TokenStream<'t>
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Preprocessed")
|
||||
.field("sub_table", &self.sub_table)
|
||||
.field("sub_types", &self.sub_types)
|
||||
.field("queue", &self.queue)
|
||||
.field("context", &self.context())
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
66 src/lexer/tests.rs (new file)
@@ -0,0 +1,66 @@
|
||||
use super::*;
|
||||
macro_rules! lex {
|
||||
(type ($t:tt), $expected:expr) => {
|
||||
let token = Lexer::new(stringify!($t)).scan().expect(stringify!($t:tt should yield a valid token));
|
||||
assert_eq!(token.kind, $expected);
|
||||
};
|
||||
({ $($t:tt)* }) => {
|
||||
Lexer::new(stringify!($($t)*))
|
||||
};
|
||||
}
|
||||
#[test]
|
||||
fn ascii_char() {
|
||||
lex!(type ('A'), TokenKind::Char('A')); // 'A' should be a valid char
|
||||
lex!(type ('\x1b'), TokenKind::Char('\x1b')); // '\\x1b' should be a valid char
|
||||
}
|
||||
#[test]
|
||||
fn unicode_escape_char() {
|
||||
lex!(type ('\u{1f988}'), TokenKind::Char('🦈')); // '\\u{1f988}' should be a valid 🦈
|
||||
}
|
||||
#[test]
|
||||
fn number_with_base() {
|
||||
lex!(type (0), TokenKind::Number(0, 10)); // 0 should be a 16-bit base-10 number
|
||||
lex!(type (42069), TokenKind::Number(42069, 10)); // 42069 should be a 16-bit base-10 number
|
||||
lex!(type (0x420), TokenKind::Number(0x420, 16)); // 0x420 should be a 16-bit base-16 number
|
||||
lex!(type (0d100), TokenKind::Number(100, 10)); // 0d100 should be a 16-bit base-10 number
|
||||
lex!(type (0o100), TokenKind::Number(64, 8)); // 0o100 should be a 16-bit base-8 number
|
||||
lex!(type (0b100), TokenKind::Number(4, 2)); // 0b100 should be a 16-bit base-2 number
|
||||
}
|
||||
#[test]
|
||||
fn no_operand_emulated() {
|
||||
lex!(type (nop), TokenKind::NoEm(NoEm::Nop)); // nop should be a valid NoEm
|
||||
lex!(type (ret), TokenKind::NoEm(NoEm::Ret)); // ret should be a valid NoEm
|
||||
lex!(type (clrc), TokenKind::NoEm(NoEm::Clrc)); // clrc should be a valid NoEm
|
||||
lex!(type (clrz), TokenKind::NoEm(NoEm::Clrz)); // clrz should be a valid NoEm
|
||||
lex!(type (clrn), TokenKind::NoEm(NoEm::Clrn)); // clrn should be a valid NoEm
|
||||
lex!(type (setc), TokenKind::NoEm(NoEm::Setc)); // setc should be a valid NoEm
|
||||
lex!(type (setz), TokenKind::NoEm(NoEm::Setz)); // setz should be a valid NoEm
|
||||
lex!(type (setn), TokenKind::NoEm(NoEm::Setn)); // setn should be a valid NoEm
|
||||
lex!(type (dint), TokenKind::NoEm(NoEm::Dint)); // dint should be a valid NoEm
|
||||
lex!(type (eint), TokenKind::NoEm(NoEm::Eint)); // eint should be a valid NoEm
|
||||
}
|
||||
#[test]
|
||||
fn registers() {
|
||||
lex!(type(pc), TokenKind::Reg(Reg::PC));
|
||||
lex!(type(sp), TokenKind::Reg(Reg::SP));
|
||||
lex!(type(sr), TokenKind::Reg(Reg::SR));
|
||||
lex!(type(cg), TokenKind::Reg(Reg::CG));
|
||||
lex!(type(r0), TokenKind::Reg(Reg::PC));
|
||||
lex!(type(r1), TokenKind::Reg(Reg::SP));
|
||||
lex!(type(r2), TokenKind::Reg(Reg::SR));
|
||||
lex!(type(r3), TokenKind::Reg(Reg::CG));
|
||||
lex!(type(r4), TokenKind::Reg(Reg::R4));
|
||||
lex!(type(r5), TokenKind::Reg(Reg::R5));
|
||||
lex!(type(r6), TokenKind::Reg(Reg::R6));
|
||||
lex!(type(r7), TokenKind::Reg(Reg::R7));
|
||||
lex!(type(r8), TokenKind::Reg(Reg::R8));
|
||||
lex!(type(r9), TokenKind::Reg(Reg::R9));
|
||||
lex!(type(r10), TokenKind::Reg(Reg::R10));
|
||||
lex!(type(r11), TokenKind::Reg(Reg::R11));
|
||||
lex!(type(r12), TokenKind::Reg(Reg::R12));
|
||||
lex!(type(r13), TokenKind::Reg(Reg::R13));
|
||||
lex!(type(r14), TokenKind::Reg(Reg::R14));
|
||||
lex!(type(r15), TokenKind::Reg(Reg::R15));
|
||||
}
|
||||
|
||||
// TODO: opcode tests, misc. special character tests, etc.
|
||||
@@ -1,335 +1,479 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [Token] is a [semantically tagged](Type) sequence of characters.
|
||||
// © 2023-2024 John Breaux
|
||||
//! A [Token] is a [semantically-tagged](TokenKind) [sequence of characters](str) and a [Span]
|
||||
//!
|
||||
//! Token, and the tokenizer, intend to copy as little as possible.
|
||||
|
||||
use super::error::LexError;
|
||||
use regex::Regex;
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
sync::OnceLock,
|
||||
};
|
||||
|
||||
/// Implements regex matching functions on [`Token`] for each [`Type`],
|
||||
/// and implements [`From<&str>`] for [`Token`]
|
||||
macro_rules! regex_impl {
|
||||
(<$t:lifetime> $type:ty {$(
|
||||
$(#[$meta:meta])*
|
||||
pub fn $func:ident (text: &str) -> Option<Self> {
|
||||
regex!($out:path = $re:literal)
|
||||
//! [Tokens](Token) are borrowed, and cannot outlive their source slice (lifetime `'t`)
|
||||
use crate::util::Span;
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Token<'t> {
|
||||
pub lexeme: &'t str,
|
||||
pub kind: TokenKind,
|
||||
pub pos: Span<usize>,
|
||||
}
|
||||
impl<'t> Token<'t> {
|
||||
pub fn new(lexeme: &'t str, kind: TokenKind, pos: Span<usize>) -> Self {
|
||||
Self { lexeme, kind, pos }
|
||||
}
|
||||
)*}) => {
|
||||
impl<$t> $type {
|
||||
/// Lexes a token only for the expected `variant`
|
||||
///
|
||||
/// Warning: This bypasses precedence rules. Only use for specific patterns.
|
||||
pub fn expect(text: &$t str, expected: Type) -> Result<Self, LexError> {
|
||||
match expected {$(
|
||||
$out => Self::$func(text),
|
||||
)*}.ok_or(LexError::UnexpectedToken {
|
||||
expected,
|
||||
got: Self::from(text).into(),
|
||||
})
|
||||
}
|
||||
$(
|
||||
$(#[$meta])*
|
||||
/// Tries to read [`
|
||||
#[doc = stringify!($out)]
|
||||
/// `] from `text`
|
||||
pub fn $func(text: &$t str) -> Option<Self> {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let lexeme = RE.get_or_init(|| Regex::new($re).unwrap())
|
||||
.find(text)?.into();
|
||||
Some(Self { variant: $out, lexeme })
|
||||
})*
|
||||
}
|
||||
impl<$t> From<&$t str> for $type {
|
||||
fn from (value: &$t str) -> Self {
|
||||
$(
|
||||
if let Some(token) = Self::$func(value) {
|
||||
token
|
||||
} else
|
||||
)*
|
||||
{todo!("Unexpected input: {value:#?} (Tokenization failure)")}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// A [Token] is a [semantically tagged](Type) sequence of characters
|
||||
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Token<'text> {
|
||||
/// The type of this token
|
||||
variant: Type,
|
||||
/// The sub[str]ing corresponding to this token
|
||||
lexeme: &'text str,
|
||||
}
|
||||
|
||||
impl<'text> Token<'text> {
|
||||
/// Returns the [Type] of this [Token]
|
||||
pub fn variant(&self) -> Type { self.variant }
|
||||
|
||||
/// Returns the lexeme (originating string slice) of this token
|
||||
pub fn lexeme(&self) -> &'text str { self.lexeme }
|
||||
|
||||
/// Parses this [Token] into another type
|
||||
pub fn parse<F>(&self) -> Result<F, <F as std::str::FromStr>::Err>
|
||||
where F: std::str::FromStr {
|
||||
self.lexeme.parse()
|
||||
}
|
||||
/// Returns whether the Lexeme is the expected [Type]
|
||||
pub fn is_variant(&self, expected: Type) -> bool { self.variant == expected }
|
||||
|
||||
/// Returns the length of [Self::lexeme] in bytes.
|
||||
pub fn len(&self) -> usize { self.lexeme.len() }
|
||||
|
||||
/// Returns `true` if [Self::lexeme] has a length of zero bytes.
|
||||
pub fn is_empty(&self) -> bool { self.lexeme.is_empty() }
|
||||
}
|
||||
|
||||
impl<'text> Debug for Token<'text> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_list().entry(&self.variant).entry(&self.lexeme).finish()
|
||||
pub fn kind(&self) -> TokenKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
impl<'text> Display for Token<'text> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self.variant {
|
||||
Type::Endl | Type::EndOfFile => Display::fmt(&self.variant, f),
|
||||
v => write!(f, "{v} \"{}\"", self.lexeme),
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum TokenKind {
|
||||
Eof,
|
||||
Newline, // \n
|
||||
OpenParen, // (
|
||||
CloseParen, // )
|
||||
OpenCurly, // {
|
||||
CloseCurly, // }
|
||||
OpenBrace, // [
|
||||
CloseBrace, // ]
|
||||
Comma, // ,
|
||||
Colon, // :
|
||||
Bang, // !
|
||||
At, // @
|
||||
Amp, // &
|
||||
Bar, // |
|
||||
Caret, // ^
|
||||
Star, // *
|
||||
Hash, // #
|
||||
Dollar, // $
|
||||
Percent, // %
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Slash, // /
|
||||
Lsh, // <<
|
||||
Rsh, // >>
|
||||
|
||||
Comment, // (';' | '//') .* '\n' |
|
||||
Directive, // '.' XID_CONTINUE*
|
||||
Identifier, // XID_START XID_CONTINUE*
|
||||
Number(u16, u8), // varies depending on base
|
||||
Char(char), // '\'' ('\' Escape | .) '\''
|
||||
String, // '"' .* '"'
|
||||
Reg(Reg),
|
||||
NoEm(NoEm),
|
||||
OneEm(OneEm),
|
||||
Special(Special),
|
||||
OneArg(OneArg),
|
||||
TwoArg(TwoArg),
|
||||
Jump(Jump),
|
||||
|
||||
Byte, // .b
|
||||
Word, // .w
|
||||
}
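A minimal usage sketch of the new borrowed token type (a sketch, not from this commit), assuming the `Span` conversions from the `util` module shown later in this diff:

#[test]
fn token_sketch() {
    // `Token` borrows its lexeme from the source; `Span` records where it came from.
    let src = "main: ret";
    let tok = Token::new(&src[0..4], TokenKind::Identifier, (0..4).into());
    assert_eq!(tok.kind(), TokenKind::Identifier);
    assert_eq!(tok.lexeme, "main");
    assert_eq!(format!("{tok}"), "main"); // Display prints the lexeme for identifiers
}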
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Reg {
|
||||
PC,
|
||||
SP,
|
||||
SR,
|
||||
CG,
|
||||
R4,
|
||||
R5,
|
||||
R6,
|
||||
R7,
|
||||
R8,
|
||||
R9,
|
||||
R10,
|
||||
R11,
|
||||
R12,
|
||||
R13,
|
||||
R14,
|
||||
R15,
|
||||
}
|
||||
/// Fake instructions of the form `opcode`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum NoEm {
|
||||
Nop,
|
||||
Ret,
|
||||
Clrc,
|
||||
Clrz,
|
||||
Clrn,
|
||||
Setc,
|
||||
Setz,
|
||||
Setn,
|
||||
Dint,
|
||||
Eint,
|
||||
}
|
||||
/// Fake instructions of the form `opcode dst`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum OneEm {
|
||||
Pop,
|
||||
Rla,
|
||||
Rlc,
|
||||
Inv,
|
||||
Clr,
|
||||
Tst,
|
||||
Dec,
|
||||
Decd,
|
||||
Inc,
|
||||
Incd,
|
||||
Adc,
|
||||
Dadc,
|
||||
Sbc,
|
||||
}
|
||||
/// These opcodes have bespoke grammatical rules
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Special {
|
||||
/// Br = "br" Src
|
||||
Br,
|
||||
}
|
||||
/// Real instructions of the form `opcode src`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum OneArg {
|
||||
Rrc,
|
||||
Swpb,
|
||||
Rra,
|
||||
Sxt,
|
||||
Push,
|
||||
Call,
|
||||
Reti,
|
||||
}
|
||||
/// Real instructions of the form `opcode src, dst`
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum TwoArg {
|
||||
Mov,
|
||||
Add,
|
||||
Addc,
|
||||
Subc,
|
||||
Sub,
|
||||
Cmp,
|
||||
Dadd,
|
||||
Bit,
|
||||
Bic,
|
||||
Bis,
|
||||
Xor,
|
||||
And,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Jump {
|
||||
Jne,
|
||||
Jnz,
|
||||
Jeq,
|
||||
Jz,
|
||||
Jnc,
|
||||
Jlo,
|
||||
Jc,
|
||||
Jhs,
|
||||
Jn,
|
||||
Jge,
|
||||
Jl,
|
||||
Jmp,
|
||||
}
|
||||
mod convert {
|
||||
//! Implementations of [FromStr] for [token](super) types.
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
impl FromStr for Reg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"pc" => Reg::PC,
|
||||
"sp" => Reg::SP,
|
||||
"sr" => Reg::SR,
|
||||
"cg" => Reg::CG,
|
||||
"r0" => Reg::PC,
|
||||
"r1" => Reg::SP,
|
||||
"r2" => Reg::SR,
|
||||
"r3" => Reg::CG,
|
||||
"r4" => Reg::R4,
|
||||
"r5" => Reg::R5,
|
||||
"r6" => Reg::R6,
|
||||
"r7" => Reg::R7,
|
||||
"r8" => Reg::R8,
|
||||
"r9" => Reg::R9,
|
||||
"r10" => Reg::R10,
|
||||
"r11" => Reg::R11,
|
||||
"r12" => Reg::R12,
|
||||
"r13" => Reg::R13,
|
||||
"r14" => Reg::R14,
|
||||
"r15" => Reg::R15,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for NoEm {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"nop" => NoEm::Nop,
|
||||
"ret" => NoEm::Ret,
|
||||
"clrc" => NoEm::Clrc,
|
||||
"clrz" => NoEm::Clrz,
|
||||
"clrn" => NoEm::Clrn,
|
||||
"setc" => NoEm::Setc,
|
||||
"setz" => NoEm::Setz,
|
||||
"setn" => NoEm::Setn,
|
||||
"dint" => NoEm::Dint,
|
||||
"eint" => NoEm::Eint,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for OneEm {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"pop" => OneEm::Pop,
|
||||
"rla" => OneEm::Rla,
|
||||
"rlc" => OneEm::Rlc,
|
||||
"inv" => OneEm::Inv,
|
||||
"clr" => OneEm::Clr,
|
||||
"tst" => OneEm::Tst,
|
||||
"dec" => OneEm::Dec,
|
||||
"decd" => OneEm::Decd,
|
||||
"inc" => OneEm::Inc,
|
||||
"incd" => OneEm::Incd,
|
||||
"adc" => OneEm::Adc,
|
||||
"dadc" => OneEm::Dadc,
|
||||
"sbc" => OneEm::Sbc,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for Special {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"br" => Special::Br,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for OneArg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"rrc" => OneArg::Rrc,
|
||||
"swpb" => OneArg::Swpb,
|
||||
"rra" => OneArg::Rra,
|
||||
"sxt" => OneArg::Sxt,
|
||||
"push" => OneArg::Push,
|
||||
"call" => OneArg::Call,
|
||||
"reti" => OneArg::Reti,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for TwoArg {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"mov" => TwoArg::Mov,
|
||||
"add" => TwoArg::Add,
|
||||
"addc" => TwoArg::Addc,
|
||||
"subc" => TwoArg::Subc,
|
||||
"sub" => TwoArg::Sub,
|
||||
"cmp" => TwoArg::Cmp,
|
||||
"dadd" => TwoArg::Dadd,
|
||||
"bit" => TwoArg::Bit,
|
||||
"bic" => TwoArg::Bic,
|
||||
"bis" => TwoArg::Bis,
|
||||
"xor" => TwoArg::Xor,
|
||||
"and" => TwoArg::And,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FromStr for Jump {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"jne" => Jump::Jne,
|
||||
"jnz" => Jump::Jnz,
|
||||
"jeq" => Jump::Jeq,
|
||||
"jz" => Jump::Jz,
|
||||
"jnc" => Jump::Jnc,
|
||||
"jlo" => Jump::Jlo,
|
||||
"jc" => Jump::Jc,
|
||||
"jhs" => Jump::Jhs,
|
||||
"jn" => Jump::Jn,
|
||||
"jge" => Jump::Jge,
|
||||
"jl" => Jump::Jl,
|
||||
"jmp" => Jump::Jmp,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
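A small sketch of the `FromStr` impls above in use: mnemonic lookup reduces to `str::parse`, with `Err(())` for anything unrecognized.

#[test]
fn keyword_lookup_sketch() {
    assert_eq!("r10".parse::<Reg>(), Ok(Reg::R10));
    assert_eq!("pc".parse::<Reg>(), Ok(Reg::PC)); // `pc` and `r0` name the same register
    assert_eq!("swpb".parse::<OneArg>(), Ok(OneArg::Swpb));
    assert!("r16".parse::<Reg>().is_err()); // only r0..=r15 and their aliases match
}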
|
||||
|
||||
/// A [token Type](Type) is a semantic tag for a sequence of characters
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Type {
|
||||
/// contiguous whitespace, excluding newline
|
||||
Space,
|
||||
/// newline and contiguous whitespace
|
||||
Endl,
|
||||
/// A line-comment
|
||||
Comment,
|
||||
/// Jump label *definition*
|
||||
Label,
|
||||
/// Instructions
|
||||
Insn,
|
||||
/// Operand width is byte
|
||||
ByteWidth,
|
||||
/// Operand width is word
|
||||
WordWidth,
|
||||
/// Register mnemonic (e.g. `pc`, `r14`)
|
||||
Register,
|
||||
/// Marker for base-10
|
||||
RadixMarkerDec,
|
||||
/// Marker for base-16
|
||||
RadixMarkerHex,
|
||||
/// Marker for base-8
|
||||
RadixMarkerOct,
|
||||
/// Marker for base-2
|
||||
RadixMarkerBin,
|
||||
/// 1-4 hex-digit numbers only
|
||||
Number,
|
||||
/// Negative number marker
|
||||
Minus,
|
||||
/// post-increment mode marker
|
||||
Plus,
|
||||
/// Open-Indexed-Mode marker
|
||||
LParen,
|
||||
/// Close-Indexed-Mode marker
|
||||
RParen,
|
||||
/// Open Square Bracket
|
||||
LBracket,
|
||||
/// Closed Square Bracket
|
||||
RBracket,
|
||||
/// Indirect mode marker
|
||||
Indirect,
|
||||
/// absolute address marker
|
||||
Absolute,
|
||||
/// immediate value marker
|
||||
Immediate,
|
||||
/// Valid identifier. Identifiers must start with a Latin alphabetic character or underscore
|
||||
Identifier,
|
||||
/// A string, encased in "quotes"
|
||||
String,
|
||||
/// Assembler directive
|
||||
Directive,
|
||||
/// Separator (comma)
|
||||
Separator,
|
||||
/// End of File marker
|
||||
#[default]
|
||||
EndOfFile,
|
||||
/// Invalid token
|
||||
Invalid,
|
||||
}
|
||||
|
||||
regex_impl! {<'text> Token<'text> {
|
||||
pub fn expect_space(text: &str) -> Option<Self> {
|
||||
regex!(Type::Space = r"^[\s--\n]+")
|
||||
}
|
||||
pub fn expect_endl(text: &str) -> Option<Self> {
|
||||
regex!(Type::Endl = r"^\n[\s--\n]*")
|
||||
}
|
||||
pub fn expect_comment(text: &str) -> Option<Self> {
|
||||
regex!(Type::Comment = r"^(;|//|<.*>|\{.*\}).*")
|
||||
}
|
||||
pub fn expect_label(text: &str) -> Option<Self> {
|
||||
regex!(Type::Label = r"^:")
|
||||
}
|
||||
pub fn expect_insn(text: &str) -> Option<Self> {
|
||||
regex!(Type::Insn = r"(?i)^(adc|addc?|and|bi[cs]|bitb?|br|call|clr[cnz]?|cmp|dad[cd]|decd?|[de]int|incd?|inv|j([cz]|eq|ge|hs|lo?|mp|n[cez]?)|mov|[np]op|push|reti?|r[lr][ac]|sbc|set[cnz]|subc?|swpb|sxt|tst|xor)(?-u:\b)")
|
||||
}
|
||||
pub fn expect_byte_width(text: &str) -> Option<Self> {
|
||||
regex!(Type::ByteWidth = r"(?i)^\.b")
|
||||
}
|
||||
pub fn expect_word_width(text: &str) -> Option<Self> {
|
||||
regex!(Type::WordWidth = r"(?i)^\.w")
|
||||
}
|
||||
pub fn expect_register(text: &str) -> Option<Self> {
|
||||
// old regex regex!(Type::Register = r"(?i)^(r(1[0-5]|[0-9])|pc|s[pr]|cg)")
|
||||
regex!(Type::Register = r"(?i)^(r\d+|pc|s[pr]|cg)(?-u:\b)")
|
||||
}
|
||||
pub fn expect_radix_marker_dec(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerDec = r"(?i)^0d")
|
||||
}
|
||||
pub fn expect_radix_marker_hex(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerHex = r"(?i)^(0x|\$)")
|
||||
}
|
||||
pub fn expect_radix_marker_oct(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerOct = r"(?i)^0o")
|
||||
}
|
||||
pub fn expect_radix_marker_bin(text: &str) -> Option<Self> {
|
||||
regex!(Type::RadixMarkerBin = r"(?i)^0b")
|
||||
}
|
||||
pub fn expect_number(text: &str) -> Option<Self> {
|
||||
regex!(Type::Number = r"^+?[[:xdigit:]]+(?-u:\b)")
|
||||
}
|
||||
pub fn expect_minus(text: &str) -> Option<Self> {
|
||||
regex!(Type::Minus = r"^-")
|
||||
}
|
||||
pub fn expect_plus(text: &str) -> Option<Self> {
|
||||
regex!(Type::Plus = r"^\+")
|
||||
}
|
||||
pub fn expect_l_paren(text: &str) -> Option<Self> {
|
||||
regex!(Type::LParen = r"^\(")
|
||||
}
|
||||
pub fn expect_r_paren(text: &str) -> Option<Self> {
|
||||
regex!(Type::RParen = r"^\)")
|
||||
}
|
||||
pub fn expect_l_bracket(text: &str) -> Option<Self> {
|
||||
regex!(Type::LBracket = r"^\[")
|
||||
}
|
||||
pub fn expect_r_bracket(text: &str) -> Option<Self> {
|
||||
regex!(Type::RBracket = r"^]")
|
||||
}
|
||||
pub fn expect_indrect(text: &str) -> Option<Self> {
|
||||
regex!(Type::Indirect = r"^@")
|
||||
}
|
||||
pub fn expect_absolute(text: &str) -> Option<Self> {
|
||||
regex!(Type::Absolute = r"^&")
|
||||
}
|
||||
pub fn expect_immediate(text: &str) -> Option<Self> {
|
||||
regex!(Type::Immediate = r"^#")
|
||||
}
|
||||
pub fn expect_string(text: &str) -> Option<Self> {
|
||||
regex!(Type::String = r#"^"[^"]*""#)
|
||||
}
|
||||
pub fn expect_directive(text: &str) -> Option<Self> {
|
||||
regex!(Type::Directive = r"^\.\w+")
|
||||
}
|
||||
pub fn expect_identifier(text: &str) -> Option<Self> {
|
||||
regex!(Type::Identifier = r"^[A-Za-z_]\w*")
|
||||
}
|
||||
pub fn expect_separator(text: &str) -> Option<Self> {
|
||||
regex!(Type::Separator = r"^,")
|
||||
}
|
||||
pub fn expect_end_of_file(text: &str) -> Option<Self> {
|
||||
regex!(Type::EndOfFile = r"^$")
|
||||
}
|
||||
pub fn expect_anything(text: &str) -> Option<Self> {
|
||||
regex!(Type::Invalid = r"^.*")
|
||||
}
|
||||
}}
|
||||
|
||||
impl Display for Type {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Space => Display::fmt("space", f),
|
||||
Self::Endl => Display::fmt("newline", f),
|
||||
Self::Comment => Display::fmt("comment", f),
|
||||
Self::Label => Display::fmt("label definition", f),
|
||||
Self::Insn => Display::fmt("opcode", f),
|
||||
Self::ByteWidth => Display::fmt("byte-width", f),
|
||||
Self::WordWidth => Display::fmt("word-width", f),
|
||||
Self::Register => Display::fmt("register", f),
|
||||
Self::RadixMarkerDec => Display::fmt("decimal marker", f),
|
||||
Self::RadixMarkerHex => Display::fmt("hexadecimal marker", f),
|
||||
Self::RadixMarkerOct => Display::fmt("octal marker", f),
|
||||
Self::RadixMarkerBin => Display::fmt("binary marker", f),
|
||||
Self::Number => Display::fmt("number", f),
|
||||
Self::Minus => Display::fmt("minus sign", f),
|
||||
Self::Plus => Display::fmt("plus sign", f),
|
||||
Self::LParen => Display::fmt("left parenthesis", f),
|
||||
Self::RParen => Display::fmt("right parenthesis", f),
|
||||
Self::LBracket => Display::fmt("left bracket", f),
|
||||
Self::RBracket => Display::fmt("right bracket", f),
|
||||
Self::Indirect => Display::fmt("indirect", f),
|
||||
Self::Absolute => Display::fmt("absolute", f),
|
||||
Self::Immediate => Display::fmt("immediate", f),
|
||||
Self::Identifier => Display::fmt("identifier", f),
|
||||
Self::String => Display::fmt("string", f),
|
||||
Self::Directive => Display::fmt("directive", f),
|
||||
Self::Separator => Display::fmt("comma", f),
|
||||
Self::EndOfFile => Display::fmt("EOF", f),
|
||||
Self::Invalid => Display::fmt("invalid token", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A [Token] which can outlive its parent buffer
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct OwnedToken {
|
||||
/// The type of this token
|
||||
variant: Type,
|
||||
/// The sub[String] corresponding to this token
|
||||
lexeme: String,
|
||||
}
|
||||
|
||||
impl Display for OwnedToken {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", Token::from(self)) }
|
||||
}
|
||||
|
||||
impl<'t> From<&'t OwnedToken> for Token<'t> {
|
||||
fn from(value: &'t OwnedToken) -> Self { Token { variant: value.variant, lexeme: &value.lexeme } }
|
||||
}
|
||||
|
||||
impl From<Token<'_>> for OwnedToken {
|
||||
fn from(value: Token<'_>) -> Self {
|
||||
let Token { variant, lexeme } = value;
|
||||
OwnedToken { variant, lexeme: lexeme.to_owned() }
|
||||
}
|
||||
}
|
||||
|
||||
/// [Types] are an owned array of [types](Type), with a custom [Display] implementation
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Types(Vec<Type>);
|
||||
|
||||
impl<T: AsRef<[Type]>> From<T> for Types {
|
||||
// TODO: Possibly bad. Check out in rust playground.
|
||||
fn from(value: T) -> Self { Self(value.as_ref().to_owned()) }
|
||||
}
|
||||
|
||||
impl Display for Types {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for (idx, t) in self.0.iter().enumerate() {
|
||||
Display::fmt(t, f)?;
|
||||
match idx {
|
||||
i if i < self.0.len() - 2 => Display::fmt(", ", f)?,
|
||||
i if i < self.0.len() - 1 => Display::fmt(" or ", f)?,
|
||||
_ => (),
|
||||
mod display {
|
||||
//! Implementations of [Display] for [token](super) types.
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
impl<'t> Display for Token<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { lexeme, kind, pos: _ } = self;
|
||||
match kind {
|
||||
TokenKind::Comment
|
||||
| TokenKind::Directive
|
||||
| TokenKind::Identifier
|
||||
| TokenKind::String => {
|
||||
write!(f, "{}", lexeme)
|
||||
}
|
||||
ty => ty.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for TokenKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TokenKind::Eof => write!(f, "[EOF]"),
|
||||
TokenKind::Newline => writeln!(f),
|
||||
TokenKind::OpenParen => write!(f, "("),
|
||||
TokenKind::CloseParen => write!(f, ")"),
|
||||
TokenKind::OpenCurly => write!(f, "{{"),
|
||||
TokenKind::CloseCurly => write!(f, "}}"),
|
||||
TokenKind::OpenBrace => write!(f, "["),
|
||||
TokenKind::CloseBrace => write!(f, "]"),
|
||||
TokenKind::Comma => write!(f, ","),
|
||||
TokenKind::Colon => write!(f, ":"),
|
||||
TokenKind::Bang => write!(f, "!"),
|
||||
TokenKind::At => write!(f, "@"),
|
||||
TokenKind::Amp => write!(f, "&"),
|
||||
TokenKind::Bar => write!(f, "|"),
|
||||
TokenKind::Caret => write!(f, "^"),
|
||||
TokenKind::Star => write!(f, "*"),
|
||||
TokenKind::Hash => write!(f, "#"),
|
||||
TokenKind::Dollar => write!(f, "$"),
|
||||
TokenKind::Percent => write!(f, "%"),
|
||||
TokenKind::Plus => write!(f, "+"),
|
||||
TokenKind::Minus => write!(f, "-"),
|
||||
TokenKind::Slash => write!(f, "/"),
|
||||
TokenKind::Lsh => write!(f, "<<"),
|
||||
TokenKind::Rsh => write!(f, ">>"),
|
||||
TokenKind::Comment => write!(f, "; "),
|
||||
TokenKind::Directive => write!(f, "."),
|
||||
TokenKind::Identifier => write!(f, "Identifier"),
|
||||
TokenKind::Number(val, 2) => write!(f, "0b{val:b}"),
|
||||
TokenKind::Number(val, 8) => write!(f, "0o{val:o}"),
|
||||
TokenKind::Number(val, 16) => write!(f, "0x{val:x}"),
|
||||
TokenKind::Number(val, _) => write!(f, "{val}"),
|
||||
TokenKind::Char(c) => write!(f, "'{c}'"),
|
||||
TokenKind::String => write!(f, "\"String\""),
|
||||
TokenKind::Reg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::NoEm(kw) => write!(f, "{kw}"),
|
||||
TokenKind::OneEm(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Special(kw) => write!(f, "{kw}"),
|
||||
TokenKind::OneArg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::TwoArg(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Jump(kw) => write!(f, "{kw}"),
|
||||
TokenKind::Byte => write!(f, ".b"),
|
||||
TokenKind::Word => write!(f, ".w"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Reg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Reg::PC => "pc".fmt(f),
|
||||
Reg::SP => "sp".fmt(f),
|
||||
Reg::SR => "sr".fmt(f),
|
||||
Reg::CG => "cg".fmt(f),
|
||||
Reg::R4 => "r4".fmt(f),
|
||||
Reg::R5 => "r5".fmt(f),
|
||||
Reg::R6 => "r6".fmt(f),
|
||||
Reg::R7 => "r7".fmt(f),
|
||||
Reg::R8 => "r8".fmt(f),
|
||||
Reg::R9 => "r9".fmt(f),
|
||||
Reg::R10 => "r10".fmt(f),
|
||||
Reg::R11 => "r11".fmt(f),
|
||||
Reg::R12 => "r12".fmt(f),
|
||||
Reg::R13 => "r13".fmt(f),
|
||||
Reg::R14 => "r14".fmt(f),
|
||||
Reg::R15 => "r15".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for NoEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
NoEm::Nop => "nop".fmt(f),
|
||||
NoEm::Ret => "ret".fmt(f),
|
||||
NoEm::Clrc => "clrc".fmt(f),
|
||||
NoEm::Clrz => "clrz".fmt(f),
|
||||
NoEm::Clrn => "clrn".fmt(f),
|
||||
NoEm::Setc => "setc".fmt(f),
|
||||
NoEm::Setz => "setz".fmt(f),
|
||||
NoEm::Setn => "setn".fmt(f),
|
||||
NoEm::Dint => "dint".fmt(f),
|
||||
NoEm::Eint => "eint".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for OneEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OneEm::Pop => "pop".fmt(f),
|
||||
OneEm::Rla => "rla".fmt(f),
|
||||
OneEm::Rlc => "rlc".fmt(f),
|
||||
OneEm::Inv => "inv".fmt(f),
|
||||
OneEm::Clr => "clr".fmt(f),
|
||||
OneEm::Tst => "tst".fmt(f),
|
||||
OneEm::Dec => "dec".fmt(f),
|
||||
OneEm::Decd => "decd".fmt(f),
|
||||
OneEm::Inc => "inc".fmt(f),
|
||||
OneEm::Incd => "incd".fmt(f),
|
||||
OneEm::Adc => "adc".fmt(f),
|
||||
OneEm::Dadc => "dadc".fmt(f),
|
||||
OneEm::Sbc => "sbc".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Special {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Special::Br => "br".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for OneArg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OneArg::Rrc => "rrc".fmt(f),
|
||||
OneArg::Swpb => "swpb".fmt(f),
|
||||
OneArg::Rra => "rra".fmt(f),
|
||||
OneArg::Sxt => "sxt".fmt(f),
|
||||
OneArg::Push => "push".fmt(f),
|
||||
OneArg::Call => "call".fmt(f),
|
||||
OneArg::Reti => "reti".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for TwoArg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TwoArg::Mov => "mov".fmt(f),
|
||||
TwoArg::Add => "add".fmt(f),
|
||||
TwoArg::Addc => "addc".fmt(f),
|
||||
TwoArg::Subc => "subc".fmt(f),
|
||||
TwoArg::Sub => "sub".fmt(f),
|
||||
TwoArg::Cmp => "cmp".fmt(f),
|
||||
TwoArg::Dadd => "dadd".fmt(f),
|
||||
TwoArg::Bit => "bit".fmt(f),
|
||||
TwoArg::Bic => "bic".fmt(f),
|
||||
TwoArg::Bis => "bis".fmt(f),
|
||||
TwoArg::Xor => "xor".fmt(f),
|
||||
TwoArg::And => "and".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Jump {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Jump::Jne => "jne".fmt(f),
|
||||
Jump::Jnz => "jnz".fmt(f),
|
||||
Jump::Jeq => "jeq".fmt(f),
|
||||
Jump::Jz => "jz".fmt(f),
|
||||
Jump::Jnc => "jnc".fmt(f),
|
||||
Jump::Jlo => "jlo".fmt(f),
|
||||
Jump::Jc => "jc".fmt(f),
|
||||
Jump::Jhs => "jhs".fmt(f),
|
||||
Jump::Jn => "jn".fmt(f),
|
||||
Jump::Jge => "jge".fmt(f),
|
||||
Jump::Jl => "jl".fmt(f),
|
||||
Jump::Jmp => "jmp".fmt(f),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
|
||||
use super::*;
|
||||
|
||||
use super::ignore::Ignore;
|
||||
use super::preprocessed::Preprocessed;
|
||||
|
||||
/// A TokenStream is a specialized [Iterator] which produces [Tokens](Token)
|
||||
pub trait TokenStream<'text>: Iterator<Item = Token<'text>> + std::fmt::Debug {
|
||||
/// Gets this stream's [Context]
|
||||
fn context(&self) -> Context;
|
||||
|
||||
/// Creates an iterator that skips [Type::Space] in the input
|
||||
#[inline]
|
||||
fn ignore(&'text mut self, variant: Type) -> Ignore<'text, Self>
|
||||
where Self: Sized {
|
||||
Ignore::new(variant, self)
|
||||
}
|
||||
|
||||
/// Creates a [TokenStream] that performs live substitution of the input
|
||||
#[inline]
|
||||
fn preprocessed(&'text mut self) -> Preprocessed<'text, Self>
|
||||
where Self: Sized {
|
||||
Preprocessed::new(self)
|
||||
}
|
||||
|
||||
/// Returns the next [Token] without advancing
|
||||
fn peek(&mut self) -> Self::Item;
|
||||
|
||||
/// Returns the next [Token] if it is of the expected [Type], without advancing
|
||||
fn peek_expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
|
||||
|
||||
/// Consumes and returns a [Token] if it is the expected [Type]
|
||||
///
|
||||
/// Otherwise, does not consume a [Token]
|
||||
fn expect(&mut self, expected: Type) -> Result<Self::Item, LexError>;
|
||||
|
||||
/// Ignores a [Token] of the expected [Type], propagating errors.
|
||||
#[inline]
|
||||
fn require(&mut self, expected: Type) -> Result<(), LexError> { self.expect(expected).map(|_| ()) }
|
||||
|
||||
/// Ignores a [Token] of the expected [Type], discarding errors.
|
||||
#[inline]
|
||||
fn allow(&mut self, expected: Type) { let _ = self.expect(expected); }
|
||||
|
||||
/// Runs a function on each expected [Type], returning the first successful result
|
||||
fn any_of<T, U>(&mut self, f: fn(&mut Self, Type) -> Result<U, LexError>, expected: T) -> Result<U, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
for &expected in expected.as_ref() {
|
||||
match f(self, expected).map_err(|e| e.bare()) {
|
||||
Ok(t) => return Ok(t),
|
||||
Err(LexError::UnexpectedToken { .. }) => continue,
|
||||
Err(e) => return Err(e.context(self.context())),
|
||||
}
|
||||
}
|
||||
Err(LexError::expected(expected, self.peek()).context(self.context()))
|
||||
}
|
||||
|
||||
/// Returns the next [Token] if it is of the expected [Types](Type), without advancing
|
||||
#[inline]
|
||||
fn peek_expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::peek_expect, expected)
|
||||
}
|
||||
/// Consumes and returns a [Token] if it matches any of the expected [Types](Type)
|
||||
///
|
||||
/// Otherwise, does not consume a [Token]
|
||||
#[inline]
|
||||
fn expect_any_of<T>(&mut self, expected: T) -> Result<Self::Item, LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::expect, expected)
|
||||
}
|
||||
/// Ignores a [Token] of any expected [Type], discarding errors.
|
||||
#[inline]
|
||||
fn allow_any_of<T>(&mut self, expected: T)
|
||||
where T: AsRef<[Type]> {
|
||||
let _ = self.expect_any_of(expected);
|
||||
}
|
||||
/// Ignores a [Token] of any expected [Type], propagating errors.
|
||||
#[inline]
|
||||
fn require_any_of<T>(&mut self, expected: T) -> Result<(), LexError>
|
||||
where T: AsRef<[Type]> {
|
||||
self.any_of(Self::require, expected)
|
||||
}
|
||||
}
|
||||
62
src/lib.rs
62
src/lib.rs
@@ -54,23 +54,55 @@
|
||||
//! └─ EndOfFile
|
||||
//! ```
|
||||
|
||||
pub mod preamble {
|
||||
//! Common imports for msp430-asm
|
||||
use super::*;
|
||||
pub use assembler::Assembler;
|
||||
pub use error::Error;
|
||||
pub use lexer::{
|
||||
context::Context,
|
||||
token::{Token, Type},
|
||||
token_stream::TokenStream,
|
||||
Tokenizer,
|
||||
pub mod util {
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
ops::{Index, Range},
|
||||
};
|
||||
pub use parser::Parser;
|
||||
/// A <code> [Clone] + [Copy] + [!Iterator](Iterator) </code> version of a [Range]
|
||||
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Span<Idx> {
|
||||
pub start: Idx,
|
||||
pub end: Idx,
|
||||
}
|
||||
impl<Idx> From<Span<Idx>> for Range<Idx> {
|
||||
fn from(value: Span<Idx>) -> Self {
|
||||
value.start..value.end
|
||||
}
|
||||
}
|
||||
impl<Idx> From<Range<Idx>> for Span<Idx> {
|
||||
fn from(value: Range<Idx>) -> Self {
|
||||
Self { start: value.start, end: value.end }
|
||||
}
|
||||
}
|
||||
impl<T> Index<Span<usize>> for [T] {
|
||||
type Output = [T];
|
||||
fn index(&self, index: Span<usize>) -> &Self::Output {
|
||||
self.index(Range::from(index))
|
||||
}
|
||||
}
|
||||
impl Index<Span<usize>> for str {
|
||||
type Output = str;
|
||||
fn index(&self, index: Span<usize>) -> &Self::Output {
|
||||
self.index(Range::from(index))
|
||||
}
|
||||
}
|
||||
impl<Idx: Debug> Debug for Span<Idx> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}..{:?}", self.start, self.end)
|
||||
}
|
||||
}
|
||||
impl<Idx: Display> Display for Span<Idx> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}..{}", self.start, self.end)
|
||||
}
|
||||
}
|
||||
}
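A brief sketch of how `Span` interoperates with `Range` and slice indexing through the impls above:

#[test]
fn span_sketch() {
    use crate::util::Span;
    use std::ops::Range;
    let line = "mov r4, r5";
    let span: Span<usize> = (4..6).into(); // covers "r4"
    assert_eq!(&line[span], "r4");         // str indexing goes through Range
    assert_eq!(Range::from(span), 4..6);   // and converts back losslessly
    assert_eq!(span.to_string(), "4..6");  // Display mirrors Range's syntax
}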
|
||||
|
||||
use preamble::*;
|
||||
pub mod error;
|
||||
pub mod lexer;
|
||||
|
||||
pub mod preprocessor;
|
||||
|
||||
pub mod parser;
|
||||
|
||||
pub mod assembler;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
|
||||
648
src/parser.rs
648
src/parser.rs
@@ -1,81 +1,591 @@
|
||||
// © 2023 John Breaux
|
||||
//! Parses [`Tokens`](crate::Token) into an [abstract syntax tree](Root)
|
||||
// © 2023-2024 John Breaux
|
||||
//! Parses [`Tokens`](crate::lexer::token::Token) into an [abstract syntax tree](ast)
|
||||
pub mod ast;
|
||||
|
||||
use crate::{TokenStream, Type};
|
||||
use error::ParseError;
|
||||
use preamble::*;
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
path::Path,
|
||||
use self::error::{
|
||||
Error,
|
||||
ErrorKind::{self, *},
|
||||
PResult, Parsing,
|
||||
};
|
||||
use crate::{
|
||||
lexer::{
|
||||
token::{Reg, Special, Token, TokenKind as Kind},
|
||||
Lexer,
|
||||
},
|
||||
preprocessor::Preprocessor,
|
||||
util::Span,
|
||||
};
|
||||
use ast::*;
|
||||
|
||||
pub mod preamble {
|
||||
//! All the different AST node types
|
||||
use super::*;
|
||||
// Traits
|
||||
pub use parsable::Parsable;
|
||||
// Nodes
|
||||
pub use comment::Comment;
|
||||
pub use directive::Directive;
|
||||
pub use identifier::Identifier;
|
||||
pub use instruction::{
|
||||
encoding::{
|
||||
encoding_parser::EncodingParser, jump_target::JumpTarget, number::Number, primary_operand::PrimaryOperand,
|
||||
register::Register, secondary_operand::SecondaryOperand, width::Width, Encoding,
|
||||
},
|
||||
opcode::Opcode,
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Parser<'t> {
|
||||
lexer: Preprocessor<'t>,
|
||||
next: Option<Token<'t>>,
|
||||
loc: Span<usize>,
|
||||
}
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
/// Creates a new [Parser]
|
||||
pub fn new(text: &'t str) -> Self {
|
||||
let lexer = Preprocessor::new(text);
|
||||
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
|
||||
}
|
||||
/// Creates a new [Parser] from an existing [Lexer]
|
||||
pub fn with_lexer(lexer: Lexer<'t>) -> Self {
|
||||
let lexer = Preprocessor::with_lexer(lexer);
|
||||
Self { loc: (lexer.start()..lexer.start()).into(), next: None, lexer }
|
||||
}
|
||||
|
||||
pub fn parse<T: Parsable<'t>>(&mut self) -> PResult<T> {
|
||||
Parsable::parse(self)
|
||||
}
|
||||
pub fn error(&self, kind: ErrorKind, parsing: Parsing) -> Error {
|
||||
Error { parsing, kind, loc: self.loc }
|
||||
}
|
||||
|
||||
/// Peek a token out of the lexer
|
||||
pub fn peek(&mut self, p: Parsing) -> PResult<&Token<'t>> {
|
||||
if self.next.is_none() {
|
||||
self.next = self.lexer.scan();
|
||||
}
|
||||
self.next.as_ref().inspect(|t| self.loc = t.pos).ok_or_else(|| self.error(BufEmpty, p))
|
||||
}
|
||||
pub fn next(&mut self, p: Parsing) -> PResult<Token<'t>> {
|
||||
Ok(match self.take() {
|
||||
Some(token) => token,
|
||||
None => {
|
||||
self.peek(p)?;
|
||||
self.take().expect("should have been populated by peek")
|
||||
}
|
||||
})
|
||||
}
|
||||
/// Consumes the next token
|
||||
pub fn assert(&mut self, expect: Kind, p: Parsing) -> PResult<&mut Self> {
|
||||
match self.peek(p)?.kind {
|
||||
kind if kind == expect => {
|
||||
self.take();
|
||||
Ok(self)
|
||||
}
|
||||
kind => Err(self.error(Unexpected(kind), p)),
|
||||
}
|
||||
}
|
||||
/// Consumes the next token without checking it
|
||||
pub fn then(&mut self, p: Parsing) -> PResult<&mut Self> {
|
||||
self.next(p)?;
|
||||
Ok(self)
|
||||
}
|
||||
/// Take the last peeked token
|
||||
pub fn take(&mut self) -> Option<Token<'t>> {
|
||||
self.next.take()
|
||||
}
|
||||
}
|
||||
|
||||
// Expressions
|
||||
impl<'t> Parser<'t> {
|
||||
/// Parses an expression
|
||||
pub fn expr(&mut self) -> PResult<Expr<'t>> {
|
||||
self.term()
|
||||
}
|
||||
/// Parses a term-expression (binary `*`mul, `/`div, `%`rem)
|
||||
pub fn term(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.factor()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Star => other.push((BinOp::Mul, self.then(p)?.factor()?)),
|
||||
Kind::Slash => other.push((BinOp::Div, self.then(p)?.factor()?)),
|
||||
Kind::Percent => other.push((BinOp::Rem, self.then(p)?.factor()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a factor expression (binary `+`add, `-`sub)
|
||||
pub fn factor(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.shift()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Plus => other.push((BinOp::Add, self.then(p)?.shift()?)),
|
||||
Kind::Minus => other.push((BinOp::Sub, self.then(p)?.shift()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a bit-shift expression (binary `<<`shift left, `>>`shift right)
|
||||
pub fn shift(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.bin()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Lsh => other.push((BinOp::Lsh, self.then(p)?.bin()?)),
|
||||
Kind::Rsh => other.push((BinOp::Rsh, self.then(p)?.bin()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn bin(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let a = self.unary()?;
|
||||
let mut other = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Amp => other.push((BinOp::And, self.then(p)?.unary()?)),
|
||||
Kind::Bar => other.push((BinOp::Or, self.then(p)?.unary()?)),
|
||||
Kind::Caret => other.push((BinOp::Xor, self.then(p)?.unary()?)),
|
||||
_ if other.is_empty() => break Ok(a),
|
||||
_ => break Ok(Expr::Binary(a.into(), other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parses a unary expression (`!`invert, `-`negate)
|
||||
pub fn unary(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let mut ops = vec![];
|
||||
loop {
|
||||
match self.peek(p)?.kind {
|
||||
Kind::Star => ops.push(UnOp::Deref),
|
||||
Kind::Minus => ops.push(UnOp::Neg),
|
||||
Kind::Bang => ops.push(UnOp::Not),
|
||||
_ if ops.is_empty() => break Ok(self.primary()?),
|
||||
_ => break Ok(Expr::Unary(ops, self.primary()?.into())),
|
||||
}
|
||||
self.take();
|
||||
}
|
||||
}
|
||||
/// Parses a `(`grouped expression`)`, `&`addrof expression, Number, or Identifier
|
||||
pub fn primary(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let Token { lexeme, kind, .. } = *self.peek(p)?;
|
||||
Ok(match kind {
|
||||
Kind::OpenParen => {
|
||||
let out = Expr::Group(self.then(p)?.parse()?);
|
||||
self.assert(Kind::CloseParen, p)?;
|
||||
out
|
||||
}
|
||||
Kind::Number(n, _) => {
|
||||
self.take();
|
||||
Expr::Number(n)
|
||||
}
|
||||
Kind::Identifier => {
|
||||
self.take();
|
||||
Expr::Ident(lexeme)
|
||||
}
|
||||
Kind::Amp => self.then(p)?.addrof()?,
|
||||
ty => Err(self.error(NonNumeric(ty), p))?,
|
||||
})
|
||||
}
|
||||
pub fn addrof(&mut self) -> PResult<Expr<'t>> {
|
||||
let p = Parsing::Expr;
|
||||
let token = self.peek(p)?;
|
||||
let out = match token.kind {
|
||||
Kind::Identifier => Expr::AddrOf(token.lexeme),
|
||||
Kind::Number(n, _) => Expr::Number(n),
|
||||
ty => Err(self.error(Unexpected(ty), p))?,
|
||||
};
|
||||
self.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
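A hedged usage sketch of the expression parser above. It assumes the lexer skips spaces and lexes bare digits as `Kind::Number`; note that each rule takes the next rule as its operands, so in this grammar `+`/`-` bind tighter than `*`/`/`.

#[test]
fn expr_sketch() {
    let mut p = Parser::new("(1 << 4) - 1\n");
    let expr = p.expr().expect("constant expression should parse");
    // Expected shape (illustrative): Binary(Group(Binary(1, [(Lsh, 4)])), [(Sub, 1)])
    assert!(matches!(expr, Expr::Binary(..)));
}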
|
||||
|
||||
pub trait Parsable<'t>: Sized {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self>;
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Statements<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let mut stmts = vec![];
|
||||
while p.peek(Parsing::File)?.kind != Kind::Eof {
|
||||
stmts.push(p.parse()?)
|
||||
}
|
||||
Ok(Self { stmts })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Statement<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let token = *p.peek(Parsing::Stmt)?;
|
||||
Ok(match token.kind {
|
||||
Kind::Comment => {
|
||||
p.take();
|
||||
Statement::Comment(token.lexeme)
|
||||
}
|
||||
Kind::Directive => Statement::Directive(p.parse()?),
|
||||
Kind::Identifier => Statement::Label(p.label()?),
|
||||
_ => Statement::Insn(p.parse()?),
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Directive<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Directive;
|
||||
let Token { lexeme, kind, pos: _ } = *p.peek(parsing)?;
|
||||
let Kind::Directive = kind else { return Err(p.error(Unexpected(kind), parsing)) };
|
||||
p.take();
|
||||
Ok(match lexeme {
|
||||
".define" => Directive::Define(p.parse()?),
|
||||
".org" => Directive::Org(p.expr()?.into()),
|
||||
".word" => Directive::Word(p.parse()?),
|
||||
".words" => Directive::Words(p.parse()?),
|
||||
".string" => Directive::String(p.string()?),
|
||||
_ => Err(p.error(Unexpected(Kind::Directive), parsing))?,
|
||||
})
|
||||
}
|
||||
}
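A hedged sketch of the directive rules above in use, assuming whitespace is skipped and bare digits lex as `Kind::Number`:

#[test]
fn directive_sketch() {
    let mut p = Parser::new(".word 100\n");
    let d: Directive = p.parse().expect(".word should parse");
    assert!(matches!(d, Directive::Word(_)));
    // Also accepted, per the match above: `.org <expr>`, `.words [ <expr>* ]`,
    // `.string "..."`, and `.define`, which captures raw tokens up to the end of the line.
}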
|
||||
impl<'t> Parsable<'t> for Vec<Token<'t>> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Directive;
|
||||
let mut tokens = vec![];
|
||||
loop {
|
||||
if let Kind::Eof | Kind::Newline | Kind::Comment = p.peek(parsing)?.kind {
|
||||
break;
|
||||
}
|
||||
tokens.push(p.next(parsing)?)
|
||||
}
|
||||
p.take();
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Instruction<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let start = p.peek(Parsing::Instruction)?.pos.start;
|
||||
Ok(Self { kind: p.parse()?, span: Span { start, end: p.loc.end } })
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for InstructionKind<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
use crate::lexer::token::OneArg;
|
||||
// an instruction starts with an opcode
|
||||
Ok(match p.peek(Parsing::Instruction)?.kind() {
|
||||
Kind::NoEm(_) => Self::NoEm(p.parse()?),
|
||||
Kind::OneEm(_) => Self::OneEm(p.parse()?),
|
||||
Kind::Special(Special::Br) => Self::Br(p.parse()?),
|
||||
Kind::OneArg(OneArg::Reti) => Self::Reti(p.parse()?),
|
||||
Kind::OneArg(_) => Self::OneArg(p.parse()?),
|
||||
Kind::TwoArg(_) => Self::TwoArg(p.parse()?),
|
||||
Kind::Jump(_) => Self::Jump(p.parse()?),
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::Instruction))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for NoEm {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
match p.next(Parsing::NoEm)?.kind {
|
||||
Kind::NoEm(opcode) => Ok(Self { opcode }),
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::NoEm)),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for OneEm<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Self {
|
||||
opcode: match p.next(Parsing::OneEm)?.kind {
|
||||
Kind::OneEm(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::OneEm))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
dst: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for OneArg<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Self {
|
||||
opcode: match p.next(Parsing::OneArg)?.kind {
|
||||
Kind::OneArg(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::OneArg))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
src: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for TwoArg<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::TwoArg;
|
||||
Ok(Self {
|
||||
opcode: match p.next(parsing)?.kind {
|
||||
Kind::TwoArg(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
width: p.parse()?,
|
||||
src: p.parse()?,
|
||||
dst: p.assert(Kind::Comma, parsing)?.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Jump<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Jump;
|
||||
Ok(Self {
|
||||
opcode: match p.next(parsing)?.kind {
|
||||
Kind::Jump(opcode) => opcode,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
dst: p.parse()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Reti {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
use crate::lexer::token::OneArg;
|
||||
p.assert(Kind::OneArg(OneArg::Reti), Parsing::Reti)?;
|
||||
Ok(Reti)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Br<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
p.assert(Kind::Special(Special::Br), Parsing::Br)?;
|
||||
Ok(Self { src: p.parse()? })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Parsable<'t> for Src<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Src;
|
||||
Ok(match p.peek(parsing)?.kind {
|
||||
Kind::Hash => Src::Immediate(p.then(parsing)?.parse()?), // #imm, #special
|
||||
Kind::Amp => Src::Absolute(p.then(parsing)?.parse()?), // &addr
|
||||
Kind::At => {
|
||||
let reg = match p.then(parsing)?.next(parsing)?.kind {
|
||||
Kind::Reg(r) => r,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
};
|
||||
if let Kind::Plus = p.peek(parsing)?.kind {
|
||||
p.take();
|
||||
Src::PostInc(reg)
|
||||
} else {
|
||||
Src::Indirect(reg)
|
||||
}
|
||||
} // @reg+, @reg
|
||||
Kind::Reg(_) => Src::Direct(p.parse()?),
|
||||
_ => {
|
||||
let expr = p.parse()?;
|
||||
match p.peek(parsing)?.kind {
|
||||
Kind::OpenParen => Src::Indexed(expr, {
|
||||
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
|
||||
p.assert(Kind::CloseParen, parsing)?;
|
||||
reg
|
||||
}),
|
||||
_ => Src::BareExpr(expr),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
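A hedged sketch of the MSP430 addressing modes the rules above recognize, assuming the operand text lexes into the token sequences used in the match:

#[test]
fn src_modes_sketch() {
    fn src(text: &str) -> Src<'_> {
        let mut p = Parser::new(text);
        p.parse().expect("operand should parse")
    }
    assert!(matches!(src("r5\n"), Src::Direct(Reg::R5)));    // register direct
    assert!(matches!(src("@r5\n"), Src::Indirect(Reg::R5))); // register indirect
    assert!(matches!(src("@r5+\n"), Src::PostInc(Reg::R5))); // indirect auto-increment
    assert!(matches!(src("&200\n"), Src::Absolute(_)));      // absolute address
    assert!(matches!(src("#4\n"), Src::Immediate(_)));       // immediate
    assert!(matches!(src("2(r5)\n"), Src::Indexed(..)));     // indexed
}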
|
||||
impl<'t> Parsable<'t> for Dst<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Dst;
|
||||
Ok(match p.peek(parsing)?.kind {
|
||||
Kind::Hash => match p.then(parsing)?.next(parsing)?.kind {
|
||||
Kind::Number(0, _) => Dst::Special(DstSpecial::Zero),
|
||||
Kind::Number(1, _) => Dst::Special(DstSpecial::One),
|
||||
Kind::Number(n, _) => Err(p.error(BadIntForDst(n), parsing))?,
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
},
|
||||
Kind::Amp => Dst::Absolute(p.then(parsing)?.parse()?),
|
||||
Kind::Reg(_) => Dst::Direct(p.parse()?),
|
||||
_ => Dst::Indexed(p.expr()?.into(), {
|
||||
let reg = p.assert(Kind::OpenParen, parsing)?.reg()?;
|
||||
p.assert(Kind::CloseParen, parsing)?;
|
||||
reg
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for JumpDst<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Jump;
|
||||
let mut neg = false;
|
||||
let out = loop {
|
||||
let token = p.peek(parsing)?;
|
||||
match token.kind {
|
||||
Kind::Minus => {
|
||||
neg = !neg;
|
||||
}
|
||||
Kind::Plus => {}
|
||||
Kind::Identifier => break Self::Label(token.lexeme),
|
||||
Kind::Number(n, _) => break Self::Rel(n as i16 * if neg { -1 } else { 1 }),
|
||||
ty => Err(p.error(Unexpected(ty), parsing))?,
|
||||
}
|
||||
p.take();
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Width {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let out = match p.peek(Parsing::Width)?.kind() {
|
||||
Kind::Byte => Width::Byte,
|
||||
Kind::Word => Width::Word,
|
||||
_ => return Ok(Width::Word),
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Reg {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let out = match p.peek(Parsing::Reg)?.kind {
|
||||
Kind::Reg(r) => r,
|
||||
ty => Err(p.error(Unexpected(ty), Parsing::Reg))?,
|
||||
};
|
||||
p.take();
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
impl<'t> Parsable<'t> for Expr<'t> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
p.expr()
|
||||
}
|
||||
}
|
||||
impl<'t, T: Parsable<'t>> Parsable<'t> for Box<T> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
Ok(Box::new(p.parse()?))
|
||||
}
|
||||
}
|
||||
impl<'t, T: Parsable<'t>> Parsable<'t> for Vec<T> {
|
||||
fn parse(p: &mut Parser<'t>) -> PResult<Self> {
|
||||
let parsing = Parsing::Vec;
|
||||
p.assert(Kind::OpenBrace, parsing)?;
|
||||
let mut out = vec![];
|
||||
while Kind::CloseBrace != p.peek(parsing)?.kind {
|
||||
out.push(p.parse()?)
|
||||
}
|
||||
p.assert(Kind::CloseBrace, parsing)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
/// Context-sensitive parsing rules
|
||||
impl<'t> Parser<'t> {
|
||||
pub fn string(&mut self) -> PResult<&'t str> {
|
||||
let token = *self.peek(Parsing::Directive)?;
|
||||
match token.kind {
|
||||
Kind::String => {
|
||||
self.take();
|
||||
Ok(&token.lexeme[1..token.lexeme.len() - 1])
|
||||
}
|
||||
ty => Err(self.error(Unexpected(ty), Parsing::Directive)),
|
||||
}
|
||||
}
|
||||
pub fn label(&mut self) -> PResult<&'t str> {
|
||||
let p = Parsing::Label;
|
||||
let token = self.next(p)?;
|
||||
assert_eq!(Kind::Identifier, token.kind);
|
||||
self.assert(Kind::Colon, p)?;
|
||||
Ok(token.lexeme)
|
||||
}
|
||||
pub fn reg(&mut self) -> PResult<Reg> {
|
||||
match self.peek(Parsing::Reg)?.kind {
|
||||
Kind::Reg(r) => {
|
||||
self.take();
|
||||
Ok(r)
|
||||
}
|
||||
ty => Err(self.error(Unexpected(ty), Parsing::Reg)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod error {
|
||||
use super::Kind;
|
||||
use crate::util::Span;
|
||||
use std::{fmt::Display, num::TryFromIntError};
|
||||
|
||||
pub type PResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct Error {
|
||||
pub parsing: Parsing,
|
||||
pub kind: ErrorKind,
|
||||
pub loc: Span<usize>,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum ErrorKind {
|
||||
LexError,
|
||||
/// Returned when [Parsing::Expr] fails without consuming
|
||||
NotExpr,
|
||||
DivZero,
|
||||
NonNumeric(Kind),
|
||||
BadIntForDst(u16),
|
||||
TryFromIntError(TryFromIntError),
|
||||
Unexpected(Kind),
|
||||
BufEmpty,
|
||||
Todo,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Parsing {
|
||||
File,
|
||||
Stmt,
|
||||
|
||||
Label,
|
||||
Directive,
|
||||
Instruction,
|
||||
};
|
||||
pub use label::Label;
|
||||
pub use line::Line;
|
||||
pub use root::Root;
|
||||
// Error
|
||||
pub use error::ParseError;
|
||||
}
|
||||
|
||||
pub mod parsable;
|
||||
NoEm,
|
||||
OneEm,
|
||||
Reti,
|
||||
Br,
|
||||
OneArg,
|
||||
TwoArg,
|
||||
Jump,
|
||||
|
||||
pub mod comment;
|
||||
pub mod directive;
|
||||
pub mod error;
|
||||
pub mod identifier;
|
||||
pub mod instruction;
|
||||
pub mod label;
|
||||
pub mod line;
|
||||
pub mod root;
|
||||
Width,
|
||||
Src,
|
||||
Dst,
|
||||
Reg,
|
||||
|
||||
pub struct Parser {
|
||||
radix: u32,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn parse_with<'t>(self, stream: &'t mut impl TokenStream<'t>) -> Result<Root, ParseError> {
|
||||
Root::parse(&self, &mut stream.ignore(Type::Space))
|
||||
Expr,
|
||||
Vec,
|
||||
}
|
||||
pub fn parse<T>(self, input: &T) -> Result<Root, ParseError>
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Root::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
|
||||
impl Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[{}]: Error: {} while parsing {}", self.loc, self.kind, self.parsing)
|
||||
}
|
||||
}
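Given the format string above, a hedged sketch of what a rendered error looks like (the span and token are illustrative):

#[test]
fn error_message_sketch() {
    let err = Error {
        parsing: Parsing::Src,
        kind: ErrorKind::Unexpected(Kind::Comma),
        loc: (12..15).into(),
    };
    assert_eq!(err.to_string(), "[12..15]: Error: Unexpected token (,) while parsing a source");
}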
|
||||
pub fn parse_file<P>(self, path: &P) -> Result<Root, ParseError>
|
||||
where P: AsRef<Path> + ?Sized {
|
||||
self.parse(&std::fs::read_to_string(path.as_ref())?).map(|r| r.set_file(path.as_ref().into()))
|
||||
impl Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ErrorKind::LexError => write!(f, "lexical error"),
|
||||
ErrorKind::TryFromIntError(e) => write!(f, "{e}"),
|
||||
ErrorKind::BadIntForDst(n) => write!(f, "Immediate #{n} invalid in destination"),
|
||||
ErrorKind::NotExpr => write!(f, "Not a literal or basic expression"),
|
||||
ErrorKind::DivZero => write!(f, "Division by zero"),
|
||||
ErrorKind::NonNumeric(t) => write!(f, "`{t}` is not a Number"),
|
||||
ErrorKind::Unexpected(t) => write!(f, "Unexpected token ({t})"),
|
||||
ErrorKind::BufEmpty => write!(f, "Peek buffer empty"),
|
||||
ErrorKind::Todo => write!(f, "Not yet implemented"),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn parse_one<T>(self, input: &T) -> Result<Line, ParseError>
|
||||
where T: AsRef<str> + ?Sized {
|
||||
Line::parse(&self, &mut super::Tokenizer::new(input).preprocessed().ignore(Type::Space))
|
||||
}
|
||||
|
||||
/// Sets the default radix for [Token](crate::lexer::token::Token) -> [Number]
|
||||
/// conversion
|
||||
pub fn radix(mut self, radix: u32) { self.radix = radix; }
|
||||
}
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self { Self { radix: 16 } }
|
||||
}
|
||||
|
||||
impl Debug for Parser {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Parser").field("radix", &self.radix).finish_non_exhaustive()
|
||||
impl Display for Parsing {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Parsing::File => "a file".fmt(f),
|
||||
Parsing::Stmt => "a line".fmt(f),
|
||||
Parsing::Label => "a label".fmt(f),
|
||||
Parsing::Directive => "a directive".fmt(f),
|
||||
Parsing::Instruction => "an instruction".fmt(f),
|
||||
Parsing::NoEm => "a no-operand emulated instruction".fmt(f),
|
||||
Parsing::OneEm => "a one-operand emulated instruction".fmt(f),
|
||||
Parsing::Reti => "a `reti` instruction".fmt(f),
|
||||
Parsing::Br => "a `br` instruction".fmt(f),
|
||||
Parsing::OneArg => "a one-operand instruction".fmt(f),
|
||||
Parsing::TwoArg => "a two-operand instruction".fmt(f),
|
||||
Parsing::Jump => "a jump instruction".fmt(f),
|
||||
Parsing::Width => "an instruction width".fmt(f),
|
||||
Parsing::Src => "a source".fmt(f),
|
||||
Parsing::Dst => "a destination".fmt(f),
|
||||
Parsing::Reg => "a register".fmt(f),
|
||||
Parsing::Expr => "a constant expression".fmt(f),
|
||||
Parsing::Vec => "a list".fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::error::Error for Error {}
|
||||
}
|
||||
|
||||
679
src/parser/ast.rs
Normal file
679
src/parser/ast.rs
Normal file
@@ -0,0 +1,679 @@
|
||||
// © 2023-2024 John Breaux
|
||||
//! Represents MSP430 instructions, directives, and constant expressions as AST nodes.
|
||||
use crate::{
|
||||
lexer::token::{self, Reg, Token},
|
||||
util::Span,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Statements<'t> {
|
||||
pub stmts: Vec<Statement<'t>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Statement<'t> {
|
||||
Label(&'t str),
|
||||
Insn(Instruction<'t>),
|
||||
Directive(Directive<'t>),
|
||||
Comment(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Directive<'t> {
|
||||
/// TODO: Store define as a vec of tokens. This will require help from the
|
||||
/// [preprocessor](crate::preprocessor)
|
||||
Define(Vec<Token<'t>>),
|
||||
Org(Box<Expr<'t>>),
|
||||
Word(Box<Expr<'t>>),
|
||||
Words(Vec<Expr<'t>>),
|
||||
String(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Instruction<'t> {
|
||||
pub span: Span<usize>,
|
||||
pub kind: InstructionKind<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum InstructionKind<'t> {
|
||||
NoEm(NoEm),
|
||||
OneEm(OneEm<'t>),
|
||||
OneArg(OneArg<'t>),
|
||||
TwoArg(TwoArg<'t>),
|
||||
Jump(Jump<'t>),
|
||||
Reti(Reti),
|
||||
Br(Br<'t>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct NoEm {
|
||||
pub opcode: token::NoEm,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct OneEm<'t> {
|
||||
pub opcode: token::OneEm,
|
||||
pub width: Width,
|
||||
pub dst: Dst<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct OneArg<'t> {
|
||||
pub opcode: token::OneArg,
|
||||
pub width: Width,
|
||||
pub src: Src<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct TwoArg<'t> {
|
||||
pub opcode: token::TwoArg,
|
||||
pub width: Width,
|
||||
pub src: Src<'t>,
|
||||
pub dst: Dst<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Jump<'t> {
|
||||
pub opcode: token::Jump,
|
||||
pub dst: JumpDst<'t>,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Reti;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Br<'t> {
|
||||
pub src: Src<'t>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Width {
|
||||
#[default]
|
||||
Word,
|
||||
Byte,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Src<'t> {
|
||||
Direct(Reg),
|
||||
Indexed(Box<Expr<'t>>, Reg),
|
||||
Indirect(Reg),
|
||||
PostInc(Reg),
|
||||
Absolute(Box<Expr<'t>>),
|
||||
Immediate(Box<Expr<'t>>),
|
||||
Special(SrcSpecial),
|
||||
BareExpr(Box<Expr<'t>>),
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum SrcSpecial {
|
||||
Zero,
|
||||
One,
|
||||
Four,
|
||||
Two,
|
||||
Eight,
|
||||
NegOne,
|
||||
}
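The variants above mirror the values the MSP430 constant generator can supply; a hedged sketch of that mapping (how they are encoded is handled elsewhere and is an assumption here):

fn src_special_value(s: SrcSpecial) -> i16 {
    match s {
        SrcSpecial::Zero => 0,
        SrcSpecial::One => 1,
        SrcSpecial::Two => 2,
        SrcSpecial::Four => 4,
        SrcSpecial::Eight => 8,
        SrcSpecial::NegOne => -1,
    }
}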
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Dst<'t> {
|
||||
Direct(Reg),
|
||||
Indexed(Box<Expr<'t>>, Reg),
|
||||
Absolute(Box<Expr<'t>>),
|
||||
Special(DstSpecial),
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum DstSpecial {
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum JumpDst<'t> {
|
||||
/// A relative offset, nominally an even number from -0x400..=0x3fe
|
||||
Rel(i16),
|
||||
Label(&'t str),
|
||||
}
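For the `Rel` range noted above, a hedged sketch of the arithmetic, assuming the standard MSP430 jump encoding (a 10-bit signed offset counted in words):

fn encode_jump_offset(rel_bytes: i16) -> Option<u16> {
    // -512..=511 words is -0x400..=0x3FE bytes, and the displacement must be even.
    let words = rel_bytes / 2;
    (rel_bytes % 2 == 0 && (-512..=511).contains(&words)).then(|| (words as u16) & 0x03FF)
}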
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Expr<'t> {
|
||||
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
|
||||
Unary(Vec<UnOp>, Box<Expr<'t>>),
|
||||
Group(Box<Expr<'t>>),
|
||||
Number(u16),
|
||||
Ident(&'t str),
|
||||
AddrOf(&'t str),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum BinOp {
|
||||
Mul,
|
||||
Div,
|
||||
Rem,
|
||||
Add,
|
||||
Sub,
|
||||
Lsh,
|
||||
Rsh,
|
||||
And,
|
||||
Xor,
|
||||
Or,
|
||||
}
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum UnOp {
|
||||
Deref,
|
||||
Not,
|
||||
Neg,
|
||||
}
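A hand-built sketch of the `Expr` tree for the source text `!(&start >> 1)`, showing how the variants above compose (that the parser produces exactly this shape is an assumption):

fn expr_tree_sketch() -> Expr<'static> {
    Expr::Unary(
        vec![UnOp::Not],
        Box::new(Expr::Group(Box::new(Expr::Binary(
            Box::new(Expr::AddrOf("start")),
            vec![(BinOp::Rsh, Expr::Number(1))],
        )))),
    )
}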
|
||||
|
||||
pub mod conv {
|
||||
//! Conversions between [ast](super) types, via [From], or via `new` constructor
|
||||
use super::{InstructionKind as Ik, *};
|
||||
|
||||
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
|
||||
impl<'t> From<$src> for $dst {
|
||||
fn from(value: $src) -> Self {
|
||||
$expr(value)
|
||||
}
|
||||
}
|
||||
)*}}
|
||||
// sure am glad macros aren't hygienic over lifetimes
|
||||
impl_from! { Ik<'t> {
|
||||
NoEm => Ik::NoEm,
|
||||
OneEm<'t> => Ik::OneEm,
|
||||
OneArg<'t> => Ik::OneArg,
|
||||
TwoArg<'t> => Ik::TwoArg,
|
||||
Jump<'t> => Ik::Jump,
|
||||
Reti => Ik::Reti,
|
||||
Br<'t> => Ik::Br,
|
||||
}}
|
||||
impl_from! { Expr<'t> {
|
||||
u16 => Expr::Number
|
||||
}}
|
||||
impl<'t> From<Dst<'t>> for Src<'t> {
|
||||
fn from(value: Dst<'t>) -> Self {
|
||||
match value {
|
||||
Dst::Special(v) => Src::Special(v.into()),
|
||||
Dst::Absolute(v) => Src::Absolute(v),
|
||||
Dst::Indexed(i, r) => Src::Indexed(i, r),
|
||||
Dst::Direct(r) => Src::Direct(r),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl From<DstSpecial> for SrcSpecial {
|
||||
fn from(value: DstSpecial) -> Self {
|
||||
match value {
|
||||
DstSpecial::Zero => SrcSpecial::Zero,
|
||||
DstSpecial::One => SrcSpecial::One,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> TwoArg<'t> {
|
||||
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
|
||||
Self { opcode, width, src, dst }
|
||||
}
|
||||
}
|
||||
}
|
||||
pub mod display {
|
||||
use super::*;
|
||||
use std::fmt::Display;
|
||||
|
||||
impl<'t> Display for Statements<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for stmt in &self.stmts {
|
||||
writeln!(f, "{stmt}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Statement<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Statement::Label(v) => write!(f, "{v}:"),
|
||||
Statement::Insn(v) => write!(f, "{v}"),
|
||||
Statement::Directive(v) => write!(f, "{v}"),
|
||||
Statement::Comment(v) => write!(f, "{v}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Directive<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Directive::Define(_) => write!(f, ".directive"),
|
||||
Directive::Org(e) => write!(f, ".org {e}"),
|
||||
Directive::Word(w) => write!(f, ".word {w}"),
|
||||
Directive::Words(words) => {
|
||||
write!(f, ".words [ ")?;
|
||||
for word in words {
|
||||
write!(f, "{word} ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
Directive::String(s) => write!(f, ".string \"{s}\""),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Instruction<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { span: _, kind } = self;
|
||||
write!(f, "{kind}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for InstructionKind<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
InstructionKind::NoEm(v) => v.fmt(f),
|
||||
InstructionKind::OneEm(v) => v.fmt(f),
|
||||
InstructionKind::OneArg(v) => v.fmt(f),
|
||||
InstructionKind::TwoArg(v) => v.fmt(f),
|
||||
InstructionKind::Jump(v) => v.fmt(f),
|
||||
InstructionKind::Reti(v) => v.fmt(f),
|
||||
InstructionKind::Br(v) => v.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for NoEm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode } = self;
|
||||
write!(f, "{opcode}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for OneEm<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, dst } = self;
|
||||
write!(f, "{opcode}{width}\t{dst}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for OneArg<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, src } = self;
|
||||
write!(f, "{opcode}{width}\t{src}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for TwoArg<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
write!(f, "{opcode}{width}\t{src}, {dst}")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Jump<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { opcode, dst } = self;
|
||||
write!(f, "{opcode}\t{dst}")
|
||||
}
|
||||
}
|
||||
impl Display for Reti {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "reti")
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Br<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self { src } = self;
|
||||
write!(f, "br\t{src}")
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Display for Src<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Src::Direct(r) => write!(f, "{r}"),
|
||||
Src::Indexed(e, r) => write!(f, "{e}({r})"),
|
||||
Src::Indirect(r) => write!(f, "@{r}"),
|
||||
Src::PostInc(r) => write!(f, "@{r}+"),
|
||||
Src::Absolute(e) => write!(f, "&{e}"),
|
||||
Src::Immediate(e) => write!(f, "#{e}"),
|
||||
Src::Special(i) => write!(f, "#{i}"),
|
||||
Src::BareExpr(id) => write!(f, "{id}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for SrcSpecial {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SrcSpecial::Zero => write!(f, "0"),
|
||||
SrcSpecial::One => write!(f, "1"),
|
||||
SrcSpecial::Four => write!(f, "4"),
|
||||
SrcSpecial::Two => write!(f, "2"),
|
||||
SrcSpecial::Eight => write!(f, "8"),
|
||||
SrcSpecial::NegOne => write!(f, "-1"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Dst<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Dst::Direct(r) => write!(f, "{r}"),
|
||||
Dst::Indexed(e, r) => write!(f, "{e}({r})"),
|
||||
Dst::Absolute(e) => write!(f, "&{e}"),
|
||||
Dst::Special(i) => write!(f, "#{i}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for DstSpecial {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
DstSpecial::Zero => write!(f, "0"),
|
||||
DstSpecial::One => write!(f, "1"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for JumpDst<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
JumpDst::Rel(i) => write!(f, "{i}"),
|
||||
JumpDst::Label(l) => write!(f, "{l}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Display for Expr<'t> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Expr::Binary(head, tail) => {
|
||||
write!(f, "{head}")?;
|
||||
for (op, tail) in tail {
|
||||
write!(f, "{op}{tail}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Expr::Unary(ops, tail) => {
|
||||
for op in ops {
|
||||
write!(f, "{op}")?
|
||||
}
|
||||
write!(f, "{tail}")
|
||||
}
|
||||
Expr::Group(e) => write!(f, "({e})"),
|
||||
Expr::Number(n) => write!(f, "{n:x}"),
|
||||
Expr::Ident(n) => write!(f, "{n}"),
|
||||
Expr::AddrOf(n) => write!(f, "&{n}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for BinOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
BinOp::Mul => write!(f, "*"),
|
||||
BinOp::Div => write!(f, "/"),
|
||||
BinOp::Rem => write!(f, "%"),
|
||||
BinOp::Add => write!(f, "+"),
|
||||
BinOp::Sub => write!(f, "-"),
|
||||
BinOp::Lsh => write!(f, "<<"),
|
||||
BinOp::Rsh => write!(f, ">>"),
|
||||
BinOp::And => write!(f, "&"),
|
||||
BinOp::Xor => write!(f, "^"),
|
||||
BinOp::Or => write!(f, "|"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for UnOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
UnOp::Deref => write!(f, "*"),
|
||||
UnOp::Not => write!(f, "!"),
|
||||
UnOp::Neg => write!(f, "-"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Width {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Width::Word => Ok(()),
|
||||
Width::Byte => write!(f, ".b"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pub mod canonical {
|
||||
use std::iter;
|
||||
|
||||
use super::*;
|
||||
use token::TwoArg::*;
|
||||
pub trait Canonicalize {
|
||||
/// The output after canonicalization
|
||||
type Output;
|
||||
/// Transmutes Self into its "canonical" form. "Emulated" instructions are converted
|
||||
/// into their respective non-emulated forms.
|
||||
fn to_canonical(self) -> Self::Output;
|
||||
}
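// For example, canonicalizing the emulated `ret` yields `mov @sp+, pc`, and
// `clr dst` yields `mov #0, dst`; the `NoEm` and `OneEm` impls below perform
// exactly these rewrites.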
|
||||
impl<'t> Canonicalize for Statements<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Statement<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Statement::Insn(i) => Self::Insn(i.to_canonical()),
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Instruction<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
Self { kind: self.kind.to_canonical(), ..self }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for InstructionKind<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Self::NoEm(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::OneEm(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::Reti(v) => Self::Reti(v.to_canonical()),
|
||||
Self::Br(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::OneArg(v) => Self::OneArg(v.to_canonical()),
|
||||
Self::TwoArg(v) => Self::TwoArg(v.to_canonical()),
|
||||
Self::Jump(v) => Self::Jump(v.to_canonical()),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Canonicalize for NoEm {
|
||||
type Output = TwoArg<'static>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode } = self;
|
||||
use SrcSpecial::*;
|
||||
use Width::*;
|
||||
match opcode {
|
||||
token::NoEm::Nop => {
|
||||
TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
|
||||
}
|
||||
token::NoEm::Ret => {
|
||||
TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
|
||||
}
|
||||
token::NoEm::Clrc => {
|
||||
TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Clrz => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Clrn => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setc => {
|
||||
TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setz => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Setn => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Dint => {
|
||||
TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
|
||||
}
|
||||
token::NoEm::Eint => {
|
||||
TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for OneEm<'t> {
|
||||
type Output = TwoArg<'t>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
use SrcSpecial::*;
|
||||
let Self { opcode, width, dst } = self;
|
||||
match opcode {
|
||||
token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst),
|
||||
token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst),
|
||||
token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst),
|
||||
token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst),
|
||||
token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst),
|
||||
token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst),
|
||||
token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst),
|
||||
token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst),
|
||||
token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst),
|
||||
token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for OneArg<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, width, src } = self;
|
||||
Self {
|
||||
opcode,
|
||||
width: match opcode {
|
||||
token::OneArg::Call => Width::Word,
|
||||
_ => width,
|
||||
},
|
||||
src: src.to_canonical(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for TwoArg<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, width, src, dst } = self;
|
||||
Self { opcode, width, src: src.to_canonical(), dst: dst.to_canonical() }
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Jump<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { opcode, dst } = self;
|
||||
Self {
|
||||
opcode: match opcode {
|
||||
token::Jump::Jnz => token::Jump::Jne,
|
||||
token::Jump::Jz => token::Jump::Jeq,
|
||||
token::Jump::Jnc => token::Jump::Jlo,
|
||||
token::Jump::Jc => token::Jump::Jhs,
|
||||
t => t,
|
||||
},
|
||||
dst: dst.to_canonical(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Canonicalize for Reti {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
self
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Br<'t> {
|
||||
type Output = TwoArg<'t>;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
let Self { src } = self;
|
||||
TwoArg::new(Mov, Width::Word, src, Dst::Direct(Reg::PC))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Canonicalize for Src<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
use SrcSpecial::*;
|
||||
match self {
|
||||
Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self,
|
||||
Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r),
|
||||
Src::Absolute(e) => Src::Absolute(e.to_canonical().into()),
|
||||
Src::Immediate(e) => match e.to_canonical() {
|
||||
Expr::Number(0) => Src::Special(Zero),
|
||||
Expr::Number(1) => Src::Special(One),
|
||||
Expr::Number(2) => Src::Special(Two),
|
||||
Expr::Number(4) => Src::Special(Four),
|
||||
Expr::Number(8) => Src::Special(Eight),
|
||||
Expr::Number(0xffff) => Src::Special(NegOne),
|
||||
expr => Src::Immediate(expr.into()),
|
||||
},
|
||||
Src::BareExpr(_) => self,
|
||||
}
|
||||
}
|
||||
}
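// Immediates that fold to 0, 1, 2, 4, 8, or 0xffff become `Src::Special(_)`,
// which lets the assembler lean on the MSP430's constant generator rather than
// emitting an extension word for the value.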
|
||||
impl<'t> Canonicalize for Dst<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Dst::Direct(_) | Dst::Special(_) => self,
|
||||
Dst::Indexed(e, r) => Dst::Indexed(e.to_canonical().into(), r),
|
||||
Dst::Absolute(e) => Dst::Absolute(e.to_canonical().into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for JumpDst<'t> {
|
||||
type Output = Self;
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
self
|
||||
}
|
||||
}
|
||||
impl<'t> Canonicalize for Expr<'t> {
|
||||
type Output = Self;
|
||||
/// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
|
||||
/// this returns a single [Expr::Number]. If not, it evaluates until
|
||||
/// it runs into an unevaluatable leaf.
|
||||
fn to_canonical(self) -> Self::Output {
|
||||
match self {
|
||||
Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
|
||||
Expr::Group(e) => e.to_canonical(),
|
||||
Expr::Unary(ops, tail) => {
|
||||
let mut tail = match tail.to_canonical() {
|
||||
Expr::Number(n) => n,
|
||||
other => return other,
|
||||
};
|
||||
// If the tail is dereferenced, canonicalization must halt,
|
||||
// since we have no knowledge of memory layout
|
||||
let mut ops = ops.into_iter();
|
||||
for op in ops.by_ref() {
|
||||
tail = match op {
|
||||
UnOp::Deref => {
|
||||
return Expr::Unary(
|
||||
iter::once(op).chain(ops).collect(),
|
||||
Box::new(tail.into()),
|
||||
)
|
||||
}
|
||||
UnOp::Not => !tail,
|
||||
UnOp::Neg => 0u16.wrapping_sub(tail),
|
||||
}
|
||||
}
|
||||
Expr::Number(tail)
|
||||
}
|
||||
Expr::Binary(head, tails) => {
|
||||
let mut head = match head.to_canonical() {
|
||||
Expr::Number(n) => n,
|
||||
head => return Expr::Binary(head.into(), tails),
|
||||
};
|
||||
let mut tails = tails.into_iter();
|
||||
for (op, tail) in &mut tails {
|
||||
let tail = tail.to_canonical();
|
||||
// If the canonical tail isn't a number, rebuild and return
|
||||
let Expr::Number(tail) = tail else {
|
||||
return Expr::Binary(
|
||||
Box::new(head.into()),
|
||||
iter::once((op, tail)).chain(tails).collect(),
|
||||
);
|
||||
};
|
||||
head = match op {
|
||||
BinOp::Mul => head.wrapping_mul(tail),
|
||||
BinOp::Div => head.wrapping_div(tail),
|
||||
BinOp::Rem => head.wrapping_rem(tail),
|
||||
BinOp::Add => head.wrapping_add(tail),
|
||||
BinOp::Sub => head.wrapping_sub(tail),
|
||||
BinOp::Lsh => head.wrapping_shl(tail as u32),
|
||||
BinOp::Rsh => head.wrapping_shr(tail as u32),
|
||||
BinOp::And => head & tail,
|
||||
BinOp::Xor => head ^ tail,
|
||||
BinOp::Or => head | tail,
|
||||
};
|
||||
}
|
||||
Expr::Number(head)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
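// A sketch of hypothetical usage: binary chains fold left-to-right with no
// operator precedence, so
//
//     Expr::Binary(
//         Box::new(Expr::Number(2)),
//         vec![(BinOp::Add, Expr::Number(3)), (BinOp::Mul, Expr::Number(4))],
//     )
//     .to_canonical()
//
// yields `Expr::Number(20)` ((2 + 3) * 4), while a chain whose head
// canonicalizes to a non-number (an identifier, say) is returned un-folded.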
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Comment`] stores the contents of a line comment, including the preceding `;` or `//`
|
||||
use super::*;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Comment(pub String);
|
||||
|
||||
impl Parsable for Comment {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
Ok(Self(stream.expect(Type::Comment)?.lexeme().to_string()))
|
||||
}
|
||||
}
|
||||
impl Display for Comment {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.0, f) }
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Directive`] issues commands directly to the [`Tokenizer`](crate::Tokenizer) and
|
||||
//! [Linker](crate::Linker)
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::*;
|
||||
use crate::lexer::token::OwnedToken;
|
||||
|
||||
// TODO: Parse each kind of *postprocessor* directive into an AST node
|
||||
// - .org 8000: Directive::Org { base: Number }
|
||||
// - .define ident tt... Directive::Define { } ; should this be in the AST? How do I put this
|
||||
// in the AST?
|
||||
// - .include "<filename>" Directive::Include { Root } ; should this include an entire AST in
|
||||
// the AST?
|
||||
// - .word 8000 Directive::Word(Number)
|
||||
// - .words dead beef Directive::Words(Vec<u16>|Vec<Number>)
|
||||
// - .byte ff Directive::Byte(Number)
|
||||
// - .bytes de, ad, be, ef Directive::Bytes(Vec<u8>)
|
||||
// - .string "string" Directive::String(String)
|
||||
// - .ascii "string" Directive::Ascii(Vec<u8>)
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Directive {
|
||||
Org(Number),
|
||||
Define(Vec<OwnedToken>),
|
||||
Include(Root),
|
||||
Byte(Number),
|
||||
Bytes(Vec<Number>),
|
||||
Word(Number),
|
||||
Words(Vec<Number>),
|
||||
String(String),
|
||||
Strings(Vec<String>),
|
||||
}
|
||||
|
||||
impl Directive {}
|
||||
|
||||
impl Parsable for Directive {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let d = stream.expect(Type::Directive)?;
|
||||
// match on the directive
|
||||
Ok(match d.lexeme() {
|
||||
".org" => Self::Org(Number::parse(p, stream)?),
|
||||
".define" => {
|
||||
let mut tokens = vec![];
|
||||
loop {
|
||||
match stream.peek().variant() {
|
||||
Type::Endl | Type::EndOfFile => break,
|
||||
_ => tokens.push(stream.next().unwrap_or_default().into()),
|
||||
}
|
||||
}
|
||||
Self::Define(tokens)
|
||||
}
|
||||
".include" => {
|
||||
// Try to get path
|
||||
Self::Include(Parser::default().parse_file(&PathBuf::parse(p, stream)?)?)
|
||||
}
|
||||
".byte" => Self::Byte(Number::parse(p, stream)?),
|
||||
".bytes" => Self::Bytes(Vec::<Number>::parse(p, stream)?),
|
||||
".word" => Self::Word(Number::parse(p, stream)?),
|
||||
".words" => Self::Words(Vec::<Number>::parse(p, stream)?),
|
||||
".string" => Self::String(String::parse(p, stream)?),
|
||||
".strings" => Self::Strings(Vec::<String>::parse(p, stream)?),
|
||||
e => Err(ParseError::UnrecognizedDirective(e.into()))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Directive {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Directive::Org(num) => write!(f, ".org {num}"),
|
||||
Directive::Define(rep) => {
|
||||
write!(f, ".define")?;
|
||||
for t in rep {
|
||||
write!(f, " {t}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Directive::Include(r) => Display::fmt(r, f),
|
||||
Directive::Byte(num) => write!(f, ".byte {num}"),
Directive::Bytes(v) => write!(f, ".bytes {v:?}"),
Directive::Word(num) => write!(f, ".word {num}"),
Directive::Words(v) => write!(f, ".words {v:?}"),
Directive::String(s) => write!(f, ".string \"{s}\""),
Directive::Strings(s) => write!(f, ".strings {s:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
use super::*;
|
||||
use crate::lexer::error::LexError;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParseError {
|
||||
/// Produced by [lexer](crate::lexer)
|
||||
LexError(LexError),
|
||||
/// Produced by [std::io]
|
||||
IoError(std::io::Error),
|
||||
/// Produced by [Number](Number)[::parse()](Parsable::parse())
|
||||
/// when the parsed number contains digits too high for the specified radix
|
||||
UnexpectedDigits(String, u32),
|
||||
/// Produced by [Opcode](Opcode)[::parse()](Parsable::parse())
|
||||
/// when the opcode passed lexing but did not match recognized opcodes.
|
||||
///
|
||||
/// This is always a lexer bug.
|
||||
UnrecognizedOpcode(String),
|
||||
/// Produced by [Directive](Directive)[::parse()](Parsable::parse())
|
||||
/// when an unknown or unimplemented directive is used
|
||||
UnrecognizedDirective(String),
|
||||
/// Produced by [Register] when attempting to convert from a [str]
|
||||
/// that isn't a register (pc, sp, sr, cg, or r{number})
|
||||
NotARegister(String),
|
||||
/// Produced by [Register] when the r{number} is outside the range 0-15
|
||||
RegisterTooHigh(u16),
|
||||
/// Produced by [SecondaryOperand] when the joke "secondary immediate" form
|
||||
/// is out of range 0..=1
|
||||
FatSecondaryImmediate(isize),
|
||||
/// Produced by a [Number] too wide to fit in 16 bits
|
||||
/// (outside the range `(-2^15) .. (2^16-1)` )
|
||||
NumberTooWide(isize),
|
||||
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
|
||||
/// when the jump offset is outside the valid range (even offsets in `-1022..=1024`)
|
||||
JumpedTooFar(isize),
|
||||
/// Produced by [JumpTarget](parser::preamble::JumpTarget)
|
||||
JumpedOdd(isize),
|
||||
}
|
||||
|
||||
impl From<LexError> for ParseError {
|
||||
fn from(value: LexError) -> Self { Self::LexError(value) }
|
||||
}
|
||||
impl From<std::io::Error> for ParseError {
|
||||
fn from(value: std::io::Error) -> Self { Self::IoError(value) }
|
||||
}
|
||||
|
||||
impl Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::LexError(error) => Display::fmt(error, f),
|
||||
Self::IoError(error) => Display::fmt(error, f),
|
||||
Self::UnexpectedDigits(number, radix) => write!(f, "Number `{number}` is not base {radix}."),
|
||||
Self::UnrecognizedOpcode(op) => write!(f, "{op} is not an opcode"),
|
||||
Self::UnrecognizedDirective(d) => write!(f, "{d} is not a directive."),
|
||||
Self::NotARegister(reg) => write!(f, "{reg} is not a register"),
|
||||
Self::RegisterTooHigh(reg) => write!(f, "r{reg} is not a register"),
|
||||
Self::FatSecondaryImmediate(num) => write!(f, "Secondary immediate must be #0 or #1, not #{num}"),
|
||||
Self::NumberTooWide(num) => write!(f, "{num} does not fit in 16 bits"),
|
||||
Self::JumpedTooFar(num) => write!(f, "{num} is too far away: must be in range -1022..=1024."),
|
||||
Self::JumpedOdd(num) => {
|
||||
write!(f, "Jump targets only encode even numbers: {num} must not be odd.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::error::Error for ParseError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::LexError(e) => Some(e),
|
||||
Self::IoError(e) => Some(e),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [Identifier] stores the text of an identifier
|
||||
use super::*;
|
||||
use std::rc::Rc;
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Identifier {
|
||||
str: Rc<str>,
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
fn str<T: AsRef<str>>(s: T) -> Self { Self { str: s.as_ref().to_owned().into() } }
|
||||
}
|
||||
|
||||
impl Parsable for Identifier {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let token = stream.expect(Type::Identifier)?;
|
||||
match token.variant() {
|
||||
Type::Identifier => Ok(Self::str(token.lexeme())),
|
||||
_ => unreachable!("Expected identifier, got {token:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Display for Identifier {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.str, f) }
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Instruction`] contains the [`Opcode`] and [`Encoding`] information for a single msp430
|
||||
//! instruction
|
||||
//!
|
||||
//!
|
||||
//! Note: [`Opcode`] and [`Encoding`] are very tightly coupled, because they represent
|
||||
//! interdependent parts of the same instruction. This is why [`Opcode`]::resolve() returns an
|
||||
//! [`EncodingParser`] -- otherwise, there's an explosion of states that I can't really cope with on
|
||||
//! my own. Really, there's about 9 valid classes of instruction, some of which are only used for
|
||||
//! one or two of the MSP430's instructions.
|
||||
|
||||
use super::*;
|
||||
|
||||
pub mod encoding;
|
||||
pub mod opcode;
|
||||
|
||||
/// Contains the [Opcode] and [Encoding] information for a single msp430 instruction
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Instruction(Opcode, Encoding);
|
||||
|
||||
impl Instruction {
|
||||
pub fn opcode(&self) -> &Opcode { &self.0 }
|
||||
pub fn encoding(&self) -> &Encoding { &self.1 }
|
||||
/// Gets the Instruction as a [u16]
|
||||
pub fn word(&self) -> u16 { self.0 as u16 | self.1.word() }
|
||||
/// Gets the extension words, if present
|
||||
pub fn ext_words(&self) -> [Option<u16>; 2] { self.1.extwords() }
|
||||
}
|
||||
|
||||
impl Parsable for Instruction {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where
|
||||
Self: Sized,
|
||||
T: crate::TokenStream<'text>,
|
||||
{
|
||||
// parse an opcode
|
||||
let opcode: Opcode = Opcode::parse(p, stream)?;
|
||||
// resolve the opcode to a final opcode and an encoding
|
||||
let (opcode, encoding) = opcode.resolve();
|
||||
// parse the encoding
|
||||
let encoding = encoding.parse(p, stream)?;
|
||||
Ok(Self(opcode, encoding))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Instruction> for u16 {
|
||||
fn from(value: Instruction) -> Self { value.word() }
|
||||
}
|
||||
|
||||
impl Display for Instruction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}{}", self.0, self.1) }
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Encoding`] represents the set of arguments for a given [msp430 opcode](Opcode)
|
||||
use super::*;
|
||||
|
||||
pub mod number;
|
||||
pub mod register;
|
||||
pub mod width;
|
||||
|
||||
pub mod jump_target;
|
||||
pub mod primary_operand;
|
||||
pub mod secondary_operand;
|
||||
|
||||
mod builder;
|
||||
pub mod encoding_parser;
|
||||
|
||||
use builder::{DoubleBuilder, JumpBuilder, ReflexiveBuilder, SingleBuilder};
|
||||
use encoding_parser::EncodingParser;
|
||||
|
||||
/// Represents an [instruction encoding](https://mspgcc.sourceforge.net/manual/x223.html)
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// use msp430_asm::{preamble::*, parser::preamble::*};
|
||||
/// // Create a token sequence
|
||||
/// let asm_file = r".b 8000(r15)";
|
||||
/// // Create a single-operand encoding parser
|
||||
/// let single: EncodingParser = Encoding::single().end();
|
||||
/// // Parse an Encoding from it
|
||||
/// let encoding: Encoding = single
|
||||
/// .parse(&Default::default(), &mut Tokenizer::new(asm_file).ignore_spaces())
|
||||
/// .unwrap();
|
||||
/// // Print the Encoding
|
||||
/// println!("{encoding}");
|
||||
/// ```
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Encoding {
|
||||
Single { width: Width, dst: PrimaryOperand },
|
||||
Jump { target: JumpTarget },
|
||||
Double { width: Width, src: PrimaryOperand, dst: SecondaryOperand },
|
||||
}
|
||||
impl Encoding {
|
||||
/// Returns a builder for [Encoding::Single]
|
||||
pub fn single() -> SingleBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Jump]
|
||||
pub fn jump() -> JumpBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Double]
|
||||
pub fn double() -> DoubleBuilder { Default::default() }
|
||||
/// Returns a builder for [Encoding::Double]
|
||||
///
|
||||
/// The reflexive pseudo-[Encoding] is a [Double](Encoding::Double) where the src and
|
||||
/// dst are the same
|
||||
pub fn reflexive() -> ReflexiveBuilder { Default::default() }
|
||||
/// Returns the operand, register, and width bits of the instruction word for this [Encoding]
|
||||
pub fn word(&self) -> u16 {
|
||||
match self {
|
||||
Encoding::Single { width, dst } => u16::from(*width) | dst.mode() | dst.register() as u16,
|
||||
Encoding::Jump { target } => target.word().unwrap_or_default(),
|
||||
Encoding::Double { width, src, dst } => {
|
||||
u16::from(*width) | src.mode() | dst.mode() | dst.register() as u16 | ((src.register() as u16) << 8)
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Returns the extension words for the instruction
|
||||
pub fn extwords(&self) -> [Option<u16>; 2] {
|
||||
match self {
|
||||
Encoding::Double { src, dst, .. } => [src.ext_word(), dst.ext_word()],
|
||||
Encoding::Single { dst, .. } => [dst.ext_word(), None],
|
||||
Encoding::Jump { .. } => [None, None],
|
||||
}
|
||||
}
|
||||
}
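// Worked example (a sketch, following the bit layout above): for `mov r4, r5` the
// Double encoding packs width = 0 (word), both addressing modes = 0 (direct),
// dst register = 5, and src register = 4 shifted into bits 8..12, giving 0x0405;
// OR-ing in the `mov` opcode word (0x4000) yields 0x4405.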
|
||||
|
||||
impl Display for Encoding {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Encoding::Single { width, dst } => write!(f, "{width} {dst}"),
|
||||
Encoding::Jump { target } => write!(f, " {target}"),
|
||||
Encoding::Double { width, src, dst } => write!(f, "{width} {src}, {dst}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! Builder API for [`EncodingParser`]
|
||||
use super::*;
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SingleBuilder {
|
||||
width: Option<Width>,
|
||||
dst: Option<PrimaryOperand>,
|
||||
}
|
||||
impl SingleBuilder {
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
/// Sets the [PrimaryOperand] field
|
||||
pub fn operand(mut self, dst: PrimaryOperand) -> Self {
|
||||
self.dst = Some(dst);
|
||||
self
|
||||
}
|
||||
/// Build
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Single { width: self.width, dst: self.dst } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct JumpBuilder {
|
||||
target: Option<JumpTarget>,
|
||||
}
|
||||
impl JumpBuilder {
|
||||
pub fn target(mut self, target: JumpTarget) -> Self {
|
||||
self.target = Some(target);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Jump { target: self.target } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct DoubleBuilder {
|
||||
width: Option<Width>,
|
||||
src: Option<PrimaryOperand>,
|
||||
dst: Option<SecondaryOperand>,
|
||||
}
|
||||
impl DoubleBuilder {
|
||||
/// Sets the [Width] field
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
/// Sets the [PrimaryOperand] field
|
||||
pub fn src(mut self, src: PrimaryOperand) -> Self {
|
||||
self.src = Some(src);
|
||||
self
|
||||
}
|
||||
/// Sets the [SecondaryOperand] field
|
||||
pub fn dst(mut self, dst: SecondaryOperand) -> Self {
|
||||
self.dst = Some(dst);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Double { width: self.width, src: self.src, dst: self.dst } }
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ReflexiveBuilder {
|
||||
width: Option<Width>,
|
||||
reg: Option<SecondaryOperand>,
|
||||
}
|
||||
impl ReflexiveBuilder {
|
||||
/// Sets the [Width] field
|
||||
pub fn width(mut self, width: bool) -> Self {
|
||||
self.width = Some(width.into());
|
||||
self
|
||||
}
|
||||
pub fn reg(mut self, reg: SecondaryOperand) -> Self {
|
||||
self.reg = Some(reg);
|
||||
self
|
||||
}
|
||||
pub fn end(self) -> EncodingParser { EncodingParser::Reflexive { width: self.width, reg: self.reg } }
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`EncodingParser`] builds an [`Encoding`] from a [`TokenStream`]
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
/// Builds an [Encoding] using [Tokens](crate::Token) from an input [TokenStream]
|
||||
pub enum EncodingParser {
|
||||
Single { width: Option<Width>, dst: Option<PrimaryOperand> },
|
||||
Jump { target: Option<JumpTarget> },
|
||||
Double { width: Option<Width>, src: Option<PrimaryOperand>, dst: Option<SecondaryOperand> },
|
||||
Reflexive { width: Option<Width>, reg: Option<SecondaryOperand> },
|
||||
}
|
||||
|
||||
impl EncodingParser {
|
||||
/// Constructs an [Encoding] from this [EncodingParser], filling holes
|
||||
/// with the tokenstream
|
||||
pub fn parse<'text, T>(self, p: &Parser, stream: &mut T) -> Result<Encoding, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
Ok(match self {
|
||||
Self::Single { width, dst } => Encoding::Single {
|
||||
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
|
||||
dst: if let Some(dst) = dst { dst } else { PrimaryOperand::parse(p, stream)? },
|
||||
},
|
||||
Self::Jump { target } => Encoding::Jump { target: if let Some(t) = target { t } else { JumpTarget::parse(p, stream)? } },
|
||||
Self::Double { width, src, dst } => Encoding::Double {
|
||||
width: width.unwrap_or_else(|| Width::parse_or_default(p, stream)),
|
||||
src: if let Some(src) = src { src } else { PrimaryOperand::parse(p, stream)? },
|
||||
dst: if let Some(dst) = dst { dst } else { SecondaryOperand::parse(p, stream)? },
|
||||
},
|
||||
Self::Reflexive { width, reg } => {
|
||||
let width = width.unwrap_or_else(|| Width::parse(p, stream).unwrap_or_default());
|
||||
let reg = if let Some(reg) = reg { reg } else { SecondaryOperand::parse(p, stream)? };
|
||||
Encoding::Double { width, src: reg.clone().into(), dst: reg }
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`JumpTarget`] contains the [pc-relative offset](Number) or [label](Identifier)
|
||||
//! for a [Jump](Encoding::Jump) [instruction]
|
||||
use super::*;
|
||||
|
||||
/// Contains the [pc-relative offset](Number) or [label](Identifier)
|
||||
/// for a [Jump](Encoding::Jump) [Instruction]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum JumpTarget {
|
||||
Number(Number),
|
||||
Identifier(Identifier),
|
||||
}
|
||||
|
||||
impl JumpTarget {
|
||||
pub fn word(&self) -> Option<u16> {
|
||||
match self {
|
||||
JumpTarget::Number(n) => Some(u16::from(*n) & 0x3ff),
|
||||
JumpTarget::Identifier(_) => None,
|
||||
}
|
||||
}
|
||||
pub fn squish(value: isize) -> Result<u16, ParseError> {
|
||||
match value {
|
||||
i if i % 2 != 0 => Err(ParseError::JumpedOdd(i))?,
|
||||
i if (-1024..=1022).contains(&(i - 2)) => Ok(((value >> 1) - 1) as u16 & 0x3ff),
|
||||
i => Err(ParseError::JumpedTooFar(i))?,
|
||||
}
|
||||
}
|
||||
pub fn unsquish(value: u16) -> isize { (value as isize + 1) << 1 }
|
||||
}
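// Encoding sketch: a jump offset is stored as ((offset >> 1) - 1) in the low 10
// bits, so `squish(2)` == 0 and `squish(4)` == 1; `unsquish` reverses this for
// positive offsets, e.g. `unsquish(1)` == 4.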
|
||||
|
||||
impl Parsable for JumpTarget {
|
||||
// - Identifier
|
||||
// - Number
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
// Try to parse a number
|
||||
if let Some(num) = Number::try_parse(p, stream)? {
|
||||
Self::try_from(num)
|
||||
} else {
|
||||
// if that fails, try to parse an identifier instead
|
||||
Ok(Self::Identifier(Identifier::parse(p, stream)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Number> for JumpTarget {
|
||||
type Error = ParseError;
|
||||
fn try_from(value: Number) -> Result<Self, Self::Error> { Ok(Self::Number(Self::squish(value.into())?.into())) }
|
||||
}
|
||||
|
||||
impl Display for JumpTarget {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Number(num) => write!(f, "{:x}", Self::unsquish(u16::from(*num))),
|
||||
Self::Identifier(id) => write!(f, "{id}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Number`] represents a 16-bit signed or unsigned word
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Number(isize, u32); // (value, radix)
|
||||
|
||||
impl Parsable for Number {
|
||||
// A number is:
|
||||
// [Minus|Plus]? RadixMarker[Hex|Dec|Oct|Bin]? Number
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
use Type as Ty;
|
||||
// The number is negative when it begins with a Minus, but Plus is also acceptable.
|
||||
let negative = stream.expect_any_of([Ty::Minus, Ty::Plus]).map_or(false, |t| t.is_variant(Ty::Minus));
|
||||
let radix = match stream
|
||||
.expect_any_of([Ty::RadixMarkerHex, Ty::RadixMarkerDec, Ty::RadixMarkerOct, Ty::RadixMarkerBin])
|
||||
.ok()
|
||||
.map(|t| t.variant())
|
||||
{
|
||||
Some(Ty::RadixMarkerHex) => 16,
|
||||
Some(Ty::RadixMarkerDec) => 10,
|
||||
Some(Ty::RadixMarkerOct) => 8,
|
||||
Some(Ty::RadixMarkerBin) => 2,
|
||||
_ => p.radix,
|
||||
};
|
||||
let number = stream.expect(Ty::Number)?;
|
||||
// TODO: Reintroduce error context
|
||||
let number = isize::from_str_radix(number.lexeme(), radix)
|
||||
.map_err(|_| ParseError::UnexpectedDigits(number.lexeme().into(), radix))?
|
||||
* if negative { -1 } else { 1 };
|
||||
// Ensure number fits within a *signed or unsigned* 16-bit int (it will be truncated to fit)
|
||||
Ok(Self(
|
||||
if (-0x8000..0x10000).contains(&number) { number } else { Err(ParseError::NumberTooWide(number))? },
|
||||
radix,
|
||||
))
|
||||
}
|
||||
}
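// Illustrative sketch; the radix-marker lexeme is assumed here (say a hex marker
// written `0x`): an input like `-0x10` would tokenize as Minus, RadixMarkerHex,
// Number("10") and parse to Number(-16, 16).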
|
||||
|
||||
impl From<isize> for Number {
|
||||
fn from(value: isize) -> Self { Self(value, 16) }
|
||||
}
|
||||
|
||||
impl From<Number> for isize {
|
||||
fn from(value: Number) -> Self { value.0 as Self }
|
||||
}
|
||||
|
||||
impl From<u16> for Number {
|
||||
fn from(value: u16) -> Self { Self(value as isize, 16) }
|
||||
}
|
||||
|
||||
impl From<Number> for u16 {
|
||||
fn from(value: Number) -> Self { value.0 as Self }
|
||||
}
|
||||
|
||||
impl std::ops::Sub<isize> for Number {
|
||||
type Output = Self;
|
||||
fn sub(mut self, rhs: isize) -> Self::Output {
|
||||
self.0 -= rhs;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<usize> for Number {
|
||||
type Output = Self;
|
||||
fn shr(mut self, rhs: usize) -> Self::Output {
|
||||
self.0 >>= rhs;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Number {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self.1 {
|
||||
2 => std::fmt::Binary::fmt(&self.0, f),
|
||||
8 => std::fmt::Octal::fmt(&self.0, f),
|
||||
16 => std::fmt::LowerHex::fmt(&self.0, f),
|
||||
_ => std::fmt::Display::fmt(&self.0, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,146 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`PrimaryOperand`] contains the first [`Register`], addressing mode, and Extension
|
||||
//! Word for a [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [`Instruction`]
|
||||
use super::*;
|
||||
|
||||
/// Contains the first [Register], addressing mode, and Extension Word for a
|
||||
/// [one-operand](Encoding::Single) or [two-operand](Encoding::Double) [Instruction]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum PrimaryOperand {
|
||||
Direct(Register),
|
||||
Indirect(Register),
|
||||
PostInc(Register),
|
||||
Indexed(Register, Number),
|
||||
Relative(Identifier),
|
||||
Absolute(Number),
|
||||
Immediate(Number),
|
||||
Four,
|
||||
Eight,
|
||||
Zero,
|
||||
One,
|
||||
Two,
|
||||
MinusOne,
|
||||
}
|
||||
|
||||
impl PrimaryOperand {
|
||||
/// Returns the mode bits
|
||||
pub fn mode(&self) -> u16 {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Direct(_) | Zero => 0,
|
||||
Indexed(_, _) | Relative(_) | Absolute(_) | One => 1 << 4,
|
||||
Indirect(_) | Two | Four => 2 << 4,
|
||||
PostInc(_) | Immediate(_) | MinusOne | Eight => 3 << 4,
|
||||
}
|
||||
}
|
||||
/// Gets the register
|
||||
pub fn register(&self) -> Register {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Direct(r) | Indexed(r, _) | Indirect(r) | PostInc(r) => *r,
|
||||
Immediate(_) | Relative(_) => Register::pc,
|
||||
Absolute(_) | Four | Eight => Register::sr,
|
||||
Zero | One | Two | MinusOne => Register::cg,
|
||||
}
|
||||
}
|
||||
/// Gets the extension word, if present
|
||||
pub fn ext_word(&self) -> Option<u16> {
|
||||
use PrimaryOperand::*;
|
||||
match self {
|
||||
Indexed(_, w) | Absolute(w) | Immediate(w) => Some((*w).into()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
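// For example, `@r15+` is `PostInc(r15)`: addressing-mode bits 0b11 << 4,
// register r15, no extension word. `&0x200` is `Absolute(0x200)`: mode bits
// 0b01 << 4 against `sr`, carrying 0x200 as its extension word.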
|
||||
|
||||
impl Parsable for PrimaryOperand {
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
// Try parsing as Register (Direct)
|
||||
if let Some(r) = Register::try_parse(p, stream)? {
|
||||
return Ok(Self::Direct(r));
|
||||
}
|
||||
// Try parsing as Number (Indexed)
|
||||
if let Some(idx) = Number::try_parse(p, stream)? {
|
||||
stream.expect(Type::LParen)?;
|
||||
let reg = Register::parse(p, stream)?;
|
||||
stream.expect(Type::RParen)?;
|
||||
return Ok(Self::Indexed(reg, idx));
|
||||
}
|
||||
// Try parsing as Identifier (Relative, label mode)
|
||||
if let Some(id) = Identifier::try_parse(p, stream)? {
|
||||
return Ok(Self::Relative(id));
|
||||
}
|
||||
// Or directly match any of the valid prefix markers
|
||||
// Register, Number, and Identifier are included here to make error messages clearer.
// Their inclusion causes a negligible slowdown when the next token is not a prefix marker
// (a failure condition).
|
||||
let token = stream.expect_any_of([
|
||||
Type::Indirect,
|
||||
Type::Absolute,
|
||||
Type::Immediate,
|
||||
Type::Register,
|
||||
Type::Number,
|
||||
Type::Identifier,
|
||||
])?;
|
||||
Ok(match token.variant() {
|
||||
Type::Indirect => {
|
||||
let reg = Register::parse(p, stream)?;
|
||||
match stream.expect(Type::Plus) {
|
||||
Ok(_) => Self::PostInc(reg),
|
||||
Err(_) => Self::Indirect(reg),
|
||||
}
|
||||
}
|
||||
Type::Absolute => Self::Absolute(Number::parse(p, stream)?),
|
||||
Type::Immediate => {
|
||||
let number = Number::parse(p, stream)?;
|
||||
match number.into() {
|
||||
// There are two representations for the all-ones constant, since Number preserves
|
||||
// signedness.
|
||||
-1_isize | 0xffff => Self::MinusOne,
|
||||
0 => Self::Zero,
|
||||
1 => Self::One,
|
||||
2 => Self::Two,
|
||||
4 => Self::Four,
|
||||
8 => Self::Eight,
|
||||
_ => Self::Immediate(number),
|
||||
}
|
||||
}
|
||||
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SecondaryOperand> for PrimaryOperand {
|
||||
fn from(value: SecondaryOperand) -> Self {
|
||||
match value {
|
||||
SecondaryOperand::Direct(r) => Self::Direct(r),
|
||||
SecondaryOperand::Indexed(r, n) => Self::Indexed(r, n),
|
||||
SecondaryOperand::Absolute(n) => Self::Absolute(n),
|
||||
SecondaryOperand::Relative(id) => Self::Relative(id),
|
||||
SecondaryOperand::Zero => Self::Zero,
|
||||
SecondaryOperand::One => Self::One,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for PrimaryOperand {
|
||||
// Turn the operand back into a form which parses into the same type
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Direct(r) => Display::fmt(r, f),
|
||||
Self::Indirect(r) => write!(f, "@{r}"),
|
||||
Self::PostInc(r) => write!(f, "@{r}+"),
|
||||
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
|
||||
Self::Relative(id) => Display::fmt(id, f),
|
||||
Self::Absolute(n) => write!(f, "&{n}"),
|
||||
Self::Immediate(n) => write!(f, "#{n}"),
|
||||
Self::Four => Display::fmt("#4", f),
|
||||
Self::Eight => Display::fmt("#8", f),
|
||||
Self::Zero => Display::fmt("#0", f),
|
||||
Self::One => Display::fmt("#1", f),
|
||||
Self::Two => Display::fmt("#2", f),
|
||||
Self::MinusOne => Display::fmt("#-1", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,112 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Register`] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A [Register] represents [one of the MSP430 processor's registers](https://mspgcc.sourceforge.net/manual/x82.html)
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Register {
|
||||
/// Program Counter
|
||||
pc,
|
||||
/// Stack Pointer
|
||||
sp,
|
||||
/// Status Register
|
||||
sr,
|
||||
/// Constant Generator
|
||||
cg,
|
||||
r4,
|
||||
r5,
|
||||
r6,
|
||||
r7,
|
||||
r8,
|
||||
r9,
|
||||
r10,
|
||||
r11,
|
||||
r12,
|
||||
r13,
|
||||
r14,
|
||||
r15,
|
||||
}
|
||||
|
||||
impl Parsable for Register {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
stream.expect(Type::Register)?.lexeme().parse()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Register> for u16 {
|
||||
fn from(value: Register) -> Self { value as u16 }
|
||||
}
|
||||
|
||||
impl TryFrom<u16> for Register {
|
||||
type Error = ParseError;
|
||||
fn try_from(value: u16) -> Result<Self, Self::Error> {
|
||||
use Register::*;
|
||||
Ok(match value {
|
||||
0 => pc,
|
||||
1 => sp,
|
||||
2 => sr,
|
||||
3 => cg,
|
||||
4 => r4,
|
||||
5 => r5,
|
||||
6 => r6,
|
||||
7 => r7,
|
||||
8 => r8,
|
||||
9 => r9,
|
||||
10 => r10,
|
||||
11 => r11,
|
||||
12 => r12,
|
||||
13 => r13,
|
||||
14 => r14,
|
||||
15 => r15,
|
||||
_ => return Err(ParseError::RegisterTooHigh(value)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Register {
|
||||
type Err = ParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
use Register::*;
|
||||
match s {
|
||||
"pc" => Ok(pc),
|
||||
"sp" => Ok(sp),
|
||||
"sr" => Ok(sr),
|
||||
"cg" => Ok(cg),
|
||||
_ => {
|
||||
str::parse::<u16>(&s[1..]).map_err(|_| -> Self::Err { ParseError::NotARegister(s.into()) })?.try_into()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Register> for &str {
|
||||
fn from(value: Register) -> Self {
|
||||
use Register::*;
|
||||
match value {
|
||||
pc => "pc",
|
||||
sp => "sp",
|
||||
sr => "sr",
|
||||
cg => "cg",
|
||||
r4 => "r4",
|
||||
r5 => "r5",
|
||||
r6 => "r6",
|
||||
r7 => "r7",
|
||||
r8 => "r8",
|
||||
r9 => "r9",
|
||||
r10 => "r10",
|
||||
r11 => "r11",
|
||||
r12 => "r12",
|
||||
r13 => "r13",
|
||||
r14 => "r14",
|
||||
r15 => "r15",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Register {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", <&str>::from(*self)) }
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`SecondaryOperand`] contains the second [`Register`], addressing mode, and Extension
|
||||
//! Word for a [two-operand](Encoding::Double) [instruction]
|
||||
use super::*;
|
||||
|
||||
/// The destination operand of a [Double](Encoding::Double) encoding
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum SecondaryOperand {
|
||||
Direct(Register),
|
||||
Indexed(Register, Number),
|
||||
Relative(Identifier),
|
||||
Absolute(Number),
|
||||
// Joke encodings?
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
|
||||
use SecondaryOperand as So;
|
||||
|
||||
impl SecondaryOperand {
|
||||
pub fn mode(&self) -> u16 {
|
||||
match self {
|
||||
So::Direct(_) | So::Zero => 0,
|
||||
So::Indexed(_, _) | So::Relative(_) | So::Absolute(_) | So::One => 1 << 7,
|
||||
}
|
||||
}
|
||||
pub fn register(&self) -> Register {
|
||||
use SecondaryOperand::*;
|
||||
match self {
|
||||
Direct(r) | Indexed(r, _) => *r,
|
||||
Relative(_) => Register::pc,
|
||||
Absolute(_) => Register::sr,
|
||||
Zero | One => Register::cg,
|
||||
}
|
||||
}
|
||||
/// This is the only way to have an extension word
|
||||
pub fn ext_word(&self) -> Option<u16> {
|
||||
use SecondaryOperand::*;
|
||||
match self {
|
||||
Indexed(_, w) | Absolute(w) => Some((*w).into()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parsable for SecondaryOperand {
|
||||
// Separator
|
||||
// - Register => Direct
|
||||
// - Number => Indexed
|
||||
// - OpenIdx
|
||||
// - Register
|
||||
// - CloseIdx
|
||||
// - Absolute
|
||||
// - Number
|
||||
// - Immediate
|
||||
// - Number == 0, 1
|
||||
fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: crate::TokenStream<'text> {
|
||||
use SecondaryOperand::*;
|
||||
stream.allow(Type::Separator);
|
||||
// Try parsing as Register (Direct)
|
||||
if let Some(r) = Register::try_parse(p, stream)? {
|
||||
return Ok(Self::Direct(r));
|
||||
}
|
||||
// Try parsing as Number (Indexed)
|
||||
if let Some(idx) = Number::try_parse(p, stream)? {
|
||||
stream.expect(Type::LParen)?;
|
||||
let reg = Register::parse(p, stream)?;
|
||||
stream.expect(Type::RParen)?;
|
||||
return Ok(Self::Indexed(reg, idx));
|
||||
}
|
||||
// Try parsing as Identifier (Relative, label mode)
|
||||
if let Some(id) = Identifier::try_parse(p, stream)? {
|
||||
return Ok(Self::Relative(id));
|
||||
}
|
||||
// Register, Number, and Identifier are included here to make error messages clearer.
|
||||
// Their inclusion causes a negligible slowdown when the next token is not a prefix marker
// (a failure condition), but they should never match at this point.
|
||||
let token =
|
||||
stream.expect_any_of([Type::Absolute, Type::Immediate, Type::Register, Type::Number, Type::Identifier])?;
|
||||
Ok(match token.variant() {
|
||||
Type::Absolute => Absolute(Number::parse(p, stream)?),
|
||||
// TODO: Reintroduce error context
|
||||
Type::Immediate => match Number::parse(p, stream)?.into() {
|
||||
0 => Zero,
|
||||
1 => One,
|
||||
n => Err(ParseError::FatSecondaryImmediate(n))?,
|
||||
},
|
||||
_ => unreachable!("Token {token:?} passed expectation but failed match!"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SecondaryOperand {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Direct(r) => Display::fmt(r, f),
|
||||
Self::Indexed(r, idx) => write!(f, "{idx}({r})"),
|
||||
Self::Relative(id) => Display::fmt(id, f),
|
||||
Self::Absolute(n) => write!(f, "&{n}"),
|
||||
Self::Zero => Display::fmt("#0", f),
|
||||
Self::One => Display::fmt("#1", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! A [`Width`] represents whether an instruction operates on whole words or bytes
|
||||
use super::*;
|
||||
|
||||
/// Represents an instruction's operand width.
|
||||
///
|
||||
/// Evaluates to false when the instruction takes word-sized operands, or true when
/// the instruction takes byte-sized operands
|
||||
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Width(bool);
|
||||
|
||||
impl Parsable for Width {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
let Ok(token) = stream.expect_any_of([Type::ByteWidth, Type::WordWidth]) else {
|
||||
return Ok(Self(false));
|
||||
};
|
||||
Ok(Self(token.is_variant(Type::ByteWidth)))
|
||||
}
|
||||
}
|
||||
impl From<Width> for u16 {
|
||||
fn from(value: Width) -> Self { (value.0 as Self) << 6 }
|
||||
}
|
||||
impl From<Width> for bool {
|
||||
fn from(value: Width) -> Self { value.0 }
|
||||
}
|
||||
impl From<bool> for Width {
|
||||
fn from(value: bool) -> Self { Width(value) }
|
||||
}
|
||||
impl std::fmt::Display for Width {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(if self.0 { ".b" } else { "" }) }
|
||||
}
|
||||
@@ -1,261 +0,0 @@
|
||||
// © 2023 John Breaux
|
||||
//! An [`Opcode`] encodes an msp430 operation
|
||||
use super::*;
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Opcode from the [MSPGCC Manual][1]
|
||||
///
|
||||
/// Calling [`resolve()`](Opcode::resolve()) will emit an [EncodingParser] which will
|
||||
/// extract from a [TokenStream] only the required arguments for that call.
|
||||
///
|
||||
/// [1]: https://mspgcc.sourceforge.net/manual/x223.html
|
||||
#[allow(clippy::identity_op)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Opcode {
|
||||
// "Emulated" opcodes
|
||||
Nop,
|
||||
Pop,
|
||||
Br,
|
||||
Ret,
|
||||
Clrc,
|
||||
Setc,
|
||||
Clrz,
|
||||
Setz,
|
||||
Clrn,
|
||||
Setn,
|
||||
Dint,
|
||||
Eint,
|
||||
Rla,
|
||||
Rlc,
|
||||
Inv,
|
||||
Clr,
|
||||
Tst,
|
||||
Dec,
|
||||
Decd,
|
||||
Inc,
|
||||
Incd,
|
||||
Adc,
|
||||
Dadc,
|
||||
Sbc,
|
||||
// Single
|
||||
Rrc = 0x1000 | 0 << 7,
|
||||
Swpb = 0x1000 | 1 << 7,
|
||||
Rra = 0x1000 | 2 << 7,
|
||||
Sxt = 0x1000 | 3 << 7,
|
||||
Push = 0x1000 | 4 << 7,
|
||||
Call = 0x1000 | 5 << 7,
|
||||
Reti = 0x1000 | 6 << 7,
|
||||
// Jump
|
||||
Jnz = 0x2000 | 0 << 10,
|
||||
Jz = 0x2000 | 1 << 10,
|
||||
Jnc = 0x2000 | 2 << 10,
|
||||
Jc = 0x2000 | 3 << 10,
|
||||
Jn = 0x2000 | 4 << 10,
|
||||
Jge = 0x2000 | 5 << 10,
|
||||
Jl = 0x2000 | 6 << 10,
|
||||
Jmp = 0x2000 | 7 << 10,
|
||||
// Double
|
||||
Mov = 0x4000,
|
||||
Add = 0x5000,
|
||||
Addc = 0x6000,
|
||||
Subc = 0x7000,
|
||||
Sub = 0x8000,
|
||||
Cmp = 0x9000,
|
||||
Dadd = 0xa000,
|
||||
Bit = 0xb000,
|
||||
Bic = 0xc000,
|
||||
Bis = 0xd000,
|
||||
Xor = 0xe000,
|
||||
And = 0xf000,
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
/// Resolve an Opcode into an [Opcode] and an [EncodingParser]
|
||||
pub fn resolve(self) -> (Opcode, EncodingParser) {
|
||||
use super::Encoding as Enc;
|
||||
use Register as Reg;
|
||||
use {PrimaryOperand as Src, SecondaryOperand as Dst};
|
||||
match self {
|
||||
Self::Rrc | Self::Rra | Self::Push => (self, Enc::single().end()),
|
||||
// these instructions do not take a width specifier (though they may still behave properly)
|
||||
Self::Swpb | Self::Sxt | Self::Call => (self, Enc::single().width(false).end()),
|
||||
// `reti` does not take any operands.
|
||||
Self::Reti => (self, Enc::single().operand(Src::Direct(Reg::pc)).end()),
|
||||
Self::Jnz | Self::Jz | Self::Jnc | Self::Jc | Self::Jn | Self::Jge | Self::Jl | Self::Jmp => {
|
||||
(self, Enc::jump().end())
|
||||
}
|
||||
Self::Mov
|
||||
| Self::Add
|
||||
| Self::Addc
|
||||
| Self::Subc
|
||||
| Self::Sub
|
||||
| Self::Cmp
|
||||
| Self::Dadd
|
||||
| Self::Bit
|
||||
| Self::Bic
|
||||
| Self::Bis
|
||||
| Self::Xor
|
||||
| Self::And => (self, Enc::double().end()),
|
||||
Self::Nop => (Self::Mov, Enc::double().src(Src::Zero).dst(Dst::Zero).end()),
|
||||
Self::Pop => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).end()),
|
||||
Self::Br => (Self::Mov, Enc::double().dst(Dst::Direct(Reg::pc)).end()),
|
||||
Self::Ret => (Self::Mov, Enc::double().src(Src::PostInc(Reg::sp)).dst(Dst::Direct(Reg::pc)).end()),
|
||||
Self::Clrc => (Self::Bic, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setc => (Self::Bis, Enc::double().src(Src::One).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Clrz => (Self::Bic, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setz => (Self::Bis, Enc::double().src(Src::Two).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Clrn => (Self::Bic, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Setn => (Self::Bis, Enc::double().src(Src::Four).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Dint => (Self::Bic, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Eint => (Self::Bis, Enc::double().src(Src::Eight).dst(Dst::Direct(Reg::sr)).end()),
|
||||
Self::Rla => (Self::Add, Enc::reflexive().end()),
|
||||
Self::Rlc => (Self::Addc, Enc::reflexive().end()),
|
||||
Self::Inv => (Self::Xor, Enc::double().src(Src::MinusOne).end()),
|
||||
Self::Clr => (Self::Mov, Enc::double().src(Src::Zero).end()),
|
||||
Self::Tst => (Self::Cmp, Enc::double().src(Src::Zero).end()),
|
||||
Self::Dec => (Self::Sub, Enc::double().src(Src::One).end()),
|
||||
Self::Decd => (Self::Sub, Enc::double().src(Src::Two).end()),
|
||||
Self::Inc => (Self::Add, Enc::double().src(Src::One).end()),
|
||||
Self::Incd => (Self::Add, Enc::double().src(Src::Two).end()),
|
||||
Self::Adc => (Self::Addc, Enc::double().src(Src::Zero).end()),
|
||||
Self::Dadc => (Self::Dadd, Enc::double().src(Src::Zero).end()),
|
||||
Self::Sbc => (Self::Subc, Enc::double().src(Src::Zero).end()),
|
||||
}
|
||||
}
|
||||
}
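// For instance, resolving `Opcode::Inv` yields (`Opcode::Xor`, a Double parser
// with src preset to `#-1`), so `inv r15` assembles as if written `xor #-1, r15`,
// with only the destination operand read from the token stream.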
|
||||
|
||||
impl Parsable for Opcode {
|
||||
fn parse<'text, T>(_: &Parser, stream: &mut T) -> Result<Self, ParseError>
|
||||
where T: TokenStream<'text> {
|
||||
// TODO: Reintroduce error context
|
||||
stream.expect(Type::Insn)?.parse()
|
||||
}
|
||||
}

impl FromStr for Opcode {
    type Err = ParseError;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // TODO: Reduce allocations here?
        let s = s.to_ascii_lowercase();
        Ok(match s.as_str() {
            "rrc" => Self::Rrc,
            "swpb" => Self::Swpb,
            "rra" => Self::Rra,
            "sxt" => Self::Sxt,
            "push" => Self::Push,
            "call" => Self::Call,
            "reti" => Self::Reti,

            "jne" | "jnz" => Self::Jnz,
            "jeq" | "jz" => Self::Jz,
            "jnc" | "jlo" => Self::Jnc,
            "jc" | "jhs" => Self::Jc,
            "jn" => Self::Jn,
            "jge" => Self::Jge,
            "jl" => Self::Jl,
            "jmp" => Self::Jmp,

            "mov" => Self::Mov,
            "add" => Self::Add,
            "addc" => Self::Addc,
            "subc" => Self::Subc,
            "sub" => Self::Sub,
            "cmp" => Self::Cmp,
            "dadd" => Self::Dadd,
            "bit" => Self::Bit,
            "bic" => Self::Bic,
            "bis" => Self::Bis,
            "xor" => Self::Xor,
            "and" => Self::And,

            "nop" => Self::Nop,
            "pop" => Self::Pop,
            "br" => Self::Br,
            "ret" => Self::Ret,
            "clrc" => Self::Clrc,
            "setc" => Self::Setc,
            "clrz" => Self::Clrz,
            "setz" => Self::Setz,
            "clrn" => Self::Clrn,
            "setn" => Self::Setn,
            "dint" => Self::Dint,
            "eint" => Self::Eint,
            "rla" => Self::Rla,
            "rlc" => Self::Rlc,
            "inv" => Self::Inv,
            "clr" => Self::Clr,
            "tst" => Self::Tst,
            "dec" => Self::Dec,
            "decd" => Self::Decd,
            "inc" => Self::Inc,
            "incd" => Self::Incd,
            "adc" => Self::Adc,
            "dadc" => Self::Dadc,
            "sbc" => Self::Sbc,
            _ => Err(ParseError::UnrecognizedOpcode(s))?,
        })
    }
}
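With this impl, mnemonic lookup is an ordinary `str::parse::<Opcode>()` call. A short usage sketch (not from the commit; it assumes `Opcode` also derives `PartialEq` and `Debug`, which this hunk does not show):

    let op: Opcode = "JNE".parse().unwrap();     // matching is case-insensitive
    assert_eq!(op, Opcode::Jnz);                 // "jne" and "jnz" collapse to one variant
    assert!("frob".parse::<Opcode>().is_err());  // unknown mnemonics yield ParseError::UnrecognizedOpcode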

impl Display for Opcode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}",
            match self {
                Self::Nop => "nop",
                Self::Pop => "pop",
                Self::Br => "br",
                Self::Ret => "ret",
                Self::Clrc => "clrc",
                Self::Setc => "setc",
                Self::Clrz => "clrz",
                Self::Setz => "setz",
                Self::Clrn => "clrn",
                Self::Setn => "setn",
                Self::Dint => "dint",
                Self::Eint => "eint",
                Self::Rla => "rla",
                Self::Rlc => "rlc",
                Self::Inv => "inv",
                Self::Clr => "clr",
                Self::Tst => "tst",
                Self::Dec => "dec",
                Self::Decd => "decd",
                Self::Inc => "inc",
                Self::Incd => "incd",
                Self::Adc => "adc",
                Self::Dadc => "dadc",
                Self::Sbc => "sbc",
                Self::Rrc => "rrc",
                Self::Swpb => "swpb",
                Self::Rra => "rra",
                Self::Sxt => "sxt",
                Self::Push => "push",
                Self::Call => "call",
                Self::Reti => "reti",
                Self::Jnz => "jnz",
                Self::Jz => "jz",
                Self::Jnc => "jnc",
                Self::Jc => "jc",
                Self::Jn => "jn",
                Self::Jge => "jge",
                Self::Jl => "jl",
                Self::Jmp => "jmp",
                Self::Mov => "mov",
                Self::Add => "add",
                Self::Addc => "addc",
                Self::Subc => "subc",
                Self::Sub => "sub",
                Self::Cmp => "cmp",
                Self::Dadd => "dadd",
                Self::Bit => "bit",
                Self::Bic => "bic",
                Self::Bis => "bis",
                Self::Xor => "xor",
                Self::And => "and",
            }
        )
    }
}
@@ -1,21 +0,0 @@
// © 2023 John Breaux
//! The definition of a label
use super::*;

/// The definition of a label
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label(pub Identifier);

impl Parsable for Label {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        Ok(Self(Identifier::parse(p, stream).and_then(|t| {
            stream.require(Type::Label)?;
            Ok(t)
        })?))
    }
}

impl Display for Label {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}:", self.0) }
}
@@ -1,72 +0,0 @@
// © 2023 John Breaux
//! [`Line`] contains a single subcomponent of the document. Multiple instructions on the same
//! document line will be treated as if they took up multiple [`Line`s](Line).
//!
//! A line contains one of:
//! - [`Label`]
//! - [`Instruction`]
//! - [`Directive`]
//! - [`Comment`]
//! - [Nothing](Line::Empty)
use super::*;

/// A line contains any one of:
/// - [`Label`] (definition)
/// - [`Instruction`]
/// - [`Directive`]
/// - [`Comment`]
/// - Nothing at all
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Line {
    Empty,
    Insn(Instruction),
    Comment(Comment),
    Directive(Directive),
    Label(Label),
    EndOfFile, // Expected end of file
}

impl Parsable for Line {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        Ok(
            match stream
                .peek_expect_any_of([
                    Type::Endl,
                    Type::Insn,
                    Type::Comment,
                    Type::Directive,
                    Type::Identifier,
                    Type::EndOfFile,
                ])?
                .variant()
            {
                Type::Endl => {
                    stream.next();
                    Self::Empty
                }
                Type::Insn => Self::Insn(Instruction::parse(p, stream)?),
                Type::Comment => Self::Comment(Comment::parse(p, stream)?),
                Type::Directive => Self::Directive(Directive::parse(p, stream)?),
                Type::Identifier => Self::Label(Label::parse(p, stream)?),
                Type::EndOfFile => {
                    stream.next();
                    Self::EndOfFile
                }
                _ => unreachable!("stream.peek_expect_any_of should return Err for unmatched inputs"),
            },
        )
    }
}
impl Display for Line {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Empty => writeln!(f, "\n"),
            Self::Label(arg0) => Display::fmt(arg0, f),
            Self::Insn(arg0) => Display::fmt(arg0, f),
            Self::Directive(arg0) => Display::fmt(arg0, f),
            Self::Comment(arg0) => Display::fmt(arg0, f),
            Self::EndOfFile => write!(f, "; End of file."),
        }
    }
}
@@ -1,85 +0,0 @@
// © 2023 John Breaux
//! A [`Parsable`] struct (an AST node) can parse tokens from a [stream](TokenStream) into it[`self`](https://doc.rust-lang.org/stable/std/keyword.SelfTy.html)
use super::*;
/// Parses tokens from [stream](TokenStream) into Self node
pub trait Parsable {
    /// Parses tokens from [TokenStream](TokenStream) into Self nodes
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>;

    /// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
    ///
    /// Masks failed expectations.
    fn try_parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Option<Self>, ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        match Self::parse(p, stream) {
            Ok(some) => Ok(Some(some)),
            Err(ParseError::LexError(_)) => Ok(None),
            Err(e) => Err(e),
        }
    }

    fn parse_and<'text, T, R>(
        p: &Parser,
        stream: &mut T,
        f: fn(p: &Parser, &mut T) -> R,
    ) -> Result<(Self, R), ParseError>
    where
        Self: Sized,
        T: TokenStream<'text>,
    {
        Ok((Self::parse(p, stream)?, f(p, stream)))
    }

    /// Attempts to parse tokens from [stream](TokenStream) into Self nodes.
    ///
    /// Returns [`Self::default()`](Default::default()) on error
    fn parse_or_default<'text, T>(p: &Parser, stream: &mut T) -> Self
    where
        Self: Sized + Default,
        T: TokenStream<'text>,
    {
        Self::parse(p, stream).unwrap_or_default()
    }
}

macro_rules! parsable_str_types {
    ($($t:ty),*$(,)?) => {$(
        impl Parsable for $t {
            fn parse<'text, T>(_p: &Parser, stream: &mut T) -> Result<Self, ParseError>
            where T: TokenStream<'text> {
                Ok(stream.expect(Type::String)?.lexeme().trim_matches('"').into())
            }
        }
    )*};
}
use std::{path::PathBuf, rc::Rc};
parsable_str_types![String, Rc<str>, Box<str>, PathBuf];

/// Vectors of arbitrary parsables are cool
impl<P: Parsable> Parsable for Vec<P> {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        // [dead beef]
        // [A, B,]
        // [c d e f]
        // [ something
        //   else ]

        stream.require(Type::LBracket)?;
        stream.allow(Type::Endl);
        let mut out = vec![];
        while let Some(t) = P::try_parse(p, stream)? {
            out.push(t);
            stream.allow(Type::Separator);
            stream.allow(Type::Endl);
        }
        stream.require(Type::RBracket)?;
        Ok(out)
    }
}
@@ -1,51 +0,0 @@
use std::path::{Path, PathBuf};

// © 2023 John Breaux
use super::*;

/// Contains the entire AST
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Root(Option<PathBuf>, Vec<(usize, Line)>);
// pub struct Root { pub path: PathBuf, pub lines: Vec<Line> }

impl Root {
    pub fn file(&self) -> Option<&Path> { self.0.as_deref() }
    pub(crate) fn set_file(mut self, path: PathBuf) -> Self {
        self.0 = Some(path);
        self
    }
    pub fn lines(&self) -> &[(usize, Line)] { &self.1 }
}

impl Parsable for Root {
    fn parse<'text, T>(p: &Parser, stream: &mut T) -> Result<Self, ParseError>
    where T: TokenStream<'text> {
        let mut lines = vec![];
        loop {
            let number = stream.context().line();
            match Line::parse(p, stream)? {
                Line::EndOfFile => break,
                line => lines.push((number, line)),
            }
        }
        Ok(Root(None, lines))
    }
}

impl Display for Root {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (num, line) in &self.1 {
            f.pad(&format!("{num:3}: {line} "))?;
        }
        Ok(())
    }
}

impl Debug for Root {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for line in self.0.iter() {
            Debug::fmt(line, f)?;
        }
        Ok(())
    }
}
87
src/preprocessor.rs
Normal file
@@ -0,0 +1,87 @@
// © 2023-2024 John Breaux

use crate::{
    lexer::{
        token::{Token, TokenKind as Kind},
        Lexer,
    },
    util::Span,
};
use std::collections::{HashMap, VecDeque};

#[derive(Clone, Debug)]
pub struct Preprocessor<'t> {
    lexer: Lexer<'t>,
    buf: VecDeque<Token<'t>>,
    defn: HashMap<&'t str, Vec<Token<'t>>>,
    /// Location for injected tokens
    pos: Span<usize>,
}

impl<'t> Preprocessor<'t> {
    pub fn new(text: &'t str) -> Self {
        Self {
            lexer: Lexer::new(text),
            buf: Default::default(),
            defn: Default::default(),
            pos: Default::default(),
        }
    }
    pub fn with_lexer(lexer: Lexer<'t>) -> Self {
        Self { lexer, buf: Default::default(), defn: Default::default(), pos: Default::default() }
    }
    pub fn scan(&mut self) -> Option<Token<'t>> {
        self.buf.pop_front().or_else(|| self.next()).inspect(|t| self.pos = t.pos)
    }
    pub fn start(&self) -> usize {
        self.lexer.location()
    }
    /// Grabs a token from the lexer, and attempts to match its lexeme
    fn next(&mut self) -> Option<Token<'t>> {
        let token = self.lexer.scan()?;
        if let Some(tokens) = self.defn.get(token.lexeme) {
            self.buf.extend(tokens.iter().copied().map(|mut t| {
                t.pos = self.pos;
                t
            }));
            return self.scan();
        } else {
            match token.kind {
                Kind::Directive => self.directive(token),
                Kind::Newline => return self.scan(),
                _ => {}
            }
            Some(token)
        }
    }
    /// Passes a token through while parsing a directive
    fn tee(&mut self) -> Option<Token<'t>> {
        let token = self.lexer.scan()?;
        self.buf.push_back(token);
        // self.buf.push_back(token);
        Some(token)
    }
    /// Parses and executes a directive
    pub fn directive(&mut self, token: Token<'t>) {
        if ".define" == token.lexeme {
            self.define()
        }
    }
    pub fn define(&mut self) {
        let Some(key) = self.tee() else {
            return;
        };
        let mut value = vec![];
        while let Some(token) = self.tee() {
            match token.kind {
                Kind::Comment => {
                    self.buf.push_back(token);
                    break;
                }
                Kind::Newline => break,
                _ => value.push(token),
            }
        }
        self.defn.insert(key.lexeme, value);
    }
}
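A minimal sketch of how the new `.define` pass behaves (hypothetical driver code, not part of the commit; it only uses `Preprocessor::new` and `scan` as defined above, and it assumes the lexer emits `#` and the identifier `SIX` as separate tokens):

    let mut pp = Preprocessor::new(".define SIX 6\nmov #SIX, r5\n");
    while let Some(token) = pp.scan() {
        // The `.define SIX 6` tokens are tee'd through to the parser first; after that,
        // any token whose lexeme matches a defined key (here `SIX`) is replaced by its
        // recorded replacement tokens, so the parser effectively sees `mov #6, r5`.
        let _ = token;
    }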