680 lines
23 KiB
Rust

// © 2023-2024 John Breaux
//See LICENSE.md for license
//! Represents MSP430 instructions, directives, and expressions as an abstract syntax tree.
use crate::{
lexer::token::{self, Reg, Token},
span::Span,
};
/// An ordered collection of [Statement]s.
///
/// Displayed as one statement per line.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Statements<'t> {
/// The statements, in the order they are stored
pub stmts: Vec<Statement<'t>>,
}
/// A single item of a [Statements] list.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Statement<'t> {
/// A label; displayed as the name followed by `:`
Label(&'t str),
/// An [Instruction]
Insn(Instruction<'t>),
/// A [Directive]
Directive(Directive<'t>),
/// A comment; displayed exactly as stored
Comment(&'t str),
}
/// An assembler directive.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Directive<'t> {
/// TODO: Store define as a vec of tokens. This will require help from the
/// [preprocessor](crate::preprocessor)
Define(Vec<Token<'t>>),
/// Displayed as `.org <expr>`
Org(Box<Expr<'t>>),
/// Displayed as `.word <expr>`
Word(Box<Expr<'t>>),
/// Displayed as `.words [ <expr> <expr> ... ]`
Words(Vec<Expr<'t>>),
/// Displayed as `.string "<text>"`
String(&'t str),
}
/// An instruction, together with the [Span] it is associated with.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Instruction<'t> {
/// The span attached to this instruction (not shown by `Display`)
pub span: Span<usize>,
/// What kind of instruction this is, and its operands
pub kind: InstructionKind<'t>,
}
/// The different shapes an [Instruction] can take.
///
/// Canonicalization (see the `canonical` module) turns the `NoEm`, `OneEm`, and `Br`
/// shapes into `TwoArg`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InstructionKind<'t> {
/// An emulated instruction with no operands
NoEm(NoEm),
/// An emulated instruction with a single destination operand
OneEm(OneEm<'t>),
/// An instruction with a single source operand
OneArg(OneArg<'t>),
/// An instruction with a source and a destination operand
TwoArg(TwoArg<'t>),
/// A jump instruction
Jump(Jump<'t>),
/// The `reti` instruction
Reti(Reti),
/// The `br` instruction
Br(Br<'t>),
}
/// An emulated instruction which takes no operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct NoEm {
/// Which emulated instruction this is
pub opcode: token::NoEm,
}
/// An emulated instruction which takes a single destination operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneEm<'t> {
/// Which emulated instruction this is
pub opcode: token::OneEm,
/// The operation [Width]
pub width: Width,
/// The destination operand
pub dst: Dst<'t>,
}
/// An instruction which takes a single source operand.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OneArg<'t> {
/// Which one-operand instruction this is
pub opcode: token::OneArg,
/// The operation [Width]
pub width: Width,
/// The source operand
pub src: Src<'t>,
}
/// An instruction which takes a source and a destination operand.
///
/// Displayed as `<opcode><width>\t<src>, <dst>`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TwoArg<'t> {
/// Which two-operand instruction this is
pub opcode: token::TwoArg,
/// The operation [Width]
pub width: Width,
/// The source operand
pub src: Src<'t>,
/// The destination operand
pub dst: Dst<'t>,
}
/// A jump instruction and its target.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Jump<'t> {
/// Which jump instruction this is
pub opcode: token::Jump,
/// Where the jump goes
pub dst: JumpDst<'t>,
}
/// The `reti` instruction, which takes no operands.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Reti;
/// The `br` instruction; canonicalizes to a `mov` of `src` into the PC register.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Br<'t> {
/// The operand providing the branch target
pub src: Src<'t>,
}
/// The width of an operation. Defaults to [Width::Word].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum Width {
/// Word width; displayed with no suffix
#[default]
Word,
/// Byte width; displayed as the suffix `.b`
Byte,
}
/// A source operand. Each variant notes the syntax its `Display` impl produces.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Src<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `@reg`
Indirect(Reg),
/// `@reg+`
PostInc(Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#expr`
Immediate(Box<Expr<'t>>),
/// `#value`, for one of the [SrcSpecial] values
Special(SrcSpecial),
/// `expr`, with no addressing-mode decoration
BareExpr(Box<Expr<'t>>),
}
/// The immediate values which get a dedicated [Src::Special] representation.
///
/// Canonicalizing a [Src::Immediate] whose expression evaluates to one of these
/// values replaces it with the matching variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum SrcSpecial {
/// `0`
Zero,
/// `1`
One,
/// `4`
Four,
/// `2`
Two,
/// `8`
Eight,
/// `-1` (matched as `0xffff` during canonicalization)
NegOne,
}
/// A destination operand. Each variant notes the syntax its `Display` impl produces.
///
/// Every [Dst] can be converted into the equivalent [Src] via [From].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Dst<'t> {
/// `reg`
Direct(Reg),
/// `expr(reg)`
Indexed(Box<Expr<'t>>, Reg),
/// `&expr`
Absolute(Box<Expr<'t>>),
/// `#value`, for one of the [DstSpecial] values
Special(DstSpecial),
}
/// The special values usable in a [Dst::Special]; a subset of [SrcSpecial].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DstSpecial {
/// `0`
Zero,
/// `1`
One,
}
/// The target of a [Jump].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum JumpDst<'t> {
/// A relative offset, nominally an even number from -0x400..=0x3fe
Rel(i16),
/// A target named by label
Label(&'t str),
}
/// An expression tree.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Expr<'t> {
/// A leading operand followed by any number of `(operator, operand)` pairs,
/// which canonicalization folds from left to right
Binary(Box<Expr<'t>>, Vec<(BinOp, Expr<'t>)>),
/// A list of prefix operators and the operand they apply to
Unary(Vec<UnOp>, Box<Expr<'t>>),
/// A parenthesized expression; displayed as `(expr)`
Group(Box<Expr<'t>>),
/// A 16-bit number; displayed in lowercase hexadecimal with no prefix
Number(u16),
/// An identifier
Ident(&'t str),
/// An identifier prefixed with `&`
AddrOf(&'t str),
}
/// The operators usable in an [Expr::Binary].
///
/// When folding constants, arithmetic is performed on `u16` with wrapping semantics.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum BinOp {
/// `*`
Mul,
/// `/`
Div,
/// `%`
Rem,
/// `+`
Add,
/// `-`
Sub,
/// `<<`
Lsh,
/// `>>`
Rsh,
/// `&`
And,
/// `^`
Xor,
/// `|`
Or,
}
/// The prefix operators usable in an [Expr::Unary].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum UnOp {
/// `*`; cannot be evaluated during canonicalization
Deref,
/// `!`; bitwise complement
Not,
/// `-`; wrapping negation
Neg,
}
pub mod conv {
//! Conversions between [ast](super) types, via [From], or via `new` constructor
use super::{InstructionKind as Ik, *};
/// Implements `From<$src> for $dst` for every `$src => $expr` pair, where `$expr` is
/// something callable (such as a tuple-variant constructor) applied to the source value.
///
/// Each generated impl is generic over a lifetime named `'t`, which the types passed
/// to the macro are free to mention.
macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$(
impl<'t> From<$src> for $dst {
fn from(value: $src) -> Self {
$expr(value)
}
}
)*}}
// sure am glad macros aren't hygienic over lifetimes
// (the `'t` written below names the lifetime declared by the `impl<'t>` inside `impl_from!`)
// Each instruction struct converts into the `InstructionKind` variant which wraps it.
impl_from! { Ik<'t> {
NoEm => Ik::NoEm,
OneEm<'t> => Ik::OneEm,
OneArg<'t> => Ik::OneArg,
TwoArg<'t> => Ik::TwoArg,
Jump<'t> => Ik::Jump,
Reti => Ik::Reti,
Br<'t> => Ik::Br,
}}
// A bare `u16` converts into an `Expr::Number` leaf.
impl_from! { Expr<'t> {
u16 => Expr::Number
}}
impl<'t> From<Dst<'t>> for Src<'t> {
fn from(value: Dst<'t>) -> Self {
match value {
Dst::Special(v) => Src::Special(v.into()),
Dst::Absolute(v) => Src::Absolute(v),
Dst::Indexed(i, r) => Src::Indexed(i, r),
Dst::Direct(r) => Src::Direct(r),
}
}
}
/// Every special destination value is also a special source value.
impl From<DstSpecial> for SrcSpecial {
    /// Maps each [DstSpecial] variant onto the identically-named [SrcSpecial] variant.
    fn from(value: DstSpecial) -> Self {
        match value {
            DstSpecial::One => Self::One,
            DstSpecial::Zero => Self::Zero,
        }
    }
}
impl<'t> TwoArg<'t> {
pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self {
Self { opcode, width, src, dst }
}
}
}
pub mod display {
use super::*;
use std::fmt::Display;
/// Writes every statement on its own line, each terminated by a newline.
impl<'t> Display for Statements<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stops at, and reports, the first formatting error
        self.stmts.iter().try_for_each(|stmt| writeln!(f, "{stmt}"))
    }
}
impl<'t> Display for Statement<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Statement::Label(v) => write!(f, "{v}:"),
Statement::Insn(v) => write!(f, "{v}"),
Statement::Directive(v) => write!(f, "{v}"),
Statement::Comment(v) => write!(f, "{v}"),
}
}
}
impl<'t> Display for Directive<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Directive::Define(_) => write!(f, ".directive"),
Directive::Org(e) => write!(f, ".org {e}"),
Directive::Word(w) => write!(f, ".word {w}"),
Directive::Words(words) => {
write!(f, ".words [ ")?;
for word in words {
write!(f, "{word} ")?;
}
write!(f, "]")
}
Directive::String(s) => write!(f, ".string \"{s}\""),
}
}
}
/// Writes the instruction's kind; the span is not shown.
impl<'t> Display for Instruction<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.kind)
    }
}
impl<'t> Display for InstructionKind<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InstructionKind::NoEm(v) => v.fmt(f),
InstructionKind::OneEm(v) => v.fmt(f),
InstructionKind::OneArg(v) => v.fmt(f),
InstructionKind::TwoArg(v) => v.fmt(f),
InstructionKind::Jump(v) => v.fmt(f),
InstructionKind::Reti(v) => v.fmt(f),
InstructionKind::Br(v) => v.fmt(f),
}
}
}
/// Writes just the opcode, since there are no operands.
impl Display for NoEm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.opcode)
    }
}
/// Writes `<opcode><width>`, a tab, then the destination operand.
impl<'t> Display for OneEm<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.dst)
    }
}
/// Writes `<opcode><width>`, a tab, then the source operand.
impl<'t> Display for OneArg<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}", self.opcode, self.width, self.src)
    }
}
/// Writes `<opcode><width>`, a tab, then `<src>, <dst>`.
impl<'t> Display for TwoArg<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}\t{}, {}", self.opcode, self.width, self.src, self.dst)
    }
}
/// Writes the opcode, a tab, then the jump target.
impl<'t> Display for Jump<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}\t{}", self.opcode, self.dst)
    }
}
/// Writes the fixed mnemonic `reti`.
impl Display for Reti {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("reti")
    }
}
/// Writes `br`, a tab, then the source operand.
impl<'t> Display for Br<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "br\t{}", self.src)
    }
}
impl<'t> Display for Src<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Src::Direct(r) => write!(f, "{r}"),
Src::Indexed(e, r) => write!(f, "{e}({r})"),
Src::Indirect(r) => write!(f, "@{r}"),
Src::PostInc(r) => write!(f, "@{r}+"),
Src::Absolute(e) => write!(f, "&{e}"),
Src::Immediate(e) => write!(f, "#{e}"),
Src::Special(i) => write!(f, "#{i}"),
Src::BareExpr(id) => write!(f, "{id}"),
}
}
}
impl Display for SrcSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SrcSpecial::Zero => write!(f, "0"),
SrcSpecial::One => write!(f, "1"),
SrcSpecial::Four => write!(f, "4"),
SrcSpecial::Two => write!(f, "2"),
SrcSpecial::Eight => write!(f, "8"),
SrcSpecial::NegOne => write!(f, "-1"),
}
}
}
impl<'t> Display for Dst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Dst::Direct(r) => write!(f, "{r}"),
Dst::Indexed(e, r) => write!(f, "{e}({r})"),
Dst::Absolute(e) => write!(f, "&{e}"),
Dst::Special(i) => write!(f, "#{i}"),
}
}
}
impl Display for DstSpecial {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DstSpecial::Zero => write!(f, "0"),
DstSpecial::One => write!(f, "1"),
}
}
}
impl<'t> Display for JumpDst<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
JumpDst::Rel(i) => write!(f, "{i}"),
JumpDst::Label(l) => write!(f, "{l}"),
}
}
}
impl<'t> Display for Expr<'t> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expr::Binary(head, tail) => {
write!(f, "{head}")?;
for (op, tail) in tail {
write!(f, "{op}{tail}")?;
}
Ok(())
}
Expr::Unary(ops, tail) => {
for op in ops {
write!(f, "{op}")?
}
write!(f, "{tail}")
}
Expr::Group(e) => write!(f, "({e})"),
Expr::Number(n) => write!(f, "{n:x}"),
Expr::Ident(n) => write!(f, "{n}"),
Expr::AddrOf(n) => write!(f, "&{n}"),
}
}
}
impl Display for BinOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BinOp::Mul => write!(f, "*"),
BinOp::Div => write!(f, "/"),
BinOp::Rem => write!(f, "%"),
BinOp::Add => write!(f, "+"),
BinOp::Sub => write!(f, "-"),
BinOp::Lsh => write!(f, "<<"),
BinOp::Rsh => write!(f, ">>"),
BinOp::And => write!(f, "&"),
BinOp::Xor => write!(f, "^"),
BinOp::Or => write!(f, "|"),
}
}
}
impl Display for UnOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
UnOp::Deref => write!(f, "*"),
UnOp::Not => write!(f, "!"),
UnOp::Neg => write!(f, "-"),
}
}
}
impl Display for Width {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Width::Word => Ok(()),
Width::Byte => write!(f, ".b"),
}
}
}
}
pub mod canonical {
use std::iter;
use super::*;
use token::TwoArg::*;
/// Conversion of a syntax-tree node into its "canonical" form.
pub trait Canonicalize {
/// The output after canonicalization
type Output;
/// Converts `self` into its "canonical" form, consuming it. "Emulated" instructions
/// are converted into their respective non-emulated forms.
fn to_canonical(self) -> Self::Output;
}
impl<'t> Canonicalize for Statements<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() }
}
}
/// Canonicalizes instructions; every other kind of statement is returned untouched.
impl<'t> Canonicalize for Statement<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        match self {
            Self::Insn(insn) => Self::Insn(insn.to_canonical()),
            other => other,
        }
    }
}
/// Canonicalizes the instruction's kind, carrying its span over unchanged.
impl<'t> Canonicalize for Instruction<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        let Self { span, kind } = self;
        Self { span, kind: kind.to_canonical() }
    }
}
/// Canonicalizes the wrapped instruction and re-wraps the result.
impl<'t> Canonicalize for InstructionKind<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        match self {
            // These shapes are rewritten as two-operand instructions
            Self::NoEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::OneEm(insn) => Self::TwoArg(insn.to_canonical()),
            Self::Br(insn) => Self::TwoArg(insn.to_canonical()),
            // These shapes are kept
            Self::OneArg(insn) => Self::OneArg(insn.to_canonical()),
            Self::TwoArg(insn) => Self::TwoArg(insn.to_canonical()),
            Self::Jump(insn) => Self::Jump(insn.to_canonical()),
            Self::Reti(insn) => Self::Reti(insn.to_canonical()),
        }
    }
}
impl Canonicalize for NoEm {
type Output = TwoArg<'static>;
fn to_canonical(self) -> Self::Output {
let Self { opcode } = self;
use SrcSpecial::*;
use Width::*;
match opcode {
token::NoEm::Nop => {
TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG))
}
token::NoEm::Ret => {
TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC))
}
token::NoEm::Clrc => {
TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Clrz => {
TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Clrn => {
TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Setc => {
TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR))
}
token::NoEm::Setz => {
TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR))
}
token::NoEm::Setn => {
TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR))
}
token::NoEm::Dint => {
TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
token::NoEm::Eint => {
TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR))
}
}
}
}
/// Rewrites each one-operand emulated instruction as the two-operand instruction it
/// stands for, keeping the width and destination.
impl<'t> Canonicalize for OneEm<'t> {
    type Output = TwoArg<'t>;
    fn to_canonical(self) -> Self::Output {
        use SrcSpecial::*;
        let Self { opcode, width, dst } = self;
        // Canonicalize the operand as well, so that the result matches what
        // canonicalizing the equivalent hand-written two-operand instruction gives
        let dst = dst.to_canonical();
        match opcode {
            token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst),
            // The rotates use the destination as both operands
            token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst),
            token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst),
            token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst),
            token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst),
            token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst),
            token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst),
            token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst),
            token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst),
            token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst),
            token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst),
            token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst),
            token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst),
        }
    }
}
/// Canonicalizes the source operand, and forces `call` to word width.
impl<'t> Canonicalize for OneArg<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        // Whatever width was written on a call is replaced with Word
        let width = if matches!(self.opcode, token::OneArg::Call) {
            Width::Word
        } else {
            self.width
        };
        Self { opcode: self.opcode, width, src: self.src.to_canonical() }
    }
}
/// Canonicalizes both operands; opcode and width are carried over unchanged.
impl<'t> Canonicalize for TwoArg<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        Self { src: self.src.to_canonical(), dst: self.dst.to_canonical(), ..self }
    }
}
impl<'t> Canonicalize for Jump<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
let Self { opcode, dst } = self;
Self {
opcode: match opcode {
token::Jump::Jnz => token::Jump::Jne,
token::Jump::Jz => token::Jump::Jeq,
token::Jump::Jnc => token::Jump::Jlo,
token::Jump::Jc => token::Jump::Jhs,
t => t,
},
dst: dst.to_canonical(),
}
}
}
/// `reti` has no operands and no alternate form, so it is returned unchanged.
impl Canonicalize for Reti {
type Output = Self;
fn to_canonical(self) -> Self::Output {
self
}
}
/// Rewrites `br src` as a word-width `mov` of `src` into the PC register.
impl<'t> Canonicalize for Br<'t> {
    type Output = TwoArg<'t>;
    fn to_canonical(self) -> Self::Output {
        let Self { src } = self;
        // Canonicalize the operand as well, so that the result matches what
        // canonicalizing the equivalent hand-written `mov` gives
        TwoArg::new(Mov, Width::Word, src.to_canonical(), Dst::Direct(Reg::PC))
    }
}
impl<'t> Canonicalize for Src<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
use SrcSpecial::*;
match self {
Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self,
Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r),
Src::Absolute(e) => Src::Absolute(e.to_canonical().into()),
Src::Immediate(e) => match e.to_canonical() {
Expr::Number(0) => Src::Special(Zero),
Expr::Number(1) => Src::Special(One),
Expr::Number(2) => Src::Special(Two),
Expr::Number(4) => Src::Special(Four),
Expr::Number(8) => Src::Special(Eight),
Expr::Number(0xffff) => Src::Special(NegOne),
expr => Src::Immediate(expr.into()),
},
Src::BareExpr(_) => self,
}
}
}
/// Canonicalizes the expression inside an indexed or absolute operand; direct and
/// special operands are returned as-is.
impl<'t> Canonicalize for Dst<'t> {
    type Output = Self;
    fn to_canonical(self) -> Self::Output {
        match self {
            Self::Indexed(index, reg) => Self::Indexed(Box::new(index.to_canonical()), reg),
            Self::Absolute(addr) => Self::Absolute(Box::new(addr.to_canonical())),
            Self::Direct(_) | Self::Special(_) => self,
        }
    }
}
/// Jump targets are returned unchanged.
impl<'t> Canonicalize for JumpDst<'t> {
type Output = Self;
fn to_canonical(self) -> Self::Output {
self
}
}
impl<'t> Canonicalize for Expr<'t> {
    type Output = Self;
    /// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number],
    /// this returns a single [Expr::Number]. If not, it evaluates until
    /// it runs into an unevaluatable leaf.
    ///
    /// Evaluation also stops, leaving the remainder of the expression in place, at:
    /// - a [UnOp::Deref], since nothing is known about memory contents
    /// - a division or remainder by zero, which has no value to fold into
    ///
    /// Arithmetic is performed on `u16`, with wrapping semantics.
    fn to_canonical(self) -> Self::Output {
        match self {
            Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self,
            Expr::Group(e) => e.to_canonical(),
            Expr::Unary(ops, operand) => {
                let mut value = match operand.to_canonical() {
                    Expr::Number(n) => n,
                    // With no operators, the operand is the whole expression
                    other if ops.is_empty() => return other,
                    // The operand can't be evaluated: keep the operators, rather than
                    // silently discarding them (`-label` is not `label`)
                    other => return Expr::Unary(ops, Box::new(other)),
                };
                // If the operand is dereferenced, canonicalization must halt,
                // since we have no knowledge of memory layout
                // NOTE(review): operators are applied front-to-back, as before; confirm
                // this matches the order in which the parser stores them.
                let mut ops = ops.into_iter();
                for op in ops.by_ref() {
                    value = match op {
                        UnOp::Deref => {
                            return Expr::Unary(
                                iter::once(op).chain(ops).collect(),
                                Box::new(Expr::Number(value)),
                            )
                        }
                        UnOp::Not => !value,
                        UnOp::Neg => value.wrapping_neg(),
                    }
                }
                Expr::Number(value)
            }
            Expr::Binary(head, tails) => {
                let mut tails = tails.into_iter().map(|(op, tail)| (op, tail.to_canonical()));
                let mut acc = match head.to_canonical() {
                    Expr::Number(n) => n,
                    head => return Expr::Binary(Box::new(head), tails.collect()),
                };
                for (op, tail) in &mut tails {
                    let rhs = match tail {
                        // A zero divisor can't be folded: `wrapping_div` and
                        // `wrapping_rem` would panic, so it takes the rebuild path below
                        Expr::Number(0) if matches!(op, BinOp::Div | BinOp::Rem) => {
                            return Expr::Binary(
                                Box::new(Expr::Number(acc)),
                                iter::once((op, Expr::Number(0))).chain(tails).collect(),
                            );
                        }
                        Expr::Number(n) => n,
                        // If the canonical tail isn't a number, rebuild and return
                        tail => {
                            return Expr::Binary(
                                Box::new(Expr::Number(acc)),
                                iter::once((op, tail)).chain(tails).collect(),
                            );
                        }
                    };
                    acc = match op {
                        BinOp::Mul => acc.wrapping_mul(rhs),
                        BinOp::Div => acc.wrapping_div(rhs),
                        BinOp::Rem => acc.wrapping_rem(rhs),
                        BinOp::Add => acc.wrapping_add(rhs),
                        BinOp::Sub => acc.wrapping_sub(rhs),
                        BinOp::Lsh => acc.wrapping_shl(u32::from(rhs)),
                        BinOp::Rsh => acc.wrapping_shr(u32::from(rhs)),
                        BinOp::And => acc & rhs,
                        BinOp::Xor => acc ^ rhs,
                        BinOp::Or => acc | rhs,
                    };
                }
                Expr::Number(acc)
            }
        }
    }
}
}