// © 2023-2024 John Breaux //See LICENSE.md for license /// Represents MSP430 instructions, use crate::{ lexer::token::{self, Reg, Token}, span::Span, }; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Statements<'t> { pub stmts: Vec>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Statement<'t> { Label(&'t str), Insn(Instruction<'t>), Directive(Directive<'t>), Comment(&'t str), } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Directive<'t> { /// TODO: Store define as a vec of tokens. This will require help from the /// [preprocessor](crate::preprocessor) Define(Vec>), Org(Box>), Word(Box>), Words(Vec>), String(&'t str), } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Instruction<'t> { pub span: Span, pub kind: InstructionKind<'t>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum InstructionKind<'t> { NoEm(NoEm), OneEm(OneEm<'t>), OneArg(OneArg<'t>), TwoArg(TwoArg<'t>), Jump(Jump<'t>), Reti(Reti), Br(Br<'t>), } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct NoEm { pub opcode: token::NoEm, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct OneEm<'t> { pub opcode: token::OneEm, pub width: Width, pub dst: Dst<'t>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct OneArg<'t> { pub opcode: token::OneArg, pub width: Width, pub src: Src<'t>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct TwoArg<'t> { pub opcode: token::TwoArg, pub width: Width, pub src: Src<'t>, pub dst: Dst<'t>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Jump<'t> { pub opcode: token::Jump, pub dst: JumpDst<'t>, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Reti; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Br<'t> { pub src: Src<'t>, } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] pub enum Width { #[default] Word, Byte, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Src<'t> { Direct(Reg), Indexed(Box>, Reg), Indirect(Reg), PostInc(Reg), Absolute(Box>), Immediate(Box>), Special(SrcSpecial), BareExpr(Box>), } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum SrcSpecial { Zero, One, Four, Two, Eight, NegOne, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Dst<'t> { Direct(Reg), Indexed(Box>, Reg), Absolute(Box>), Special(DstSpecial), } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum DstSpecial { Zero, One, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum JumpDst<'t> { /// A relative offset, nominally an even number from -0x400..=0x3fe Rel(i16), Label(&'t str), } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Expr<'t> { Binary(Box>, Vec<(BinOp, Expr<'t>)>), Unary(Vec, Box>), Group(Box>), Number(u16), Ident(&'t str), AddrOf(&'t str), } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum BinOp { Mul, Div, Rem, Add, Sub, Lsh, Rsh, And, Xor, Or, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum UnOp { Deref, Not, Neg, } pub mod conv { //! Conversions between [ast](super) types, via [From], or via `new` constructor use super::{InstructionKind as Ik, *}; macro_rules! impl_from {($dst:ty {$($src:ty => $expr:expr),*$(,)?}) => {$( impl<'t> From<$src> for $dst { fn from(value: $src) -> Self { $expr(value) } } )*}} // sure am glad macros aren't hygenic over lifetimes impl_from! { Ik<'t> { NoEm => Ik::NoEm, OneEm<'t> => Ik::OneEm, OneArg<'t> => Ik::OneArg, TwoArg<'t> => Ik::TwoArg, Jump<'t> => Ik::Jump, Reti => Ik::Reti, Br<'t> => Ik::Br, }} impl_from! { Expr<'t> { u16 => Expr::Number }} impl<'t> From> for Src<'t> { fn from(value: Dst<'t>) -> Self { match value { Dst::Special(v) => Src::Special(v.into()), Dst::Absolute(v) => Src::Absolute(v), Dst::Indexed(i, r) => Src::Indexed(i, r), Dst::Direct(r) => Src::Direct(r), } } } impl From for SrcSpecial { fn from(value: DstSpecial) -> Self { match value { DstSpecial::Zero => SrcSpecial::Zero, DstSpecial::One => SrcSpecial::One, } } } impl<'t> TwoArg<'t> { pub fn new(opcode: token::TwoArg, width: Width, src: Src<'t>, dst: Dst<'t>) -> Self { Self { opcode, width, src, dst } } } } pub mod display { use super::*; use std::fmt::Display; impl<'t> Display for Statements<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for stmt in &self.stmts { writeln!(f, "{stmt}")?; } Ok(()) } } impl<'t> Display for Statement<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Statement::Label(v) => write!(f, "{v}:"), Statement::Insn(v) => write!(f, "{v}"), Statement::Directive(v) => write!(f, "{v}"), Statement::Comment(v) => write!(f, "{v}"), } } } impl<'t> Display for Directive<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Directive::Define(_) => write!(f, ".directive"), Directive::Org(e) => write!(f, ".org {e}"), Directive::Word(w) => write!(f, ".word {w}"), Directive::Words(words) => { write!(f, ".words [ ")?; for word in words { write!(f, "{word} ")?; } write!(f, "]") } Directive::String(s) => write!(f, ".string \"{s}\""), } } } impl<'t> Display for Instruction<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { span: _, kind } = self; write!(f, "{kind}") } } impl<'t> Display for InstructionKind<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { InstructionKind::NoEm(v) => v.fmt(f), InstructionKind::OneEm(v) => v.fmt(f), InstructionKind::OneArg(v) => v.fmt(f), InstructionKind::TwoArg(v) => v.fmt(f), InstructionKind::Jump(v) => v.fmt(f), InstructionKind::Reti(v) => v.fmt(f), InstructionKind::Br(v) => v.fmt(f), } } } impl Display for NoEm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { opcode } = self; write!(f, "{opcode}") } } impl<'t> Display for OneEm<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { opcode, width, dst } = self; write!(f, "{opcode}{width}\t{dst}") } } impl<'t> Display for OneArg<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { opcode, width, src } = self; write!(f, "{opcode}{width}\t{src}") } } impl<'t> Display for TwoArg<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { opcode, width, src, dst } = self; write!(f, "{opcode}{width}\t{src}, {dst}") } } impl<'t> Display for Jump<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { opcode, dst } = self; write!(f, "{opcode}\t{dst}") } } impl Display for Reti { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "reti") } } impl<'t> Display for Br<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { src } = self; write!(f, "br\t{src}") } } impl<'t> Display for Src<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Src::Direct(r) => write!(f, "{r}"), Src::Indexed(e, r) => write!(f, "{e}({r})"), Src::Indirect(r) => write!(f, "@{r}"), Src::PostInc(r) => write!(f, "@{r}+"), Src::Absolute(e) => write!(f, "&{e}"), Src::Immediate(e) => write!(f, "#{e}"), Src::Special(i) => write!(f, "#{i}"), Src::BareExpr(id) => write!(f, "{id}"), } } } impl Display for SrcSpecial { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { SrcSpecial::Zero => write!(f, "0"), SrcSpecial::One => write!(f, "1"), SrcSpecial::Four => write!(f, "4"), SrcSpecial::Two => write!(f, "2"), SrcSpecial::Eight => write!(f, "8"), SrcSpecial::NegOne => write!(f, "-1"), } } } impl<'t> Display for Dst<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Dst::Direct(r) => write!(f, "{r}"), Dst::Indexed(e, r) => write!(f, "{e}({r})"), Dst::Absolute(e) => write!(f, "&{e}"), Dst::Special(i) => write!(f, "#{i}"), } } } impl Display for DstSpecial { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { DstSpecial::Zero => write!(f, "0"), DstSpecial::One => write!(f, "1"), } } } impl<'t> Display for JumpDst<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { JumpDst::Rel(i) => write!(f, "{i}"), JumpDst::Label(l) => write!(f, "{l}"), } } } impl<'t> Display for Expr<'t> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Expr::Binary(head, tail) => { write!(f, "{head}")?; for (op, tail) in tail { write!(f, "{op}{tail}")?; } Ok(()) } Expr::Unary(ops, tail) => { for op in ops { write!(f, "{op}")? } write!(f, "{tail}") } Expr::Group(e) => write!(f, "({e})"), Expr::Number(n) => write!(f, "{n:x}"), Expr::Ident(n) => write!(f, "{n}"), Expr::AddrOf(n) => write!(f, "&{n}"), } } } impl Display for BinOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { BinOp::Mul => write!(f, "*"), BinOp::Div => write!(f, "/"), BinOp::Rem => write!(f, "%"), BinOp::Add => write!(f, "+"), BinOp::Sub => write!(f, "-"), BinOp::Lsh => write!(f, "<<"), BinOp::Rsh => write!(f, ">>"), BinOp::And => write!(f, "&"), BinOp::Xor => write!(f, "^"), BinOp::Or => write!(f, "|"), } } } impl Display for UnOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { UnOp::Deref => write!(f, "*"), UnOp::Not => write!(f, "!"), UnOp::Neg => write!(f, "-"), } } } impl Display for Width { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Width::Word => Ok(()), Width::Byte => write!(f, ".b"), } } } } pub mod canonical { use std::iter; use super::*; use token::TwoArg::*; pub trait Canonicalize { /// The output after canonicalization type Output; /// Transmutes Self into its "canonical" form. "Emulated" instructions are converted /// into their respective non-emulated forms. fn to_canonical(self) -> Self::Output; } impl<'t> Canonicalize for Statements<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { Self { stmts: self.stmts.into_iter().map(|s| s.to_canonical()).collect() } } } impl<'t> Canonicalize for Statement<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { match self { Statement::Insn(i) => Self::Insn(i.to_canonical()), _ => self, } } } impl<'t> Canonicalize for Instruction<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { Self { kind: self.kind.to_canonical(), ..self } } } impl<'t> Canonicalize for InstructionKind<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { match self { Self::NoEm(v) => Self::TwoArg(v.to_canonical()), Self::OneEm(v) => Self::TwoArg(v.to_canonical()), Self::Reti(v) => Self::Reti(v.to_canonical()), Self::Br(v) => Self::TwoArg(v.to_canonical()), Self::OneArg(v) => Self::OneArg(v.to_canonical()), Self::TwoArg(v) => Self::TwoArg(v.to_canonical()), Self::Jump(v) => Self::Jump(v.to_canonical()), } } } impl Canonicalize for NoEm { type Output = TwoArg<'static>; fn to_canonical(self) -> Self::Output { let Self { opcode } = self; use SrcSpecial::*; use Width::*; match opcode { token::NoEm::Nop => { TwoArg::new(Mov, Word, Src::Direct(Reg::CG), Dst::Direct(Reg::CG)) } token::NoEm::Ret => { TwoArg::new(Mov, Word, Src::PostInc(Reg::SP), Dst::Direct(Reg::PC)) } token::NoEm::Clrc => { TwoArg::new(Bic, Word, Src::Special(One), Dst::Direct(Reg::SR)) } token::NoEm::Clrz => { TwoArg::new(Bic, Word, Src::Special(Two), Dst::Direct(Reg::SR)) } token::NoEm::Clrn => { TwoArg::new(Bic, Word, Src::Special(Four), Dst::Direct(Reg::SR)) } token::NoEm::Setc => { TwoArg::new(Bis, Word, Src::Special(One), Dst::Direct(Reg::SR)) } token::NoEm::Setz => { TwoArg::new(Bis, Word, Src::Special(Two), Dst::Direct(Reg::SR)) } token::NoEm::Setn => { TwoArg::new(Bis, Word, Src::Special(Four), Dst::Direct(Reg::SR)) } token::NoEm::Dint => { TwoArg::new(Bic, Word, Src::Special(Eight), Dst::Direct(Reg::SR)) } token::NoEm::Eint => { TwoArg::new(Bis, Word, Src::Special(Eight), Dst::Direct(Reg::SR)) } } } } impl<'t> Canonicalize for OneEm<'t> { type Output = TwoArg<'t>; fn to_canonical(self) -> Self::Output { use SrcSpecial::*; let Self { opcode, width, dst } = self; match opcode { token::OneEm::Pop => TwoArg::new(Mov, width, Src::PostInc(Reg::SP), dst), token::OneEm::Rla => TwoArg::new(Add, width, dst.clone().into(), dst), token::OneEm::Rlc => TwoArg::new(Addc, width, dst.clone().into(), dst), token::OneEm::Inv => TwoArg::new(Xor, width, Src::Special(NegOne), dst), token::OneEm::Clr => TwoArg::new(Mov, width, Src::Special(Zero), dst), token::OneEm::Tst => TwoArg::new(Cmp, width, Src::Special(Zero), dst), token::OneEm::Dec => TwoArg::new(Sub, width, Src::Special(One), dst), token::OneEm::Decd => TwoArg::new(Sub, width, Src::Special(Two), dst), token::OneEm::Inc => TwoArg::new(Add, width, Src::Special(One), dst), token::OneEm::Incd => TwoArg::new(Add, width, Src::Special(Two), dst), token::OneEm::Adc => TwoArg::new(Addc, width, Src::Special(Zero), dst), token::OneEm::Dadc => TwoArg::new(Dadd, width, Src::Special(Zero), dst), token::OneEm::Sbc => TwoArg::new(Subc, width, Src::Special(Zero), dst), } } } impl<'t> Canonicalize for OneArg<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { let Self { opcode, width, src } = self; Self { opcode, width: match opcode { token::OneArg::Call => Width::Word, _ => width, }, src: src.to_canonical(), } } } impl<'t> Canonicalize for TwoArg<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { let Self { opcode, width, src, dst } = self; Self { opcode, width, src: src.to_canonical(), dst: dst.to_canonical() } } } impl<'t> Canonicalize for Jump<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { let Self { opcode, dst } = self; Self { opcode: match opcode { token::Jump::Jnz => token::Jump::Jne, token::Jump::Jz => token::Jump::Jeq, token::Jump::Jnc => token::Jump::Jlo, token::Jump::Jc => token::Jump::Jhs, t => t, }, dst: dst.to_canonical(), } } } impl Canonicalize for Reti { type Output = Self; fn to_canonical(self) -> Self::Output { self } } impl<'t> Canonicalize for Br<'t> { type Output = TwoArg<'t>; fn to_canonical(self) -> Self::Output { let Self { src } = self; TwoArg::new(Mov, Width::Word, src, Dst::Direct(Reg::PC)) } } impl<'t> Canonicalize for Src<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { use SrcSpecial::*; match self { Src::Direct(_) | Src::Indirect(_) | Src::PostInc(_) | Src::Special(_) => self, Src::Indexed(e, r) => Src::Indexed(e.to_canonical().into(), r), Src::Absolute(e) => Src::Absolute(e.to_canonical().into()), Src::Immediate(e) => match e.to_canonical() { Expr::Number(0) => Src::Special(Zero), Expr::Number(1) => Src::Special(One), Expr::Number(2) => Src::Special(Two), Expr::Number(4) => Src::Special(Four), Expr::Number(8) => Src::Special(Eight), Expr::Number(0xffff) => Src::Special(NegOne), expr => Src::Immediate(expr.into()), }, Src::BareExpr(_) => self, } } } impl<'t> Canonicalize for Dst<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { match self { Dst::Direct(_) | Dst::Special(_) => self, Dst::Indexed(e, r) => Dst::Indexed(e.to_canonical().into(), r), Dst::Absolute(e) => Dst::Absolute(e.to_canonical().into()), } } } impl<'t> Canonicalize for JumpDst<'t> { type Output = Self; fn to_canonical(self) -> Self::Output { self } } impl<'t> Canonicalize for Expr<'t> { type Output = Self; /// Canonicalizes an [Expr]. If all leaves are of type [Expr::Number], /// this returns a single [Expr::Number]. If not, it evaluates until /// it runs into an unevaluatable leaf. fn to_canonical(self) -> Self::Output { match self { Expr::Number(_) | Expr::Ident(_) | Expr::AddrOf(_) => self, Expr::Group(e) => e.to_canonical(), Expr::Unary(ops, tail) => { let mut tail = match tail.to_canonical() { Expr::Number(n) => n, other => return other, }; // If the tail is dereferenced, canonicalization must halt, // since we have no knowledge of memory layout let mut ops = ops.into_iter(); for op in ops.by_ref() { tail = match op { UnOp::Deref => { return Expr::Unary( iter::once(op).chain(ops).collect(), Box::new(tail.into()), ) } UnOp::Not => !tail, UnOp::Neg => 0u16.wrapping_sub(tail), } } Expr::Number(tail) } Expr::Binary(head, tails) => { let mut tails = tails.into_iter().map(|(op, tail)| (op, tail.to_canonical())); let mut head = match head.to_canonical() { Expr::Number(n) => n, head => return Expr::Binary(head.into(), tails.collect()), }; for (op, tail) in &mut tails { // If the canonical tail isn't a number, rebuild and return let Expr::Number(tail) = tail else { return Expr::Binary( Box::new(head.into()), iter::once((op, tail)).chain(tails).collect(), ); }; head = match op { BinOp::Mul => head.wrapping_mul(tail), BinOp::Div => head.wrapping_div(tail), BinOp::Rem => head.wrapping_rem(tail), BinOp::Add => head.wrapping_add(tail), BinOp::Sub => head.wrapping_sub(tail), BinOp::Lsh => head.wrapping_shl(tail as u32), BinOp::Rsh => head.wrapping_shr(tail as u32), BinOp::And => head & tail, BinOp::Xor => head ^ tail, BinOp::Or => head | tail, }; } Expr::Number(head) } } } } }