From 31c904f89cffe17857d10ba6a369a93860a84034 Mon Sep 17 00:00:00 2001 From: John Breaux Date: Thu, 31 Aug 2023 22:34:55 -0500 Subject: [PATCH] chirp-disasm: Implement a graph parser for better output --- src/bin/chirp-disasm/main.rs | 68 +++++++++---- src/bin/chirp-disasm/tree.rs | 180 ++++++++++++++++++++++++++++++----- src/cpu/instruction.rs | 9 ++ 3 files changed, 215 insertions(+), 42 deletions(-) diff --git a/src/bin/chirp-disasm/main.rs b/src/bin/chirp-disasm/main.rs index 617d5e2..b26f9b1 100644 --- a/src/bin/chirp-disasm/main.rs +++ b/src/bin/chirp-disasm/main.rs @@ -6,23 +6,45 @@ use std::{fs::read, path::PathBuf}; mod tree; fn main() -> Result<()> { - let options = Arguments::parse_args_default_or_exit(); - let contents = &read(&options.file)?; - let disassembler = Dis::default(); - for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() { - let insn = u16::from_be_bytes( - insn.try_into() - .expect("Iterated over 2-byte chunks, got <2 bytes"), - ); - println!( - "{}", - format_args!( - "{:03x}: {} {:04x}", - 2 * addr + 0x200 + options.offset, - disassembler.once(insn), - insn.bright_black(), - ) - ); + let mut options = Arguments::parse_args_default_or_exit(); + while let Some(file) = options.file.pop() { + println!("{file:?}"); + let contents = &read(&file)?; + if options.tree || options.traverse { + let loadaddr = options.loadaddr as usize; + let mem = mem! { + cpu::mem::Region::Program [loadaddr..loadaddr + contents.len()] = contents + }; + let mut nodes = Default::default(); + let tree = tree::DisNode::traverse( + mem.grab(..).expect("grabbing [..] should never fail"), + &mut nodes, + options.loadaddr as usize + options.offset, + ); + if options.traverse { + for (k, v) in nodes.iter() { + if let Some(v) = &v.upgrade().as_ref().map(std::rc::Rc::as_ref) { + println!("{k:03x}: {v:04x}"); + } + } + } else { + println!("{tree}"); + } + } else { + let disassembler = Dis::default(); + for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() { + let insn = u16::from_be_bytes( + insn.try_into() + .expect("Iterated over 2-byte chunks, got <2 bytes"), + ); + println!( + "{:03x}: {:04x} {}", + 2 * addr + 0x200 + options.offset, + insn.bright_black(), + disassembler.once(insn), + ); + } + } } Ok(()) } @@ -32,11 +54,19 @@ struct Arguments { #[options(help = "Show help text")] help: bool, #[options(help = "Load a ROM to run on Chirp", free, required)] - pub file: PathBuf, - #[options(help = "Load address (usually 200)", parse(try_from_str = "parse_hex"))] + pub file: Vec, + #[options( + help = "Load address (usually 200)", + parse(try_from_str = "parse_hex"), + default = "200" + )] pub loadaddr: u16, #[options(help = "Start disassembling at offset...")] pub offset: usize, + #[options(help = "Print the disassembly as a tree")] + pub tree: bool, + #[options(help = "Prune unreachable instructions ")] + pub traverse: bool, } fn parse_hex(value: &str) -> std::result::Result { diff --git a/src/bin/chirp-disasm/tree.rs b/src/bin/chirp-disasm/tree.rs index bdac089..a315a8d 100644 --- a/src/bin/chirp-disasm/tree.rs +++ b/src/bin/chirp-disasm/tree.rs @@ -1,49 +1,183 @@ #![allow(dead_code)] #![allow(unused_variables)] -use std::collections::HashSet; +use std::{ + collections::{BTreeMap, HashSet}, + fmt::{Display, LowerHex}, + rc::{Rc, Weak}, +}; use chirp::cpu::instruction::Insn; +use imperative_rs::InstructionSet; +use owo_colors::OwoColorize; type Adr = usize; /// Represents the kinds of control flow an instruction can take -pub enum DisNode { - Branch { - addr: Adr, +#[derive(Clone, Debug, Default)] +pub enum DisNodeContents { + Subroutine { insn: Insn, - a: Box, - b: Box, + jump: Rc, + ret: Rc, + }, + Branch { + insn: Insn, + next: Rc, + jump: Rc, }, Continue { - addr: Adr, insn: Insn, - next: Box, + next: Rc, + }, + End { + insn: Insn, + }, + RelBranch { + insn: Insn, }, Merge { - addr: Adr, + insn: Insn, + back: Option>, + }, + PendingMerge { insn: Insn, }, - End(Insn), + #[default] Invalid, } +/// Represents the kinds of control flow an instruction can take +#[derive(Clone, Debug)] +pub struct DisNode { + pub contents: DisNodeContents, + pub addr: Adr, + pub depth: usize, +} + impl DisNode { - pub fn travel( + pub fn traverse( + mem: &[u8], + nodes: &mut BTreeMap>, + addr: Adr, + ) -> Rc { + Self::tree_recurse(mem, &mut Default::default(), nodes, addr, 0) + } + pub fn tree_recurse( mem: &[u8], visited: &mut HashSet, - current: Adr, - ) -> Result { - use DisNode::*; + nodes: &mut BTreeMap>, + addr: Adr, + depth: usize, + ) -> Rc { + use DisNodeContents::*; + // Try to decode an instruction. If the instruction is invalid, fail early. + let Ok((len, insn)) = Insn::decode(&mem[addr..]) else { + return Rc::new(DisNode { + contents: Invalid, + addr, + depth, + }); + }; + let mut next = DisNode { + contents: { + match insn { + // instruction is already visited, but the branch isn't guaranteed to be in the tree yet + _ if !visited.insert(addr) => PendingMerge { insn }, - // decode an insn at the current Adr - // classify the insn - // If the instruction is invalid, emit an Invalid token - // If the instruction is already visited, emit a Merge token - // If the instruction is a ret instruction, emit a Merge token - // If the instruction is any other instruction, emit a Continue token - // If the instruction is a branch to current, emit an End token - // If the instruction is a branch instruction, recursively follow each branch + Insn::ret | Insn::halt => End { insn }, - Ok(End(Insn::cls)) + // A branch to the current address will halt the machine + Insn::jmp { A } | Insn::call { A } if A as usize == addr => End { insn }, + + Insn::jmp { A } => Continue { + insn, + next: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth), + }, + + Insn::call { A } => Branch { + insn, + jump: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth + 1), + next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth), + }, + + // If the instruction is a skip instruction, first visit the next instruction, + // then visit the skip instruction. This preserves visitor order. + Insn::seb { .. } + | Insn::sneb { .. } + | Insn::se { .. } + | Insn::sne { .. } + | Insn::sek { .. } => Branch { + insn, + // FIXME: If the next instruction is Long I, this will just break + next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth), + jump: DisNode::tree_recurse(mem, visited, nodes, addr + len + 2, depth + 1), + }, + + // Relative branch prediction is out of scope right now + Insn::jmpr { .. } => RelBranch { insn }, + + // If the instruction is any other instruction, emit a Continue token + _ => Continue { + insn, + next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth), + }, + } + }, + addr, + depth, + }; + // Resolve pending merges + if let PendingMerge { insn } = next.contents { + next.contents = Merge { + insn, + back: nodes.get(&addr).cloned(), + } + } + let next = Rc::new(next); + nodes.insert(addr, Rc::downgrade(&next)); + next + } +} + +impl Display for DisNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use DisNodeContents::*; + write!(f, "\n{:04x}: ", self.addr,)?; + for indent in 0..self.depth { + Display::fmt(&"│ ".bright_magenta(), f)?; + } + match &self.contents { + Subroutine { insn, ret, jump } => write!(f, "{insn}{jump}{ret}"), + Branch { insn, next, jump } => write!(f, "{insn}{jump}{next}"), + Continue { insn, next } => write!(f, "{insn}{next}"), + RelBranch { insn } => Display::fmt(&insn.underline(), f), + PendingMerge { insn } | Merge { insn, .. } => write!( + f, + "{}", + format_args!("{}; ...", insn).italic().bright_black() + ), + End { insn } => Display::fmt(insn, f), + Invalid => Display::fmt(&"Invalid".bold().red(), f), + } + } +} + +impl LowerHex for DisNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use DisNodeContents::*; + match self.contents { + Subroutine { insn, .. } + | Branch { insn, .. } + | Continue { insn, .. } + | Merge { insn, .. } + | End { insn } + | RelBranch { insn } + | PendingMerge { insn } => { + LowerHex::fmt(&u32::from(&insn).bright_black(), f)?; + f.write_str(" ")?; + Display::fmt(&insn.cyan(), f) + } + Invalid => Display::fmt("Invalid", f), + } } } diff --git a/src/cpu/instruction.rs b/src/cpu/instruction.rs index 7c777bd..8426374 100644 --- a/src/cpu/instruction.rs +++ b/src/cpu/instruction.rs @@ -130,6 +130,7 @@ pub enum Insn { #[opcode = "0x00fc"] scl, /// | 00fd | Exit (halt and catch fire) + #[opcode = "0x0000"] #[opcode = "0x00fd"] halt, /// | 00fe | Return to low-resolution mode @@ -163,6 +164,14 @@ pub enum Insn { long { i: usize }, } +impl From<&Insn> for u32 { + fn from(value: &Insn) -> Self { + let mut buf = [0u8;4]; + let len = Insn::encode(value, &mut buf).unwrap_or_default(); + u32::from_be_bytes(buf) >> ((4-len)*8) + } +} + impl Display for Insn { #[rustfmt::skip] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {