chirp-disasm: Implement a graph parser for better output

This commit is contained in:
John 2023-08-31 22:34:55 -05:00
parent f886aadc63
commit 31c904f89c
3 changed files with 215 additions and 42 deletions

View File

@ -6,8 +6,31 @@ use std::{fs::read, path::PathBuf};
mod tree; mod tree;
fn main() -> Result<()> { fn main() -> Result<()> {
let options = Arguments::parse_args_default_or_exit(); let mut options = Arguments::parse_args_default_or_exit();
let contents = &read(&options.file)?; while let Some(file) = options.file.pop() {
println!("{file:?}");
let contents = &read(&file)?;
if options.tree || options.traverse {
let loadaddr = options.loadaddr as usize;
let mem = mem! {
cpu::mem::Region::Program [loadaddr..loadaddr + contents.len()] = contents
};
let mut nodes = Default::default();
let tree = tree::DisNode::traverse(
mem.grab(..).expect("grabbing [..] should never fail"),
&mut nodes,
options.loadaddr as usize + options.offset,
);
if options.traverse {
for (k, v) in nodes.iter() {
if let Some(v) = &v.upgrade().as_ref().map(std::rc::Rc::as_ref) {
println!("{k:03x}: {v:04x}");
}
}
} else {
println!("{tree}");
}
} else {
let disassembler = Dis::default(); let disassembler = Dis::default();
for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() { for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() {
let insn = u16::from_be_bytes( let insn = u16::from_be_bytes(
@ -15,15 +38,14 @@ fn main() -> Result<()> {
.expect("Iterated over 2-byte chunks, got <2 bytes"), .expect("Iterated over 2-byte chunks, got <2 bytes"),
); );
println!( println!(
"{}", "{:03x}: {:04x} {}",
format_args!(
"{:03x}: {} {:04x}",
2 * addr + 0x200 + options.offset, 2 * addr + 0x200 + options.offset,
disassembler.once(insn),
insn.bright_black(), insn.bright_black(),
) disassembler.once(insn),
); );
} }
}
}
Ok(()) Ok(())
} }
@ -32,11 +54,19 @@ struct Arguments {
#[options(help = "Show help text")] #[options(help = "Show help text")]
help: bool, help: bool,
#[options(help = "Load a ROM to run on Chirp", free, required)] #[options(help = "Load a ROM to run on Chirp", free, required)]
pub file: PathBuf, pub file: Vec<PathBuf>,
#[options(help = "Load address (usually 200)", parse(try_from_str = "parse_hex"))] #[options(
help = "Load address (usually 200)",
parse(try_from_str = "parse_hex"),
default = "200"
)]
pub loadaddr: u16, pub loadaddr: u16,
#[options(help = "Start disassembling at offset...")] #[options(help = "Start disassembling at offset...")]
pub offset: usize, pub offset: usize,
#[options(help = "Print the disassembly as a tree")]
pub tree: bool,
#[options(help = "Prune unreachable instructions ")]
pub traverse: bool,
} }
fn parse_hex(value: &str) -> std::result::Result<u16, std::num::ParseIntError> { fn parse_hex(value: &str) -> std::result::Result<u16, std::num::ParseIntError> {

View File

@ -1,49 +1,183 @@
#![allow(dead_code)] #![allow(dead_code)]
#![allow(unused_variables)] #![allow(unused_variables)]
use std::collections::HashSet; use std::{
collections::{BTreeMap, HashSet},
fmt::{Display, LowerHex},
rc::{Rc, Weak},
};
use chirp::cpu::instruction::Insn; use chirp::cpu::instruction::Insn;
use imperative_rs::InstructionSet;
use owo_colors::OwoColorize;
type Adr = usize; type Adr = usize;
/// Represents the kinds of control flow an instruction can take /// Represents the kinds of control flow an instruction can take
pub enum DisNode { #[derive(Clone, Debug, Default)]
Branch { pub enum DisNodeContents {
addr: Adr, Subroutine {
insn: Insn, insn: Insn,
a: Box<DisNode>, jump: Rc<DisNode>,
b: Box<DisNode>, ret: Rc<DisNode>,
},
Branch {
insn: Insn,
next: Rc<DisNode>,
jump: Rc<DisNode>,
}, },
Continue { Continue {
addr: Adr,
insn: Insn, insn: Insn,
next: Box<DisNode>, next: Rc<DisNode>,
},
End {
insn: Insn,
},
RelBranch {
insn: Insn,
}, },
Merge { Merge {
addr: Adr, insn: Insn,
back: Option<Weak<DisNode>>,
},
PendingMerge {
insn: Insn, insn: Insn,
}, },
End(Insn), #[default]
Invalid, Invalid,
} }
/// A single decoded instruction in the disassembly graph.
///
/// Pairs the control-flow classification of the instruction with the address it
/// was decoded at and the nesting depth at which the traversal reached it.
#[derive(Clone, Debug)]
pub struct DisNode {
/// Control-flow classification of this instruction; variants with successors
/// hold them as `Rc<DisNode>` links.
pub contents: DisNodeContents,
/// Address the instruction was decoded at; `tree_recurse` uses it as an index
/// into the memory slice it is given.
pub addr: Adr,
/// Nesting depth assigned during traversal: `tree_recurse` passes `depth + 1`
/// when following a call target or the skipped-to side of a skip instruction.
/// The `Display` impl repeats its indent marker this many times.
pub depth: usize,
}
impl DisNode { impl DisNode {
pub fn travel( pub fn traverse(
mem: &[u8],
nodes: &mut BTreeMap<Adr, Weak<DisNode>>,
addr: Adr,
) -> Rc<DisNode> {
Self::tree_recurse(mem, &mut Default::default(), nodes, addr, 0)
}
pub fn tree_recurse(
mem: &[u8], mem: &[u8],
visited: &mut HashSet<Adr>, visited: &mut HashSet<Adr>,
current: Adr, nodes: &mut BTreeMap<Adr, Weak<DisNode>>,
) -> Result<DisNode, chirp::Error> { addr: Adr,
use DisNode::*; depth: usize,
) -> Rc<DisNode> {
use DisNodeContents::*;
// Try to decode an instruction. If the instruction is invalid, fail early.
let Ok((len, insn)) = Insn::decode(&mem[addr..]) else {
return Rc::new(DisNode {
contents: Invalid,
addr,
depth,
});
};
let mut next = DisNode {
contents: {
match insn {
// instruction is already visited, but the branch isn't guaranteed to be in the tree yet
_ if !visited.insert(addr) => PendingMerge { insn },
Insn::ret | Insn::halt => End { insn },
// A branch to the current address will halt the machine
Insn::jmp { A } | Insn::call { A } if A as usize == addr => End { insn },
Insn::jmp { A } => Continue {
insn,
next: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth),
},
Insn::call { A } => Branch {
insn,
jump: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth + 1),
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
},
// If the instruction is a skip instruction, first visit the next instruction,
// then visit the skip instruction. This preserves visitor order.
Insn::seb { .. }
| Insn::sneb { .. }
| Insn::se { .. }
| Insn::sne { .. }
| Insn::sek { .. } => Branch {
insn,
// FIXME: If the next instruction is Long I, this will just break
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
jump: DisNode::tree_recurse(mem, visited, nodes, addr + len + 2, depth + 1),
},
// Relative branch prediction is out of scope right now
Insn::jmpr { .. } => RelBranch { insn },
// decode an insn at the current Adr
// classify the insn
// If the instruction is invalid, emit an Invalid token
// If the instruction is already visited, emit a Merge token
// If the instruction is a ret instruction, emit a Merge token
// If the instruction is any other instruction, emit a Continue token // If the instruction is any other instruction, emit a Continue token
// If the instruction is a branch to current, emit an End token _ => Continue {
// If the instruction is a branch instruction, recursively follow each branch insn,
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
},
}
},
addr,
depth,
};
// Resolve pending merges
if let PendingMerge { insn } = next.contents {
next.contents = Merge {
insn,
back: nodes.get(&addr).cloned(),
}
}
let next = Rc::new(next);
nodes.insert(addr, Rc::downgrade(&next));
next
}
}
Ok(End(Insn::cls)) impl Display for DisNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use DisNodeContents::*;
write!(f, "\n{:04x}: ", self.addr,)?;
for indent in 0..self.depth {
Display::fmt(&"".bright_magenta(), f)?;
}
match &self.contents {
Subroutine { insn, ret, jump } => write!(f, "{insn}{jump}{ret}"),
Branch { insn, next, jump } => write!(f, "{insn}{jump}{next}"),
Continue { insn, next } => write!(f, "{insn}{next}"),
RelBranch { insn } => Display::fmt(&insn.underline(), f),
PendingMerge { insn } | Merge { insn, .. } => write!(
f,
"{}",
format_args!("{}; ...", insn).italic().bright_black()
),
End { insn } => Display::fmt(insn, f),
Invalid => Display::fmt(&"Invalid".bold().red(), f),
}
}
}
impl LowerHex for DisNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use DisNodeContents::*;
match self.contents {
Subroutine { insn, .. }
| Branch { insn, .. }
| Continue { insn, .. }
| Merge { insn, .. }
| End { insn }
| RelBranch { insn }
| PendingMerge { insn } => {
LowerHex::fmt(&u32::from(&insn).bright_black(), f)?;
f.write_str(" ")?;
Display::fmt(&insn.cyan(), f)
}
Invalid => Display::fmt("Invalid", f),
}
} }
} }

View File

@ -130,6 +130,7 @@ pub enum Insn {
#[opcode = "0x00fc"] #[opcode = "0x00fc"]
scl, scl,
/// | 00fd | Exit (halt and catch fire) /// | 00fd | Exit (halt and catch fire)
#[opcode = "0x0000"]
#[opcode = "0x00fd"] #[opcode = "0x00fd"]
halt, halt,
/// | 00fe | Return to low-resolution mode /// | 00fe | Return to low-resolution mode
@ -163,6 +164,14 @@ pub enum Insn {
long { i: usize }, long { i: usize },
} }
/// Packs the encoded bytes of an instruction into the low bits of a `u32`.
///
/// The instruction is encoded into a 4-byte scratch buffer, read back as a
/// big-endian integer, and shifted right so that an `n`-byte encoding occupies
/// the low `8 * n` bits. If encoding fails, the result is `0`.
impl From<&Insn> for u32 {
    fn from(value: &Insn) -> Self {
        let mut buf = [0u8; 4];
        // A failed encode is treated as zero bytes written.
        let len = Insn::encode(value, &mut buf).unwrap_or_default();
        // Number of trailing buffer bits the encoding did not use.
        let unused_bits = 8 * buf.len().saturating_sub(len);
        // With `len == 0` the shift amount is the full 32 bits, which a plain
        // `>>` rejects (overflow panic in debug builds). The checked shift
        // lets that case fall back to 0 instead.
        u32::try_from(unused_bits)
            .ok()
            .and_then(|bits| u32::from_be_bytes(buf).checked_shr(bits))
            .unwrap_or(0)
    }
}
impl Display for Insn { impl Display for Insn {
#[rustfmt::skip] #[rustfmt::skip]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {