chirp-disasm: Implement a graph parser for better output

This commit is contained in:
John 2023-08-31 22:34:55 -05:00
parent f886aadc63
commit 31c904f89c
3 changed files with 215 additions and 42 deletions

View File

@ -6,23 +6,45 @@ use std::{fs::read, path::PathBuf};
mod tree;
/// Entry point: parses the command-line arguments and prints a disassembly for each input file.
///
/// NOTE(review): this listing comes from a diff view with no +/- markers. The statements from
/// the first `let options` down to the second `let mut options` look like the superseded
/// single-file body — confirm against the repository before relying on them.
fn main() -> Result<()> {
let options = Arguments::parse_args_default_or_exit();
let contents = &read(&options.file)?;
let disassembler = Dis::default();
for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() {
let insn = u16::from_be_bytes(
insn.try_into()
.expect("Iterated over 2-byte chunks, got <2 bytes"),
);
println!(
"{}",
format_args!(
"{:03x}: {} {:04x}",
2 * addr + 0x200 + options.offset,
disassembler.once(insn),
insn.bright_black(),
)
);
let mut options = Arguments::parse_args_default_or_exit();
// `pop` removes from the back of the Vec, so files are taken from the end of `options.file` first.
while let Some(file) = options.file.pop() {
// Print the path (Debug-formatted) as a header for this file's output.
println!("{file:?}");
// A read failure is propagated by `?` and ends the whole run.
let contents = &read(&file)?;
// Both control-flow modes need the instruction graph; otherwise fall through to a linear dump.
if options.tree || options.traverse {
let loadaddr = options.loadaddr as usize;
// NOTE(review): `mem!` is defined elsewhere; presumably this maps the file's bytes into a
// Program region starting at `loadaddr` — confirm against the macro.
let mem = mem! {
cpu::mem::Region::Program [loadaddr..loadaddr + contents.len()] = contents
};
// Filled in by `traverse` with weak references to the nodes it creates, keyed by address.
let mut nodes = Default::default();
// Start following control flow at the load address plus the requested offset.
let tree = tree::DisNode::traverse(
mem.grab(..).expect("grabbing [..] should never fail"),
&mut nodes,
options.loadaddr as usize + options.offset,
);
if options.traverse {
// Flat listing: one line per recorded node; entries whose weak reference no longer
// upgrades are skipped.
for (k, v) in nodes.iter() {
if let Some(v) = &v.upgrade().as_ref().map(std::rc::Rc::as_ref) {
println!("{k:03x}: {v:04x}");
}
}
} else {
// Tree listing: formatting the root node prints its whole subtree.
println!("{tree}");
}
} else {
// Linear listing: decode every big-endian 2-byte word from `offset` onward.
// `chunks_exact(2)` leaves out a trailing odd byte, and the slice index panics if
// `offset` is larger than the file.
let disassembler = Dis::default();
for (addr, insn) in contents[options.offset..].chunks_exact(2).enumerate() {
let insn = u16::from_be_bytes(
insn.try_into()
.expect("Iterated over 2-byte chunks, got <2 bytes"),
);
println!(
"{:03x}: {:04x} {}",
// NOTE(review): the address column is based on the literal 0x200, not
// `options.loadaddr` — confirm whether a non-default load address should show here.
2 * addr + 0x200 + options.offset,
insn.bright_black(),
disassembler.once(insn),
);
}
}
}
Ok(())
}
@ -32,11 +54,19 @@ struct Arguments {
#[options(help = "Show help text")]
help: bool,
#[options(help = "Load a ROM to run on Chirp", free, required)]
pub file: PathBuf,
#[options(help = "Load address (usually 200)", parse(try_from_str = "parse_hex"))]
pub file: Vec<PathBuf>,
#[options(
help = "Load address (usually 200)",
parse(try_from_str = "parse_hex"),
default = "200"
)]
pub loadaddr: u16,
#[options(help = "Start disassembling at offset...")]
pub offset: usize,
#[options(help = "Print the disassembly as a tree")]
pub tree: bool,
#[options(help = "Prune unreachable instructions ")]
pub traverse: bool,
}
fn parse_hex(value: &str) -> std::result::Result<u16, std::num::ParseIntError> {

View File

@ -1,49 +1,183 @@
#![allow(dead_code)]
#![allow(unused_variables)]
use std::collections::HashSet;
use std::{
collections::{BTreeMap, HashSet},
fmt::{Display, LowerHex},
rc::{Rc, Weak},
};
use chirp::cpu::instruction::Insn;
use imperative_rs::InstructionSet;
use owo_colors::OwoColorize;
type Adr = usize;
/// Represents the kinds of control flow an instruction can take
pub enum DisNode {
Branch {
addr: Adr,
// NOTE(review): this listing comes from a diff view with no +/- markers; the `Box<DisNode>`
// fields, the per-variant `addr` fields and the tuple variant `End(Insn)` below appear to be
// leftovers of the previous `DisNode` enum — confirm against the repository.
/// What a decoded instruction does to control flow, plus links to the nodes that follow it.
#[derive(Clone, Debug, Default)]
pub enum DisNodeContents {
/// An instruction with a `jump` target and a `ret` continuation.
/// Not constructed by the `tree_recurse` shown in this file (`call` is built as `Branch`).
Subroutine {
insn: Insn,
a: Box<DisNode>,
b: Box<DisNode>,
jump: Rc<DisNode>,
ret: Rc<DisNode>,
},
/// An instruction with two successors: `next` is the fall-through instruction and `jump`
/// is the other target. Built for `call` and for the skip instructions.
Branch {
insn: Insn,
next: Rc<DisNode>,
jump: Rc<DisNode>,
},
/// An instruction with a single successor. Built for `jmp` to another address and for
/// every instruction that has no dedicated case.
Continue {
addr: Adr,
insn: Insn,
next: Box<DisNode>,
next: Rc<DisNode>,
},
/// An instruction after which nothing is followed: `ret`, `halt`, or a `jmp`/`call`
/// whose target is its own address.
End {
insn: Insn,
},
/// A `jmpr` instruction; its target is not followed.
RelBranch {
insn: Insn,
},
/// An address that had already been visited when it was reached again. `back` is the
/// weak reference recorded for that address at that moment, if there was one.
Merge {
addr: Adr,
insn: Insn,
back: Option<Weak<DisNode>>,
},
/// Temporary stand-in for `Merge`; `tree_recurse` replaces it before returning the node.
PendingMerge {
insn: Insn,
},
End(Insn),
/// No instruction could be decoded at this address. This is the `Default` value.
#[default]
Invalid,
}
/// One node of the disassembly tree: the control-flow classification of the instruction
/// decoded at `addr` (or `Invalid` when decoding failed), together with its position.
#[derive(Clone, Debug)]
pub struct DisNode {
/// The instruction and the links to whatever follows it.
pub contents: DisNodeContents,
/// Address at which the instruction was decoded (an index into the memory slice).
pub addr: Adr,
/// Nesting level: starts at 0 and is one higher for the target of a `call` or skip.
/// `Display` emits one indent marker per level.
pub depth: usize,
}
// NOTE(review): this listing comes from a diff view with no +/- markers; `pub fn travel(`, the
// `current: Adr` parameter, the `Result<DisNode, chirp::Error>` return type, `use DisNode::*;`
// and `Ok(End(Insn::cls))` appear to be leftovers of the previous version — confirm against the
// repository.
impl DisNode {
pub fn travel(
/// Builds the control-flow tree rooted at `addr`.
///
/// Starts `tree_recurse` with an empty visited set and depth 0. `nodes` receives a weak
/// reference for each node that gets created, keyed by address; the returned `Rc` is the root.
pub fn traverse(
mem: &[u8],
nodes: &mut BTreeMap<Adr, Weak<DisNode>>,
addr: Adr,
) -> Rc<DisNode> {
Self::tree_recurse(mem, &mut Default::default(), nodes, addr, 0)
}
/// Decodes the instruction at `mem[addr..]`, classifies it, and recursively builds the nodes
/// for every address it can continue to.
///
/// * `visited` - addresses already classified; reaching one again yields a `Merge` node.
/// * `nodes` - address-to-node map, updated with a weak reference to the node built here.
/// * `depth` - nesting level stored in the node; targets of `call` and skips get `depth + 1`.
///
/// Every followed instruction is one more level of recursion, so the call depth grows with
/// the length of the longest path.
///
/// NOTE(review): `&mem[addr..]` panics when `addr` is greater than `mem.len()`, which a jump
/// target or the skip target `addr + len + 2` could reach — confirm `mem` is always large
/// enough.
pub fn tree_recurse(
mem: &[u8],
visited: &mut HashSet<Adr>,
current: Adr,
) -> Result<DisNode, chirp::Error> {
use DisNode::*;
nodes: &mut BTreeMap<Adr, Weak<DisNode>>,
addr: Adr,
depth: usize,
) -> Rc<DisNode> {
use DisNodeContents::*;
// Try to decode an instruction. If the instruction is invalid, fail early.
// The `Invalid` node is returned directly: it is not added to `visited` or to `nodes`.
let Ok((len, insn)) = Insn::decode(&mem[addr..]) else {
return Rc::new(DisNode {
contents: Invalid,
addr,
depth,
});
};
let mut next = DisNode {
contents: {
match insn {
// instruction is already visited, but the branch isn't guaranteed to be in the tree yet
// (`HashSet::insert` returns false when the address was already present; as the first
// arm, this also marks every newly seen address as visited.)
_ if !visited.insert(addr) => PendingMerge { insn },
// decode an insn at the current Adr
// classify the insn
// If the instruction is invalid, emit an Invalid token
// If the instruction is already visited, emit a Merge token
// If the instruction is a ret instruction, emit a Merge token
// If the instruction is any other instruction, emit a Continue token
// If the instruction is a branch to current, emit an End token
// If the instruction is a branch instruction, recursively follow each branch
Insn::ret | Insn::halt => End { insn },
Ok(End(Insn::cls))
// A branch to the current address will halt the machine
Insn::jmp { A } | Insn::call { A } if A as usize == addr => End { insn },
// An unconditional jump has one successor: its target, at the same depth.
Insn::jmp { A } => Continue {
insn,
next: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth),
},
// A call is followed into its target first (one level deeper), then past the call.
// It is built as `Branch`, not as `Subroutine`.
Insn::call { A } => Branch {
insn,
jump: DisNode::tree_recurse(mem, visited, nodes, A as usize, depth + 1),
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
},
// If the instruction is a skip instruction, first visit the next instruction,
// then visit the skip instruction. This preserves visitor order.
Insn::seb { .. }
| Insn::sneb { .. }
| Insn::se { .. }
| Insn::sne { .. }
| Insn::sek { .. } => Branch {
insn,
// FIXME: If the next instruction is Long I, this will just break
// (the skip target below assumes the skipped instruction is 2 bytes long)
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
jump: DisNode::tree_recurse(mem, visited, nodes, addr + len + 2, depth + 1),
},
// Relative branch prediction is out of scope right now
Insn::jmpr { .. } => RelBranch { insn },
// If the instruction is any other instruction, emit a Continue token
_ => Continue {
insn,
next: DisNode::tree_recurse(mem, visited, nodes, addr + len, depth),
},
}
},
addr,
depth,
};
// Resolve pending merges
// `back` is None when no node has been recorded for `addr` yet; nodes are only recorded
// after their children are built, so that is the case for a jump back to an ancestor.
if let PendingMerge { insn } = next.contents {
next.contents = Merge {
insn,
back: nodes.get(&addr).cloned(),
}
}
let next = Rc::new(next);
// Record the finished node. This also runs for Merge nodes, so the map entry for an address
// ends up pointing at the most recently built node for it.
// NOTE(review): confirm that replacing the original node's entry with the Merge node is intended.
nodes.insert(addr, Rc::downgrade(&next));
next
}
}
impl Display for DisNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use DisNodeContents::*;
write!(f, "\n{:04x}: ", self.addr,)?;
for indent in 0..self.depth {
Display::fmt(&"".bright_magenta(), f)?;
}
match &self.contents {
Subroutine { insn, ret, jump } => write!(f, "{insn}{jump}{ret}"),
Branch { insn, next, jump } => write!(f, "{insn}{jump}{next}"),
Continue { insn, next } => write!(f, "{insn}{next}"),
RelBranch { insn } => Display::fmt(&insn.underline(), f),
PendingMerge { insn } | Merge { insn, .. } => write!(
f,
"{}",
format_args!("{}; ...", insn).italic().bright_black()
),
End { insn } => Display::fmt(insn, f),
Invalid => Display::fmt(&"Invalid".bold().red(), f),
}
}
}
impl LowerHex for DisNode {
    /// Formats this node alone as its instruction's encoding in hex, a space, and the
    /// instruction text.
    ///
    /// Children are not followed. A node whose contents are `Invalid` is rendered as the
    /// text `Invalid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use DisNodeContents::*;

        // Every variant except `Invalid` carries the instruction it was built from.
        let insn = match self.contents {
            Invalid => return Display::fmt("Invalid", f),
            Subroutine { insn, .. }
            | Branch { insn, .. }
            | Continue { insn, .. }
            | Merge { insn, .. }
            | End { insn }
            | RelBranch { insn }
            | PendingMerge { insn } => insn,
        };

        // Encoded form first, then the instruction's own Display text.
        let encoded = u32::from(&insn);
        LowerHex::fmt(&encoded.bright_black(), f)?;
        f.write_str(" ")?;
        Display::fmt(&insn.cyan(), f)
    }
}

View File

@ -130,6 +130,7 @@ pub enum Insn {
#[opcode = "0x00fc"]
scl,
/// | 00fd | Exit (halt and catch fire)
#[opcode = "0x0000"]
#[opcode = "0x00fd"]
halt,
/// | 00fe | Return to low-resolution mode
@ -163,6 +164,14 @@ pub enum Insn {
long { i: usize },
}
impl From<&Insn> for u32 {
    /// Packs the encoded bytes of an instruction into the low-order bytes of a `u32`.
    ///
    /// The instruction is encoded into a 4-byte scratch buffer, the buffer is read as a
    /// big-endian integer, and the value is shifted right so that the bytes reported as
    /// written end up least significant.
    ///
    /// If encoding fails, the length is treated as 0 and the result is `0`. This never panics.
    fn from(value: &Insn) -> Self {
        let mut buf = [0u8; 4];
        let written = Insn::encode(value, &mut buf).unwrap_or_default();
        // Clamp so an unexpected length can never underflow the subtraction below.
        let written = written.min(buf.len());
        // At most 32, so the cast cannot truncate.
        let unused_bits = (8 * (buf.len() - written)) as u32;
        // A zero-length encoding needs a full 32-bit shift, which overflows plain `>>`
        // (a panic in debug builds). `checked_shr` returns `None` there; map it to 0.
        u32::from_be_bytes(buf)
            .checked_shr(unused_bits)
            .unwrap_or(0)
    }
}
impl Display for Insn {
#[rustfmt::skip]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {