Major Refactor: Make invalid states unrepresentable™️

- Rewrote the instruction decoder as an enum
- Used imperative_rs to auto-generate the bit twiddling logic
- Implemented Display on that enum, for disassembly
- Rewrote CPU::tick
  - Now >10x faster
  - Disassembly mode is still 5x slower though
- Implemented time-based benchmarking
  - (use option -S to set the number of instructions per epoch)
This commit is contained in:
2023-03-30 08:27:06 -05:00
parent 8ab9799913
commit cc3bc3a7fe
11 changed files with 200 additions and 661 deletions

View File

@@ -13,10 +13,13 @@ pub trait Disassembler {
}
pub mod disassembler;
pub mod old_disassembler;
use self::disassembler::Dis;
use crate::bus::{Bus, Read, Region, Write};
use self::disassembler::{Dis, Insn};
use crate::{
bus::{Bus, Read, Region, Write},
error::Result,
};
use imperative_rs::InstructionSet;
use owo_colors::OwoColorize;
use rand::random;
use std::time::Instant;
@@ -123,6 +126,22 @@ impl ControlFlags {
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Timers {
frame: Instant,
insn: Instant,
}
impl Default for Timers {
fn default() -> Self {
let now = Instant::now();
Self {
frame: now,
insn: now,
}
}
}
/// Represents the internal state of the CPU interpreter
#[derive(Clone, Debug, PartialEq)]
pub struct CPU {
@@ -142,7 +161,7 @@ pub struct CPU {
// I/O
keys: [bool; 16],
// Execution data
timer: Instant,
timers: Timers,
cycle: usize,
breakpoints: Vec<Adr>,
disassembler: Dis,
@@ -160,7 +179,7 @@ impl CPU {
/// 0x50, // font location
/// 0x200, // start of program
/// 0xefe, // top of stack
/// Disassemble::default(),
/// Dis::default(),
/// vec![], // Breakpoints
/// ControlFlags::default()
/// );
@@ -343,7 +362,7 @@ impl CPU {
/// 0x50,
/// 0x340,
/// 0xefe,
/// Disassemble::default(),
/// Dis::default(),
/// vec![],
/// ControlFlags::default()
/// );
@@ -419,18 +438,18 @@ impl CPU {
/// ],
/// Screen [0x0f00..0x1000],
/// };
/// cpu.singlestep(&mut bus);
/// cpu.singlestep(&mut bus)?;
/// assert_eq!(0x202, cpu.pc());
/// assert_eq!(1, cpu.cycle());
///# Ok(())
///# }
/// ```
pub fn singlestep(&mut self, bus: &mut Bus) -> &mut Self {
pub fn singlestep(&mut self, bus: &mut Bus) -> Result<&mut Self> {
self.flags.pause = false;
self.tick(bus);
self.tick(bus)?;
self.flags.vbi_wait = false;
self.flags.pause = true;
self
Ok(self)
}
/// Unpauses the emulator for `steps` ticks
@@ -448,18 +467,18 @@ impl CPU {
/// ],
/// Screen [0x0f00..0x1000],
/// };
/// cpu.multistep(&mut bus, 0x20);
/// cpu.multistep(&mut bus, 0x20)?;
/// assert_eq!(0x202, cpu.pc());
/// assert_eq!(0x20, cpu.cycle());
///# Ok(())
///# }
/// ```
pub fn multistep(&mut self, bus: &mut Bus, steps: usize) -> &mut Self {
pub fn multistep(&mut self, bus: &mut Bus, steps: usize) -> Result<&mut Self> {
for _ in 0..steps {
self.tick(bus);
self.tick(bus)?;
self.vertical_blank();
}
self
Ok(self)
}
/// Simulates vertical blanking
@@ -471,6 +490,7 @@ impl CPU {
/// - Subtracts the elapsed time in fractions of a frame
/// from st/dt
/// - Disables framepause if the duration exceeds that of a frame
#[inline(always)]
pub fn vertical_blank(&mut self) -> &mut Self {
if self.flags.pause {
return self;
@@ -480,14 +500,15 @@ impl CPU {
if self.flags.vbi_wait {
self.flags.vbi_wait = !(self.cycle % speed == 0);
}
self.delay -= 1.0 / speed as f64;
self.sound -= 1.0 / speed as f64;
let speed = 1.0 / speed as f64;
self.delay -= speed;
self.sound -= speed;
return self;
};
// Convert the elapsed time to 60ths of a second
let time = self.timer.elapsed().as_secs_f64() * 60.0;
self.timer = Instant::now();
let time = self.timers.frame.elapsed().as_secs_f64() * 60.0;
self.timers.frame = Instant::now();
if time > 1.0 {
self.flags.vbi_wait = false;
}
@@ -513,7 +534,7 @@ impl CPU {
/// ],
/// Screen [0x0f00..0x1000],
/// };
/// cpu.tick(&mut bus);
/// cpu.tick(&mut bus)?;
/// assert_eq!(0x202, cpu.pc());
/// assert_eq!(1, cpu.cycle());
///# Ok(())
@@ -534,162 +555,93 @@ impl CPU {
/// ],
/// Screen [0x0f00..0x1000],
/// };
/// cpu.multistep(&mut bus, 0x10); // panics!
/// cpu.tick(&mut bus)?; // panics!
///# Ok(())
///# }
/// ```
pub fn tick(&mut self, bus: &mut Bus) -> &mut Self {
pub fn tick(&mut self, bus: &mut Bus) -> Result<&mut Self> {
// Do nothing if paused
if self.flags.pause || self.flags.vbi_wait || self.flags.keypause {
// always tick in test mode
if self.flags.monotonic.is_some() {
self.cycle += 1;
}
return self;
return Ok(self);
}
self.cycle += 1;
// fetch opcode
let opcode: u16 = bus.read(self.pc);
let pc = self.pc;
let opcode: &[u8; 2] = if let Some(slice) = bus.get(self.pc as usize..self.pc as usize + 2)
{
slice.try_into()?
} else {
return Err(crate::error::Error::InvalidBusRange {
range: self.pc as usize..self.pc as usize + 2,
});
};
// DINC pc
self.pc = self.pc.wrapping_add(2);
// decode opcode
use old_disassembler::{a, b, i, n, x, y};
let (i, x, y, n, b, a) = (
i(opcode),
x(opcode),
y(opcode),
n(opcode),
b(opcode),
a(opcode),
);
match i {
// # Issue a system call
// |opcode| effect |
// |------|------------------------------------|
// | 00e0 | Clear screen memory to all 0 |
// | 00ee | Return from subroutine |
0x0 => match a {
0x0e0 => self.clear_screen(bus),
0x0ee => self.ret(bus),
_ => self.sys(a),
},
// | 1aaa | Sets pc to an absolute address
0x1 => self.jump(a),
// | 2aaa | Pushes pc onto the stack, then jumps to a
0x2 => self.call(a, bus),
// | 3xbb | Skips next instruction if register X == b
0x3 => self.skip_equals_immediate(x, b),
// | 4xbb | Skips next instruction if register X != b
0x4 => self.skip_not_equals_immediate(x, b),
// # Performs a register-register comparison
// |opcode| effect |
// |------|------------------------------------|
// | 9XY0 | Skip next instruction if vX == vY |
0x5 => match n {
0x0 => self.skip_equals(x, y),
_ => self.unimplemented(opcode),
},
// 6xbb: Loads immediate byte b into register vX
0x6 => self.load_immediate(x, b),
// 7xbb: Adds immediate byte b to register vX
0x7 => self.add_immediate(x, b),
// # Performs ALU operation
// |opcode| effect |
// |------|------------------------------------|
// | 8xy0 | Y = X |
// | 8xy1 | X = X | Y |
// | 8xy2 | X = X & Y |
// | 8xy3 | X = X ^ Y |
// | 8xy4 | X = X + Y; Set vF=carry |
// | 8xy5 | X = X - Y; Set vF=carry |
// | 8xy6 | X = X >> 1 |
// | 8xy7 | X = Y - X; Set vF=carry |
// | 8xyE | X = X << 1 |
0x8 => match n {
0x0 => self.load(x, y),
0x1 => self.or(x, y),
0x2 => self.and(x, y),
0x3 => self.xor(x, y),
0x4 => self.add(x, y),
0x5 => self.sub(x, y),
0x6 => self.shift_right(x, y),
0x7 => self.backwards_sub(x, y),
0xE => self.shift_left(x, y),
_ => self.unimplemented(opcode),
},
// # Performs a register-register comparison
// |opcode| effect |
// |------|------------------------------------|
// | 9XY0 | Skip next instruction if vX != vY |
0x9 => match n {
0 => self.skip_not_equals(x, y),
_ => self.unimplemented(opcode),
},
// Aaaa: Load address #a into register I
0xa => self.load_i_immediate(a),
// Baaa: Jump to &adr + v0
0xb => self.jump_indexed(a),
// Cxbb: Stores a random number + the provided byte into vX
0xc => self.rand(x, b),
// Dxyn: Draws n-byte sprite to the screen at coordinates (vX, vY)
0xd => self.draw(x, y, n, bus),
// # Skips instruction on value of keypress
// |opcode| effect |
// |------|------------------------------------|
// | eX9e | Skip next instruction if key == vX |
// | eXa1 | Skip next instruction if key != vX |
0xe => match b {
0x9e => self.skip_key_equals(x),
0xa1 => self.skip_key_not_equals(x),
_ => self.unimplemented(opcode),
},
// # Performs IO
// |opcode| effect |
// |------|------------------------------------|
// | fX07 | Set vX to value in delay timer |
// | fX0a | Wait for input, store in vX m |
// | fX15 | Set sound timer to the value in vX |
// | fX18 | set delay timer to the value in vX |
// | fX1e | Add x to I |
// | fX29 | Load sprite for character x into I |
// | fX33 | BCD convert X into I[0..3] |
// | fX55 | DMA Stor from I to registers 0..X |
// | fX65 | DMA Load from I to registers 0..X |
0xf => match b {
0x07 => self.load_delay_timer(x),
0x0A => self.wait_for_key(x),
0x15 => self.store_delay_timer(x),
0x18 => self.store_sound_timer(x),
0x1E => self.add_i(x),
0x29 => self.load_sprite(x),
0x33 => self.bcd_convert(x, bus),
0x55 => self.store_dma(x, bus),
0x65 => self.load_dma(x, bus),
_ => self.unimplemented(opcode),
},
_ => unreachable!("Extracted nibble from byte, got >nibble?"),
}
let elapsed = self.timer.elapsed();
// Print opcode disassembly:
if self.flags.debug {
std::println!(
"{:3} {:03x}: {:<36}{:?}",
println!("{:?}", self.timers.insn.elapsed().bright_black());
self.timers.insn = Instant::now();
std::print!(
"{:3} {:03x}: {:<36}",
self.cycle.bright_black(),
pc,
self.disassembler.once(opcode),
elapsed.dimmed()
self.pc,
self.disassembler.once(u16::from_be_bytes(*opcode))
);
}
// decode opcode
if let Ok((inc, insn)) = Insn::decode(opcode) {
self.pc = self.pc.wrapping_add(inc as u16);
match insn {
Insn::cls => self.clear_screen(bus),
Insn::ret => self.ret(bus),
Insn::jmp { A } => self.jump(A),
Insn::call { A } => self.call(A, bus),
Insn::seb { B, x } => self.skip_equals_immediate(x, B),
Insn::sneb { B, x } => self.skip_not_equals_immediate(x, B),
Insn::se { y, x } => self.skip_equals(x, y),
Insn::movb { B, x } => self.load_immediate(x, B),
Insn::addb { B, x } => self.add_immediate(x, B),
Insn::mov { x, y } => self.load(x, y),
Insn::or { y, x } => self.or(x, y),
Insn::and { y, x } => self.and(x, y),
Insn::xor { y, x } => self.xor(x, y),
Insn::add { y, x } => self.add(x, y),
Insn::sub { y, x } => self.sub(x, y),
Insn::shr { y, x } => self.shift_right(x, y),
Insn::bsub { y, x } => self.backwards_sub(x, y),
Insn::shl { y, x } => self.shift_left(x, y),
Insn::sne { y, x } => self.skip_not_equals(x, y),
Insn::movI { A } => self.load_i_immediate(A),
Insn::jmpr { A } => self.jump_indexed(A),
Insn::rand { B, x } => self.rand(x, B),
Insn::draw { x, y, n } => self.draw(x, y, n, bus),
Insn::sek { x } => self.skip_key_equals(x),
Insn::snek { x } => self.skip_key_not_equals(x),
Insn::getdt { x } => self.load_delay_timer(x),
Insn::waitk { x } => self.wait_for_key(x),
Insn::setdt { x } => self.store_delay_timer(x),
Insn::movst { x } => self.store_sound_timer(x),
Insn::addI { x } => self.add_i(x),
Insn::font { x } => self.load_sprite(x),
Insn::bcd { x } => self.bcd_convert(x, bus),
Insn::dmao { x } => self.store_dma(x, bus),
Insn::dmai { x } => self.load_dma(x, bus),
}
} else {
return Err(crate::error::Error::UnimplementedInstruction {
word: u16::from_be_bytes(*opcode),
});
}
// process breakpoints
if self.breakpoints.contains(&self.pc) {
if !self.breakpoints.is_empty() && self.breakpoints.contains(&self.pc) {
self.flags.pause = true;
}
self
Ok(self)
}
/// Dumps the current state of all CPU registers, and the cycle count
@@ -770,7 +722,7 @@ impl Default for CPU {
debug: true,
..Default::default()
},
timer: Instant::now(),
timers: Default::default(),
breakpoints: vec![],
disassembler: Dis::default(),
}
@@ -787,18 +739,8 @@ impl Default for CPU {
// | 00e0 | Clear screen memory to all 0 |
// | 00ee | Return from subroutine |
impl CPU {
/// Unused instructions
#[inline]
fn unimplemented(&self, opcode: u16) {
unimplemented!("Opcode: {opcode:04x}")
}
/// 0aaa: Handles a "machine language function call" (lmao)
#[inline]
fn sys(&mut self, a: Adr) {
unimplemented!("SYS\t{a:03x}");
}
/// 00e0: Clears the screen memory to 0
#[inline]
#[inline(always)]
fn clear_screen(&mut self, bus: &mut Bus) {
if let Some(screen) = bus.get_region_mut(Region::Screen) {
for byte in screen {
@@ -807,7 +749,7 @@ impl CPU {
}
}
/// 00ee: Returns from subroutine
#[inline]
#[inline(always)]
fn ret(&mut self, bus: &impl Read<u16>) {
self.sp = self.sp.wrapping_add(2);
self.pc = bus.read(self.sp);
@@ -817,7 +759,7 @@ impl CPU {
// | 1aaa | Sets pc to an absolute address
impl CPU {
/// 1aaa: Sets the program counter to an absolute address
#[inline]
#[inline(always)]
fn jump(&mut self, a: Adr) {
// jump to self == halt
if a.wrapping_add(2) == self.pc {
@@ -830,7 +772,7 @@ impl CPU {
// | 2aaa | Pushes pc onto the stack, then jumps to a
impl CPU {
/// 2aaa: Pushes pc onto the stack, then jumps to a
#[inline]
#[inline(always)]
fn call(&mut self, a: Adr, bus: &mut impl Write<u16>) {
bus.write(self.sp, self.pc);
self.sp = self.sp.wrapping_sub(2);
@@ -841,7 +783,7 @@ impl CPU {
// | 3xbb | Skips next instruction if register X == b
impl CPU {
/// 3xbb: Skips the next instruction if register X == b
#[inline]
#[inline(always)]
fn skip_equals_immediate(&mut self, x: Reg, b: u8) {
if self.v[x] == b {
self.pc = self.pc.wrapping_add(2);
@@ -852,7 +794,7 @@ impl CPU {
// | 4xbb | Skips next instruction if register X != b
impl CPU {
/// 4xbb: Skips the next instruction if register X != b
#[inline]
#[inline(always)]
fn skip_not_equals_immediate(&mut self, x: Reg, b: u8) {
if self.v[x] != b {
self.pc = self.pc.wrapping_add(2);
@@ -867,7 +809,7 @@ impl CPU {
// | 5XY0 | Skip next instruction if vX == vY |
impl CPU {
/// 5xy0: Skips the next instruction if register X != register Y
#[inline]
#[inline(always)]
fn skip_equals(&mut self, x: Reg, y: Reg) {
if self.v[x] == self.v[y] {
self.pc = self.pc.wrapping_add(2);
@@ -878,7 +820,7 @@ impl CPU {
// | 6xbb | Loads immediate byte b into register vX
impl CPU {
/// 6xbb: Loads immediate byte b into register vX
#[inline]
#[inline(always)]
fn load_immediate(&mut self, x: Reg, b: u8) {
self.v[x] = b;
}
@@ -887,7 +829,7 @@ impl CPU {
// | 7xbb | Adds immediate byte b to register vX
impl CPU {
/// 7xbb: Adds immediate byte b to register vX
#[inline]
#[inline(always)]
fn add_immediate(&mut self, x: Reg, b: u8) {
self.v[x] = self.v[x].wrapping_add(b);
}
@@ -908,7 +850,7 @@ impl CPU {
// | 8xyE | X = X << 1 |
impl CPU {
/// 8xy0: Loads the value of y into x
#[inline]
#[inline(always)]
fn load(&mut self, x: Reg, y: Reg) {
self.v[x] = self.v[y];
}
@@ -916,7 +858,7 @@ impl CPU {
///
/// # Quirk
/// The original chip-8 interpreter will clobber vF for any 8-series instruction
#[inline]
#[inline(always)]
fn or(&mut self, x: Reg, y: Reg) {
self.v[x] |= self.v[y];
if self.flags.quirks.bin_ops {
@@ -927,7 +869,7 @@ impl CPU {
///
/// # Quirk
/// The original chip-8 interpreter will clobber vF for any 8-series instruction
#[inline]
#[inline(always)]
fn and(&mut self, x: Reg, y: Reg) {
self.v[x] &= self.v[y];
if self.flags.quirks.bin_ops {
@@ -938,7 +880,7 @@ impl CPU {
///
/// # Quirk
/// The original chip-8 interpreter will clobber vF for any 8-series instruction
#[inline]
#[inline(always)]
fn xor(&mut self, x: Reg, y: Reg) {
self.v[x] ^= self.v[y];
if self.flags.quirks.bin_ops {
@@ -946,14 +888,14 @@ impl CPU {
}
}
/// 8xy4: Performs addition of vX and vY, and stores the result in vX
#[inline]
#[inline(always)]
fn add(&mut self, x: Reg, y: Reg) {
let carry;
(self.v[x], carry) = self.v[x].overflowing_add(self.v[y]);
self.v[0xf] = carry.into();
}
/// 8xy5: Performs subtraction of vX and vY, and stores the result in vX
#[inline]
#[inline(always)]
fn sub(&mut self, x: Reg, y: Reg) {
let carry;
(self.v[x], carry) = self.v[x].overflowing_sub(self.v[y]);
@@ -963,7 +905,7 @@ impl CPU {
///
/// # Quirk
/// On the original chip-8 interpreter, this shifts vY and stores the result in vX
#[inline]
#[inline(always)]
fn shift_right(&mut self, x: Reg, y: Reg) {
let src: Reg = if self.flags.quirks.shift { y } else { x };
let shift_out = self.v[src] & 1;
@@ -971,7 +913,7 @@ impl CPU {
self.v[0xf] = shift_out;
}
/// 8xy7: Performs subtraction of vY and vX, and stores the result in vX
#[inline]
#[inline(always)]
fn backwards_sub(&mut self, x: Reg, y: Reg) {
let carry;
(self.v[x], carry) = self.v[y].overflowing_sub(self.v[x]);
@@ -982,7 +924,7 @@ impl CPU {
/// # Quirk
/// On the original chip-8 interpreter, this would perform the operation on vY
/// and store the result in vX. This behavior was left out, for now.
#[inline]
#[inline(always)]
fn shift_left(&mut self, x: Reg, y: Reg) {
let src: Reg = if self.flags.quirks.shift { y } else { x };
let shift_out: u8 = self.v[src] >> 7;
@@ -998,7 +940,7 @@ impl CPU {
// | 9XY0 | Skip next instruction if vX != vY |
impl CPU {
/// 9xy0: Skip next instruction if X != y
#[inline]
#[inline(always)]
fn skip_not_equals(&mut self, x: Reg, y: Reg) {
if self.v[x] != self.v[y] {
self.pc = self.pc.wrapping_add(2);
@@ -1009,7 +951,7 @@ impl CPU {
// | Aaaa | Load address #a into register I
impl CPU {
/// Aadr: Load address #adr into register I
#[inline]
#[inline(always)]
fn load_i_immediate(&mut self, a: Adr) {
self.i = a;
}
@@ -1021,7 +963,7 @@ impl CPU {
///
/// Quirk:
/// On the Super-Chip, this does stupid shit
#[inline]
#[inline(always)]
fn jump_indexed(&mut self, a: Adr) {
let reg = if self.flags.quirks.stupid_jumps {
a as usize >> 8
@@ -1035,7 +977,7 @@ impl CPU {
// | Cxbb | Stores a random number & the provided byte into vX
impl CPU {
/// Cxbb: Stores a random number & the provided byte into vX
#[inline]
#[inline(always)]
fn rand(&mut self, x: Reg, b: u8) {
self.v[x] = random::<u8>() & b;
}
@@ -1047,6 +989,7 @@ impl CPU {
///
/// # Quirk
/// On the original chip-8 interpreter, this will wait for a VBI
#[inline(always)]
fn draw(&mut self, x: Reg, y: Reg, n: Nib, bus: &mut Bus) {
let (x, y) = (self.v[x] as u16 % 64, self.v[y] as u16 % 32);
if self.flags.quirks.draw_wait {
@@ -1084,7 +1027,7 @@ impl CPU {
// | eXa1 | Skip next instruction if key != vX |
impl CPU {
/// Ex9E: Skip next instruction if key == vX
#[inline]
#[inline(always)]
fn skip_key_equals(&mut self, x: Reg) {
let x = self.v[x] as usize;
if self.keys[x] {
@@ -1092,7 +1035,7 @@ impl CPU {
}
}
/// ExaE: Skip next instruction if key != vX
#[inline]
#[inline(always)]
fn skip_key_not_equals(&mut self, x: Reg) {
let x = self.v[x] as usize;
if !self.keys[x] {
@@ -1119,12 +1062,12 @@ impl CPU {
/// ```py
/// vX = DT
/// ```
#[inline]
#[inline(always)]
fn load_delay_timer(&mut self, x: Reg) {
self.v[x] = self.delay as u8;
}
/// Fx0A: Wait for key, then vX = K
#[inline]
#[inline(always)]
fn wait_for_key(&mut self, x: Reg) {
if let Some(key) = self.flags.lastkey {
self.v[x] = key as u8;
@@ -1138,7 +1081,7 @@ impl CPU {
/// ```py
/// DT = vX
/// ```
#[inline]
#[inline(always)]
fn store_delay_timer(&mut self, x: Reg) {
self.delay = self.v[x] as f64;
}
@@ -1146,7 +1089,7 @@ impl CPU {
/// ```py
/// ST = vX;
/// ```
#[inline]
#[inline(always)]
fn store_sound_timer(&mut self, x: Reg) {
self.sound = self.v[x] as f64;
}
@@ -1154,7 +1097,7 @@ impl CPU {
/// ```py
/// I += vX;
/// ```
#[inline]
#[inline(always)]
fn add_i(&mut self, x: Reg) {
self.i += self.v[x] as u16;
}
@@ -1162,12 +1105,12 @@ impl CPU {
/// ```py
/// I = sprite(X);
/// ```
#[inline]
#[inline(always)]
fn load_sprite(&mut self, x: Reg) {
self.i = self.font + (5 * (self.v[x] as Adr % 0x10));
}
/// Fx33: BCD convert X into I`[0..3]`
#[inline]
#[inline(always)]
fn bcd_convert(&mut self, x: Reg, bus: &mut Bus) {
let x = self.v[x];
bus.write(self.i.wrapping_add(2), x % 10);
@@ -1179,7 +1122,7 @@ impl CPU {
/// # Quirk
/// The original chip-8 interpreter uses I to directly index memory,
/// with the side effect of leaving I as I+X+1 after the transfer is done.
#[inline]
#[inline(always)]
fn store_dma(&mut self, x: Reg, bus: &mut Bus) {
let i = self.i as usize;
for (reg, value) in bus
@@ -1199,7 +1142,7 @@ impl CPU {
/// # Quirk
/// The original chip-8 interpreter uses I to directly index memory,
/// with the side effect of leaving I as I+X+1 after the transfer is done.
#[inline]
#[inline(always)]
fn load_dma(&mut self, x: Reg, bus: &mut Bus) {
let i = self.i as usize;
for (reg, value) in bus