# Mirror of https://git.soft.fish/val/MicroCorruption.git
# Synced 2024-11-24 12:15:58 +00:00
# 250 lines, 8.1 KiB, Python
|
|
|
|
# Copied from MSProbe/msprobe.py
|
|
|
|
#Index into asm of the instruction being decoded. Counted in 16-bit words, NOT bytes;
#every decoded instruction advances it by the number of words it occupied.
PC = 0

#Three-word buffer: the instruction word plus room for its extension words.
#dis_bytes overwrites all three slots before decoding, so these initial values are placeholders.
asm = [0x7f7f, 0x4242, 0x4343]

#Not referenced by any code visible in this part of the file.
output = dict()

#Display name for each of the 16 registers, indexed by register number.
register_names = ['pc', 'sp', 'sr', 'cg'] + ['r%d' % number for number in range(4, 16)]
|
|
|
|
def dis_int(i: int, e: str = 'big'):
    """Disassemble one instruction given as a single integer.

    The integer is first written out as exactly 6 bytes, most significant
    byte first, so a hex literal reads in the same order as the bytes it
    stands for. Values shorter than 6 bytes are padded with leading zero
    bytes. Those bytes are then handed to dis_bytes.

    i -- non-negative integer that fits in 6 bytes (int.to_bytes raises
         OverflowError otherwise).
    e -- byte order of each 16-bit word, passed through to dis_bytes.

    Returns the value returned by dis_bytes.
    """
    #The result used to be computed and then dropped, so callers always got None.
    return dis_bytes(i.to_bytes(6, 'big'), e)
|
|
|
|
def dis_bytes(b: bytes, e: str = 'big'):
    """Load up to three 16-bit words from b into asm and disassemble the first.

    b -- raw bytes; bytes 0-1, 2-3 and 4-5 become asm[0], asm[1] and asm[2].
    e -- byte order handed to int.from_bytes for each word.

    PC is reset to 0 before decoding. Returns the value returned by disassemble.
    """
    global PC, asm
    for slot in range(3):
        first_byte = slot * 2
        asm[slot] = int.from_bytes(b[first_byte:first_byte + 2], e)
    PC = 0
    return disassemble(asm[PC])
|
|
|
|
|
|
|
|
def bitrep(number, bits = 16):
    """Return the low `bits` bits of number as a binary string, zero-padded to `bits` characters."""
    #A run of `bits` ones keeps only the low bits (and makes negative numbers non-negative)
    all_ones = int('1' * bits, 2)
    return format(number & all_ones, 'b').zfill(bits)
|
|
|
|
def hexrep(number, zeroes = 4):
    """Return the low `zeroes` hex digits of number as a lowercase string, zero-padded to `zeroes` characters."""
    #Four bits per hex digit; the mask drops anything above the requested width
    all_ones = int('1' * (zeroes * 4), 2)
    return format(number & all_ones, 'x').zfill(zeroes)
|
|
|
|
def disassemble(instruction):
    """Disassemble one 16-bit instruction word by handing it to the decoder for its encoding class."""
    #Work on the word as a 16-character bit string, most significant bit first
    ins = bitrep(instruction)

    #The leading bits select the encoding class
    if ins.startswith('001'):
        return disassemble_jump_instruction(ins)
    if ins.startswith('000100'):
        return disassemble_one_word_instruction(ins)
    #Everything else is decoded as a two-operand instruction
    return disassemble_two_word_instruction(ins)
|
|
|
|
one_word_opcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti']

def disassemble_one_word_instruction(ins):
    """Given a one-operand instruction in a 16-bit string, output disassembly.

    ins -- 16-character bit string, most significant bit first.

    Advances the global PC by one word, or by two when the operand used an
    extension word. Returns 'opcode[.b] operand'. The 3-bit opcode field has
    eight values but only seven are named in one_word_opcodes; the eighth is
    rendered with the '!!!' placeholder that two_word_opcodes uses for
    undefined encodings.
    """
    global PC #Get PC

    bytemode = '.b' if ins[9] == '1' else ''
    opcodeID = int(ins[6:9], 2)
    #opcodeID 7 is past the end of the table; it used to raise IndexError
    opcode = one_word_opcodes[opcodeID] if opcodeID < len(one_word_opcodes) else '!!!'

    reg = int(ins[12:], 2)
    adrmode = int(ins[10:12], 2)
    reg_output, extensionWord = disassemble_addressing_mode(reg, adrmode)

    #Instruction word, plus the extension word when the operand consumed one
    PC += 2 if extensionWord else 1

    return opcode + bytemode + ' ' + reg_output
|
|
|
|
jump_opcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn ', 'jge', 'jl ', 'jmp']

def disassemble_jump_instruction(ins):
    """Given a jump instruction in a 16-bit string, output disassembly.

    ins -- 16-character bit string, most significant bit first.

    Advances the global PC by one word. Returns the mnemonic followed by the
    signed byte offset in hex, e.g. 'jmp +0x10' or 'jne -0x6'. Offsets that
    are zero or positive always carry an explicit '+'.
    """
    global PC #Get PC

    condition = int(ins[3:6], 2) #Get condition code from bits

    #The low 10 bits are a two's-complement word offset
    wordOffset = int(ins[6:], 2)
    if ins[6] == '1': #Sign bit
        wordOffset -= 1 << 10

    #Words -> bytes, plus 2 for the jump word itself
    pcOffset = wordOffset * 2 + 2

    #Add a plus if it's not negative for readability. This must look at the final
    #byte offset: a word offset of -1 is negative but yields a byte offset of 0.
    plus = '+' if pcOffset >= 0 else ''

    PC += 1

    return jump_opcodes[condition] + ' ' + plus + hex(pcOffset)
|
|
|
|
#Two-operand opcodes start at 4 (0b0100)
two_word_opcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and']

def disassemble_two_word_instruction(ins):
    """Given a two-operand instruction in a 16-bit string, output disassembly.

    ins -- 16-character bit string, most significant bit first.

    Advances the global PC by one word for the instruction plus one word for
    each operand that used an extension word. Returns 'opcode[.b] src, dst',
    or the emulated (pseudo) instruction when the encoding matches one:
    ret, clrc/setc/clrz/setz/clrn/setn/dint/eint, nop, br, pop, rla, rlc,
    inv, clr, tst, dec, decd, inc, incd, adc, dadc, sbc.
    """
    global PC #Get PC

    bytemode = '.b' if ins[9] == '1' else ''
    opcodeID = int(ins[0:4], 2)
    opcode = two_word_opcodes[opcodeID]

    srcReg = int(ins[4:8], 2)
    srcAdrMode = int(ins[10:12], 2)

    #The addressing-mode decoder reads asm[PC + 1], so PC has to step past the
    #source's extension word before the destination is decoded.
    reg_output_src, extWordSrc = disassemble_addressing_mode(srcReg, srcAdrMode)
    PC += 1 if extWordSrc else 0

    dstReg = int(ins[12:], 2)
    dstAdrMode = int(ins[8], 2) #Destination has a single mode bit

    reg_output_dst, ext_word_dst = disassemble_addressing_mode(dstReg, dstAdrMode)
    PC += 1 if ext_word_dst else 0

    PC += 1 #Instruction word

    finalins = opcode + bytemode + ' ' + reg_output_src + ', ' + reg_output_dst

    #Disassemble pseudo (emulated) instructions

    #These are the easy ones to catch: one exact spelling each.
    #eint is 'bis #8, sr'; it used to be compared against dint's 'bic #8, sr'
    #and therefore could never match.
    fixed_forms = {
        'mov @sp+, pc': 'ret',
        #Status register twiddling
        'bic #1, sr': 'clrc',
        'bis #1, sr': 'setc',
        'bic #2, sr': 'clrz',
        'bis #2, sr': 'setz',
        'bic #4, sr': 'clrn',
        'bis #4, sr': 'setn',
        'bic #8, sr': 'dint',
        'bis #8, sr': 'eint',
    }
    finalins = fixed_forms.get(finalins, finalins)

    #nop = mov dst, dst
    if opcode == 'mov' and reg_output_src == reg_output_dst:
        finalins = 'nop'

    #These ones require a small amount of effort because it uses any register.
    #All of these are one-operand instructions, so if we need to reassemble
    #the instruction, it'll simply follow the one-operand format.

    #Emulated instructions recognised purely by opcode + constant source
    constant_source_forms = {
        ('xor', '#0xffff {-1}'): 'inv', #inv = xor 0xffff, dst
        ('cmp', '#0'): 'tst',           #tst = cmp #0, dst
        ('sub', '#1'): 'dec',           #dec = sub #1, dst
        ('sub', '#2'): 'decd',          #decd = sub #2, dst
        ('add', '#1'): 'inc',           #inc = add #1, dst
        ('add', '#2'): 'incd',          #incd = add #2, dst
        ('addc', '#0'): 'adc',          #adc = addc #0, dst
        ('dadd', '#0'): 'dadc',         #dadc = dadd #0, dst
        ('subc', '#0'): 'sbc',          #sbc = subc #0, dst
    }

    operand = reg_output_dst
    #Branch. Requires a little bit of extra sanity checking
    #because it could get mistaken for ret
    if opcode == 'mov' and reg_output_dst == 'pc' and finalins != 'ret': #br = mov src, pc
        emulated = 'br'
        operand = reg_output_src #We're actually using src here
    #Pop. Could also get mistaken for ret.
    elif opcode == 'mov' and reg_output_src == '@sp+' and finalins != 'ret': #pop = mov @sp+, dst
        emulated = 'pop'
    #Shift and rotate left: rla = add dst, dst / rlc = addc dst, dst.
    #Compare the rendered operands, not just the register numbers:
    #'add @r5, r5' or 'add #0x10, pc' share a register number but are not shifts.
    elif opcode in ('add', 'addc') and reg_output_src == reg_output_dst:
        emulated = 'rla' if opcode == 'add' else 'rlc'
    #Extra sanity checking to prevent being mistaken for nop
    elif opcode == 'mov' and reg_output_src == '#0' and reg_output_dst != '#0': #clr = mov #0, dst
        emulated = 'clr'
    else:
        #None means the instruction is not an emulated instruction
        emulated = constant_source_forms.get((opcode, reg_output_src))

    if emulated is not None:
        finalins = emulated + bytemode + ' ' + operand

    return finalins
|
|
|
|
|
|
adr_modes = ['{register}', '{index}({register})', '@{register}', '@{register}+']

def disassemble_addressing_mode(reg, adrmode):
    """Render one operand from its register number and addressing mode number.

    Returns a (text, used_extension_word) tuple. The flag is True when the
    operand text was built from the word at asm[PC + 1], so the caller can
    advance PC past it.
    """

    #http://mspgcc.sourceforge.net/manual/x147.html

    took_next_word = False

    #r2 (status register) and r3 (CG) are encoded as constant registers,
    #so each mode below special-cases them before the general form.
    if adrmode == 0:
        if reg == 3:
            operand_text = '#0'
        else: #Normal access, r2 included
            operand_text = adr_modes[0].format(register=register_names[reg])

    elif adrmode == 1:
        if reg == 3:
            operand_text = '#1'
        else:
            next_word = hex(asm[PC + 1]) #Get next word
            if reg == 2: #Absolute address using extension word
                operand_text = '&' + next_word
            else:
                operand_text = adr_modes[1].format(register=register_names[reg], index=next_word)
            took_next_word = True

    elif adrmode == 2:
        if reg == 2:
            operand_text = '#4'
        elif reg == 3:
            operand_text = '#2'
        else:
            operand_text = adr_modes[2].format(register=register_names[reg])

    elif adrmode == 3:
        if reg == 2:
            operand_text = '#8'
        elif reg == 3:
            #Just a little reminder that all bits set == -1
            operand_text = '#0xffff {-1}'
        elif reg == 0: #PC was incremented for a constant
            operand_text = '#' + hex(asm[PC + 1])
            took_next_word = True
        else:
            operand_text = adr_modes[3].format(register=register_names[reg])

    return (operand_text, took_next_word)
|