MicroCorruption/17-Lagos/disassemble.py

250 lines
8.1 KiB
Python

# Copied from MSProbe/msprobe.py
# Shared disassembler state.
# PC is a cursor into `asm`, counted in 16-bit words (NOT bytes); every
# disassembled instruction advances it.
PC = 0
# Three-word window holding the instruction word and up to two extension
# words. These initial values are placeholders that dis_bytes() overwrites.
asm = [0x7f7f, 0x4242, 0x4343]
# Not referenced by the code visible in this part of the file.
output = {}
# Register mnemonics indexed by register number; r0-r3 go by their aliases.
register_names = ['pc', 'sp', 'sr', 'cg'] + ['r%d' % n for n in range(4, 16)]
def dis_int(i: int, e: str = 'big'):
    """Disassemble one instruction supplied as a single integer.

    The integer is serialized to six bytes, most significant byte first,
    and handed to dis_bytes(). `e` is forwarded unchanged; dis_bytes() uses
    it as the byte order when decoding each 16-bit word.

    Returns whatever dis_bytes() returns (the disassembly text).
    Raises OverflowError (from int.to_bytes) if `i` is negative or does not
    fit in six bytes.
    """
    # Previously the result of dis_bytes() was dropped, so this function
    # always returned None.
    return dis_bytes(i.to_bytes(6, 'big'), e)
def dis_bytes(b: bytes, e: str = 'big'):
    """Load up to six bytes into the instruction window and disassemble them.

    `b` is split into three consecutive 2-byte words, each decoded with
    byte order `e` ('big' or 'little') and stored in the global `asm`
    list in place. The global PC is reset to 0 and the first word is
    disassembled; extension words are read from asm[1] and asm[2] as needed.

    Returns the disassembly text produced by disassemble().
    """
    global PC
    for slot in range(3):
        start = 2 * slot
        asm[slot] = int.from_bytes(b[start:start + 2], e)
    PC = 0
    return disassemble(asm[0])
def bitrep(number, bits = 16):
    """Return `number` as a binary string exactly `bits` digits wide.

    Only the low `bits` bits are kept, so negative values come out in
    two's-complement form and oversized values are truncated. Shorter
    results are left-padded with zeroes.

    Raises ValueError when `bits` is not positive.
    """
    low_bits = number & int('1' * bits, 2)
    return bin(low_bits)[2:].zfill(bits)
def hexrep(number, zeroes = 4):
    """Return `number` as a lowercase hex string exactly `zeroes` digits wide.

    Only the low `zeroes * 4` bits are kept, so negative values come out in
    two's-complement form and oversized values are truncated. Shorter
    results are left-padded with zeroes. No '0x' prefix is included.

    Raises ValueError when `zeroes` is not positive.
    """
    low_nibbles = number & int('1' * (zeroes * 4), 2)
    return hex(low_nibbles)[2:].zfill(zeroes)
def disassemble(instruction):
    """Disassemble one 16-bit instruction word.

    The word is rendered as a 16-character bit string and routed by its
    leading bits: '001' selects the jump decoder, '000100' the one-operand
    decoder, and everything else the two-operand decoder.

    Returns the text produced by the selected decoder.
    """
    bits = bitrep(instruction)
    if bits.startswith('001'):
        return disassemble_jump_instruction(bits)
    if bits.startswith('000100'):
        return disassemble_one_word_instruction(bits)
    return disassemble_two_word_instruction(bits)
one_word_opcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti']
def disassemble_one_word_instruction(ins):
    """Disassemble a one-operand instruction (TI "Format II") from a 16-bit string.

    Bit layout read here (index 0 is the most significant bit):
    ins[6:9] opcode, ins[9] byte/word flag, ins[10:12] addressing mode,
    ins[12:16] register.

    Side effect: advances the global PC by one word, plus one more if the
    operand consumed an extension word.

    Returns the disassembly text, e.g. 'push r11'. An opcode field with no
    defined mnemonic is printed as '!!!', the same marker used for the
    undefined slots of two_word_opcodes.
    """
    global PC #Get PC
    bytemode = '.b' if ins[9] == '1' else ''
    opcodeID = int(ins[6:9], 2)
    #The 3-bit field can hold 7 but only 7 mnemonics (0-6) exist; indexing
    #the table directly raised IndexError on such words.
    opcode = one_word_opcodes[opcodeID] if opcodeID < len(one_word_opcodes) else '!!!'
    reg = int(ins[12:], 2)
    adrmode = int(ins[10:12], 2)
    reg_output, extensionWord = disassemble_addressing_mode(reg, adrmode)
    PC += 1 + (1 if extensionWord else 0)
    return opcode + bytemode + ' ' + reg_output
jump_opcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn ', 'jge', 'jl ', 'jmp']
def disassemble_jump_instruction(ins):
    """Disassemble a jump instruction from a 16-character bit string.

    ins[3:6] is the condition code (an index into jump_opcodes) and
    ins[6:16] is a signed 10-bit offset counted in words. The printed
    value is that offset converted to bytes, plus 2.

    Side effect: advances the global PC by one word.

    Returns e.g. 'jmp +0x10'. Offsets whose sign bit is clear get an
    explicit '+'; the others are printed as hex() renders them.
    """
    global PC
    mnemonic = jump_opcodes[int(ins[3:6], 2)]
    is_negative = ins[6] == '1'
    word_offset = int(ins[6:], 2)
    if is_negative:
        #Two's-complement correction for the 10-bit field
        word_offset -= 1 << 10
    byte_offset = word_offset * 2 + 2
    PC += 1
    sign = '' if is_negative else '+'
    return f'{mnemonic} {sign}{hex(byte_offset)}'
#Two-operand opcodes start at 4 (0b0100)
two_word_opcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and']
def disassemble_two_word_instruction(ins):
    """Disassemble a two-operand instruction (TI "Format I") from a 16-bit string.

    Bit layout read here (index 0 is the most significant bit):
    ins[0:4] opcode, ins[4:8] source register, ins[8] destination
    addressing mode, ins[9] byte/word flag, ins[10:12] source addressing
    mode, ins[12:16] destination register.

    Emulated instructions (ret, br, pop, clr, inc, rla, ...) are recognised
    and printed under their conventional names instead of the raw encoding.

    Side effect: advances the global PC by one word for the instruction,
    plus one for each operand that consumed an extension word.

    Returns the disassembly text, e.g. 'mov r15, r14' or 'ret'.
    """
    global PC #Get PC
    bytemode = '.b' if ins[9] == '1' else ''
    opcodeID = int(ins[0:4], 2)
    opcode = two_word_opcodes[opcodeID]
    srcReg = int(ins[4:8], 2)
    srcAdrMode = int(ins[10:12], 2)
    reg_output_src, extWordSrc = disassemble_addressing_mode(srcReg, srcAdrMode)
    #Step past the source extension word before decoding the destination,
    #because disassemble_addressing_mode always reads asm[PC + 1].
    PC += 1 if extWordSrc else 0
    dstReg = int(ins[12:], 2)
    dstAdrMode = int(ins[8], 2)
    reg_output_dst, ext_word_dst = disassemble_addressing_mode(dstReg, dstAdrMode)
    PC += 1 if ext_word_dst else 0
    PC += 1 #Instruction word
    finalins = opcode + bytemode + ' ' + reg_output_src + ', ' + reg_output_dst
    #Disassemble pseudo (emulated) instructions
    #These are the easy ones to catch: the whole text maps to a fixed name.
    fixed_aliases = {
        'mov @sp+, pc': 'ret',
        #Status register twiddling
        'bic #1, sr': 'clrc',
        'bis #1, sr': 'setc',
        'bic #2, sr': 'clrz',
        'bis #2, sr': 'setz',
        'bic #4, sr': 'clrn',
        'bis #4, sr': 'setn',
        'bic #8, sr': 'dint',
        #eint SETS the interrupt-enable bit, so it is bis, not bic. The old
        #check compared against 'bic #8, sr' and could never match.
        'bis #8, sr': 'eint',
    }
    finalins = fixed_aliases.get(finalins, finalins)
    #nop = mov dst, dst
    finalins = 'nop' if opcode == 'mov' and reg_output_src == reg_output_dst else finalins
    #These ones require a small amount of effort because it uses any register.
    #All of these are one-operand instructions, so if we need to reassemble
    #the instruction, it'll simply follow the one-operand format.
    reassembleins = True
    uses_dest = True
    #Branch. Requires a little bit of extra sanity checking
    #because it could get mistaken for ret
    if opcode == 'mov' and reg_output_dst == 'pc' and finalins != 'ret': #br = mov src, pc
        opcode = 'br'
        uses_dest = False #We're actually using src here
    #Pop. Could also get mistaken for ret.
    elif opcode == 'mov' and reg_output_src == '@sp+' and finalins != 'ret': #pop = mov @sp+, dst
        opcode = 'pop'
    #Shift and rotate left. Compare the rendered operands rather than the
    #register numbers: 'add @r5, r5' shares a register number but is not a
    #shift of r5, and indexed operands must also agree on the index.
    elif opcode == 'add' and reg_output_src == reg_output_dst: #rla = add dst, dst
        opcode = 'rla'
    elif opcode == 'addc' and reg_output_src == reg_output_dst: #rlc = addc dst, dst
        opcode = 'rlc'
    #Common one-operand instructions
    elif opcode == 'xor' and reg_output_src == '#0xffff {-1}': #inv = xor 0xffff, dst
        opcode = 'inv'
    #Extra sanity checking to prevent being mistaken for nop
    elif opcode == 'mov' and reg_output_src == '#0' and reg_output_dst != '#0': #clr = mov #0, dst
        opcode = 'clr'
    elif opcode == 'cmp' and reg_output_src == '#0': #tst = cmp #0, dst
        opcode = 'tst'
    #Increment and decrement (by one or two)
    elif opcode == 'sub' and reg_output_src == '#1': #dec = sub #1, dst
        opcode = 'dec'
    elif opcode == 'sub' and reg_output_src == '#2': #decd = sub #2, dst
        opcode = 'decd'
    elif opcode == 'add' and reg_output_src == '#1': #inc = add #1, dst
        opcode = 'inc'
    elif opcode == 'add' and reg_output_src == '#2': #incd = add #2, dst
        opcode = 'incd'
    #Add and subtract only the carry bit:
    elif opcode == 'addc' and reg_output_src == '#0': #adc = addc #0, dst
        opcode = 'adc'
    elif opcode == 'dadd' and reg_output_src == '#0': #dadc = dadd #0, dst
        opcode = 'dadc'
    elif opcode == 'subc' and reg_output_src == '#0': #sbc = subc #0, dst
        opcode = 'sbc'
    #The instruction is not an emulated instruction
    else:
        reassembleins = False
    if reassembleins:
        finalins = opcode + bytemode + ' ' + (reg_output_dst if uses_dest else reg_output_src)
    return finalins
adr_modes = ['{register}', '{index}({register})', '@{register}', '@{register}+']
def disassemble_addressing_mode(reg, adrmode):
    """Render one operand and report whether it used an extension word.

    `reg` is the register number (0-15) and `adrmode` the addressing mode
    number (0-3). Registers 2 and 3 are treated as constant generators:
    r3 always yields a constant, and r2 yields #4 / #8 in modes 2 / 3 and
    an absolute '&address' in mode 1. Mode 3 on r0 (pc) is rendered as an
    immediate '#value'. Operands that need an extension word read it from
    asm[PC + 1].

    Returns a (text, used_extension_word) tuple so the caller can advance
    PC accordingly.
    """
    #http://mspgcc.sourceforge.net/manual/x147.html
    #r3 (CG): every mode is a built-in constant, never an extension word.
    if reg == 3:
        #Just a little reminder that all bits set == -1
        return ({0: '#0', 1: '#1', 2: '#2', 3: '#0xffff {-1}'}[adrmode], False)
    #r2 (status register) doubles as a constant generator in modes 2 and 3.
    if reg == 2 and adrmode >= 2:
        return ({2: '#4', 3: '#8'}[adrmode], False)
    #r2 in mode 1: absolute address taken from the next word.
    if reg == 2 and adrmode == 1:
        return ('&' + hex(asm[PC + 1]), True)
    #pc in mode 3: the next word is an immediate constant.
    if reg == 0 and adrmode == 3:
        return ('#' + hex(asm[PC + 1]), True)
    name = register_names[reg]
    #Indexed mode: the index comes from the next word.
    if adrmode == 1:
        return (adr_modes[1].format(register=name, index=hex(asm[PC + 1])), True)
    #Plain register, indirect, or indirect with auto-increment.
    return (adr_modes[adrmode].format(register=name), False)