# Copied from MSProbe/msprobe.py
"""Minimal MSP430 single-instruction disassembler.

Feed up to three 16-bit words (one instruction word plus at most two
extension words) through dis_int() or dis_bytes() and get the textual
disassembly back, with common emulated (pseudo) instructions recognised.
"""

# Incremented by each disassembled instruction, incremented in words NOT bytes
PC = 0
# Instruction word followed by up to two extension words. The initial values
# are placeholders only; dis_bytes() overwrites them before disassembling.
asm = [0x7f7f, 0x4242, 0x4343]
output = {}

register_names = ['pc', 'sp', 'sr', 'cg', 'r4', 'r5', 'r6', 'r7',
                  'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']


def dis_int(i: int, e: str = 'big'):
    """Disassemble one instruction given as a 48-bit integer.

    The integer is laid out as six bytes, most significant byte first (so it
    reads like the hex dump it was typed from); ``e`` is the byte order of
    each 16-bit word inside that dump. Returns the disassembly string.
    """
    return dis_bytes(i.to_bytes(6, 'big'), e)


def dis_bytes(b: bytes, e: str = 'big'):
    """Disassemble one instruction from up to six raw bytes.

    Bytes 0-1 are the instruction word, bytes 2-3 and 4-5 the optional
    extension words. Missing trailing bytes decode as 0. ``e`` is the byte
    order of each word ('big' or 'little'). Returns the disassembly string.
    """
    global PC, asm
    asm[0] = int.from_bytes(b[0:2], e)
    asm[1] = int.from_bytes(b[2:4], e)
    asm[2] = int.from_bytes(b[4:6], e)
    # Always start decoding at the first word of the buffer.
    PC = 0
    return disassemble(asm[PC])


def bitrep(number, bits=16):
    """Converts to binary form, fixing leading zeroes."""
    mask = (1 << bits) - 1
    # Masking also folds negative numbers into their two's complement form.
    return bin(number & mask)[2:].zfill(bits)


def hexrep(number, zeroes=4):
    """Converts to hex form, fixing leading zeroes."""
    mask = (1 << (zeroes * 4)) - 1
    return hex(number & mask)[2:].zfill(zeroes)


def disassemble(instruction):
    """Main disassembly, calls other disassembly functions given a 2-byte instruction."""
    # Work on the 16-character bit string: index 0 is the most significant bit.
    ins = bitrep(instruction)

    # What kind of instruction are we dealing with?
    if ins[0:3] == '001':
        return disassemble_jump_instruction(ins)
    elif ins[0:6] == '000100':
        return disassemble_one_word_instruction(ins)
    else:
        return disassemble_two_word_instruction(ins)


one_word_opcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti']


def disassemble_one_word_instruction(ins):
    """Given a one-operand (format I) instruction in a 16-bit string, output disassembly.

    Advances the global PC by one word, plus one more if the operand uses an
    extension word. An opcode field with no table entry is rendered as '!!!'.
    """
    global PC

    bytemode = '.b' if ins[9] == '1' else ''
    opcodeID = int(ins[6:9], 2)
    # The 3-bit opcode field can hold 7, but only seven opcodes exist; use the
    # same '!!!' placeholder as the two-operand table instead of crashing.
    opcode = one_word_opcodes[opcodeID] if opcodeID < len(one_word_opcodes) else '!!!'

    # reti takes no operand, so the operand bits are not decoded.
    if opcode == 'reti':
        PC += 1
        return opcode

    reg = int(ins[12:], 2)
    adrmode = int(ins[10:12], 2)
    reg_output, extensionWord = disassemble_addressing_mode(reg, adrmode)

    PC += 1 + (1 if extensionWord else 0)
    return opcode + bytemode + ' ' + reg_output


jump_opcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn ', 'jge', 'jl ', 'jmp']


def disassemble_jump_instruction(ins):
    """Given a jump instruction (format II) in a 16-bit string, output disassembly.

    The result is the mnemonic followed by the signed byte displacement
    relative to the jump instruction itself. Advances the global PC by one.
    """
    global PC

    condition = int(ins[3:6], 2)  # Get condition code from bits

    # Sign extend the 10-bit word offset to 16 bits
    offset = ins[6] * 6 + ins[6:]
    sign_subtract = 65536 if offset[0] == '1' else 0  # Sign bit
    # Offset is in words and counted from the following instruction (+2 bytes)
    pcOffset = ((int(offset, 2) - sign_subtract) * 2) + 2

    # Add a plus if the displayed value is not negative, for readability.
    # (A word offset of -1 gives a displacement of 0, which still gets '+'.)
    plus = '+' if pcOffset >= 0 else ''

    PC += 1
    return jump_opcodes[condition] + ' ' + plus + hex(pcOffset)


# Two-operand opcodes start at 4 (0b0100)
two_word_opcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc',
                    'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and']

# Status register twiddling: full two-operand text -> emulated mnemonic
_status_register_aliases = {
    'bic #1, sr': 'clrc',
    'bis #1, sr': 'setc',
    'bic #2, sr': 'clrz',
    'bis #2, sr': 'setz',
    'bic #4, sr': 'clrn',
    'bis #4, sr': 'setn',
    'bic #8, sr': 'dint',
    'bis #8, sr': 'eint',
}

# (opcode, rendered source operand) -> one-operand emulated mnemonic on dst
_constant_source_aliases = {
    ('xor', '#0xffff {-1}'): 'inv',   # inv = xor #0xffff, dst
    ('cmp', '#0'): 'tst',             # tst = cmp #0, dst
    ('sub', '#1'): 'dec',             # dec = sub #1, dst
    ('sub', '#2'): 'decd',            # decd = sub #2, dst
    ('add', '#1'): 'inc',             # inc = add #1, dst
    ('add', '#2'): 'incd',            # incd = add #2, dst
    ('addc', '#0'): 'adc',            # adc = addc #0, dst
    ('dadd', '#0'): 'dadc',           # dadc = dadd #0, dst
    ('subc', '#0'): 'sbc',            # sbc = subc #0, dst
}


def disassemble_two_word_instruction(ins):
    """Given a two-operand instruction (format III) in a 16-bit string, output disassembly.

    Advances the global PC by one word plus one per extension word used.
    Emulated (pseudo) instructions such as ret, pop, br, clr, inc or eint are
    printed in their emulated form.
    """
    global PC

    bytemode = '.b' if ins[9] == '1' else ''
    opcode = two_word_opcodes[int(ins[0:4], 2)]

    srcReg = int(ins[4:8], 2)
    srcAdrMode = int(ins[10:12], 2)
    reg_output_src, extWordSrc = disassemble_addressing_mode(srcReg, srcAdrMode)
    # Step over the source extension word before decoding the destination, so
    # that the destination reads the *next* extension word.
    PC += 1 if extWordSrc else 0

    dstReg = int(ins[12:], 2)
    dstAdrMode = int(ins[8], 2)  # Destination only has register/indexed modes
    reg_output_dst, ext_word_dst = disassemble_addressing_mode(dstReg, dstAdrMode)
    PC += 1 if ext_word_dst else 0

    PC += 1  # Instruction word

    finalins = opcode + bytemode + ' ' + reg_output_src + ', ' + reg_output_dst

    # Disassemble pseudo (emulated) instructions
    # These are the easy ones to catch
    if finalins == 'mov @sp+, pc':
        finalins = 'ret'
    finalins = _status_register_aliases.get(finalins, finalins)
    # nop = mov dst, dst
    if opcode == 'mov' and reg_output_src == reg_output_dst:
        finalins = 'nop'

    # The remaining emulated instructions work on any register. They are all
    # one-operand forms, so they are rebuilt as "<mnemonic> <operand>".
    operand = reg_output_dst
    if opcode == 'mov' and reg_output_dst == 'pc' and finalins != 'ret':
        # br = mov src, pc (the ret check stops ret being mistaken for br)
        emulated = 'br'
        operand = reg_output_src  # We're actually using src here
    elif opcode == 'mov' and reg_output_src == '@sp+' and finalins != 'ret':
        # pop = mov @sp+, dst
        emulated = 'pop'
    elif opcode == 'add' and reg_output_src == reg_output_dst:
        # rla = add dst, dst. Compare the rendered operands, not just the
        # register numbers: 'add @r5, r5' or 'add #4, sr' is not a shift.
        emulated = 'rla'
    elif opcode == 'addc' and reg_output_src == reg_output_dst:
        # rlc = addc dst, dst
        emulated = 'rlc'
    elif opcode == 'mov' and reg_output_src == '#0' and reg_output_dst != '#0':
        # clr = mov #0, dst (the dst check keeps 'mov #0, #0' a nop)
        emulated = 'clr'
    else:
        emulated = _constant_source_aliases.get((opcode, reg_output_src))

    if emulated is None:
        # The instruction is not a one-operand emulated instruction
        return finalins
    return emulated + bytemode + ' ' + operand


adr_modes = ['{register}', '{index}({register})', '@{register}', '@{register}+']


def disassemble_addressing_mode(reg, adrmode):
    """Outputs disassembly of a register's addressing mode and whether
    an extension word was used (to update PC accordingly in the calling function),
    given the register number and addressing mode number.

    Returns a ``(text, used_extension_word)`` tuple. Extension words are read
    from ``asm[PC + 1]``.
    """
    # http://mspgcc.sourceforge.net/manual/x147.html
    extensionWord = False

    # r2 (status register) and r3 (CG) are encoded as constant registers
    if reg == 2:
        if adrmode == 0:  # Normal access
            reg_output = adr_modes[adrmode].format(register=register_names[reg])
        elif adrmode == 1:  # Absolute address using extension word
            reg_output = '&' + hex(asm[PC + 1])  # Get next word
            extensionWord = True
        elif adrmode == 2:
            reg_output = '#4'
        elif adrmode == 3:
            reg_output = '#8'
    elif reg == 3:
        if adrmode == 0:
            reg_output = '#0'
        elif adrmode == 1:
            reg_output = '#1'
        elif adrmode == 2:
            reg_output = '#2'
        elif adrmode == 3:
            # Just a little reminder that all bits set == -1
            reg_output = '#0xffff {-1}'
    elif adrmode == 0:
        reg_output = adr_modes[adrmode].format(register=register_names[reg])
    elif adrmode == 1:
        reg_output = adr_modes[adrmode].format(register=register_names[reg],
                                               index=hex(asm[PC + 1]))
        extensionWord = True
    elif adrmode == 2:
        reg_output = adr_modes[adrmode].format(register=register_names[reg])
    elif adrmode == 3 and reg == 0:
        # @pc+ is an immediate: the constant is the next word
        reg_output = '#' + hex(asm[PC + 1])
        extensionWord = True
    elif adrmode == 3:
        reg_output = adr_modes[adrmode].format(register=register_names[reg])

    return (reg_output, extensionWord)