From 19e385a861934e6dba2d8e9c2e3186842a71b75f Mon Sep 17 00:00:00 2001 From: Val Date: Wed, 15 Mar 2023 23:57:47 -0500 Subject: [PATCH] MSProbe: As git submodule (this is so nice! Why did I not think of it?) --- .gitignore | 6 +- .gitmodules | 3 + 25-Halifax/MSProbe | 1 + 25-Halifax/assemble.py | 532 ----------------------------------------- 25-Halifax/halifax.py | 9 +- 5 files changed, 12 insertions(+), 539 deletions(-) create mode 100644 .gitmodules create mode 160000 25-Halifax/MSProbe delete mode 100644 25-Halifax/assemble.py diff --git a/.gitignore b/.gitignore index 4b10a4d..fda950b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,10 +6,8 @@ Saves **/obj **/*.out -# MSProbe - https://github.com/Swiftloke/MSProbe -MSProbe* -# 25-Halifax uses a modified MSProbe assemble.py to assemble the binary. ~~It's All Rights Reserved, so I can't distribute.~~ Distributed with permission from the author. -#assemble.py +# MSProbe-insgen +MSProbe-insgen* # Halifax binary dumps 25-Halifax/*.bin diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..6d8e528 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "25-Halifax/MSProbe"] + path = 25-Halifax/MSProbe + url = git@github.com:ValShaped/MSProbe.git diff --git a/25-Halifax/MSProbe b/25-Halifax/MSProbe new file mode 160000 index 0000000..f6644d4 --- /dev/null +++ b/25-Halifax/MSProbe @@ -0,0 +1 @@ +Subproject commit f6644d4010a987674461cca8ce039af59dd55d2e diff --git a/25-Halifax/assemble.py b/25-Halifax/assemble.py deleted file mode 100644 index b9b1a26..0000000 --- a/25-Halifax/assemble.py +++ /dev/null @@ -1,532 +0,0 @@ -# Taken from https://github.com/ValShaped/MSProbe -# Forked from https://github.com/Swiftloke/MSProbe -# © 2018-2023 Swiftloke - -import sys -import pdb -import re - -from typing import Callable - -jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn', 'jge', 'jl', 'jmp'] -twoOpOpcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and'] -oneOpOpcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti'] -emulatedOpcodes = { -'ret' : 'mov @sp+, pc', -'clrc' : 'bic #1, sr', -'setc' : 'bis #1, sr', -'clrz' : 'bic #2, sr', -'setz' : 'bis #2, sr', -'clrn' : 'bic #4, sr', -'setn' : 'bis #4, sr', -'dint' : 'bic #8, sr', -'eint' : 'bis #8, sr', -'nop' : 'mov r3, r3', #Any register would do the same -'br' : 'mov {reg}, pc', -'pop' : 'mov @sp+, {reg}', -'rla' : 'add {reg}, {reg}', -'rlc' : 'addc {reg}, {reg}', -'inv' : 'xor #0xffff, {reg}', -'clr' : 'mov #0, {reg}', -'tst' : 'cmp #0, {reg}', -'dec' : 'sub #1, {reg}', -'decd' : 'sub #2, {reg}', -'inc' : 'add #1, {reg}', -'incd' : 'add #2, {reg}', -'adc' : 'addc #0, {reg}', -'dadc' : 'dadd #0, {reg}', -'sbc' : 'subc #0, {reg}', -'jnc' : 'jlo {reg}', #jlo, jhs are aliases of jnc, jc -'jnz' : 'jne {reg}', #jnz, jz are aliases of jne, jeq -'jc' : 'jhs {reg}', -'jz' : 'jeq {reg}', -} - -def bitrep(number, bits = 16): - """Converts to binary form, fixing leading zeroes.""" - mask = int('0b' + '1' * bits, 2) - binstr = str(bin(number & mask))[2:] - #negative = binstr[0] == '-' - bitcount = len(binstr) - leading0s = bits - bitcount - return ('0' * leading0s) + binstr - -def hexrep(number, zeroes = 4): - """Converts to hex form, fixing leading zeroes.""" - mask = int('0b' + '1' * (zeroes * 4), 2) - hexstr = hex(number & mask)[2:] - hexcount = len(hexstr) - leading0s = zeroes - hexcount - return ('0' * leading0s) + hexstr - -def highlight(string: str, substring: str) -> str: - """Highlight a substring in a string""" - return string.replace(substring, f"\033[4m{substring}\033[0m") if substring else string - -class AssemblyError(Exception): - """ - The base class for all Assembly Exceptions - """ - def __init__(self, name: str, reason: str) -> None: - self.type = "Improperly defined AssemblyError" - self.name = name - self.reason = reason - -class OpcodeError(AssemblyError): - """ - `OpcodeError` is raised when an opcode mnemonic is not found in the opcode map - """ - def __init__(self, opcode, reason = "Opcode not found in opcode map."): - super().__init__(name=opcode, reason=reason) - self.type = "Invalid opcode mnemonic" - -class RedefinedLabelError(AssemblyError): - """ - `RedefinedLabelError` is raised when a label is defined multiple times in the same source file. - Since labels are resolved after compilation, it cannot be known whether you intend to reference a past - or future definition of a label. - """ - def __init__(self, label, reason = "Label already defined."): - super().__init__(name=label, reason=reason) - self.type = "Redefined Label" - -class UndefinedLabelError(AssemblyError): - """ - `UndefinedLabelError` is raised when a label used in a jump instruction is not defined in the source - """ - def __init__(self, operand: str, reason: str): - super().__init__(name=operand, reason=reason) - self.type = "Undefined label" - -class AddressingModeError(AssemblyError): - """ - `AddressingModeError` is raised when the operand of an instruction is specified with an - unrepresentable addressing mode. - """ - def __init__(self, operand: str, reason: str): - super().__init__(name=operand, reason=reason) - self.type = "Invalid addressing mode" - -class JumpOffsetError(AssemblyError): - """ - `JumpOffsetError` is raised when a jump offset cannot be encoded. - Jump offsets are a 12 bit signed integer representing the number of processor words to jump. - As such, they can only encode jump offsets from -0x3fe to +0x400 - """ - def __init__(self, offset: str, reason: str): - super().__init__(name=offset, reason=reason) - self.type = "Invalid jump offset" - -class RegisterError(AssemblyError): - """ - `RegisterError` is raised when a register isn't one of - [`pc`, `sp`, `sr`, `cg`, `r0`, ..., `r15`] - """ - def __init__(self, register: str, reason: str = "Valid registers are pc, sp, sr, cg, or r0-r15."): - super().__init__(name=register, reason=reason) - self.type = "Invalid register mnemonic" - -preprocessor = [] -""" -`preprocessorHooks` are functions which take a line from the source file, and return a line. -All registered hooks are called for each line of the source file. - -Registering a `preprocessorHook` shall be done through the `registerPreprocessorHook` function. - -Their signature is as follows: -```py -hook(instruction_line: str) -> str: -``` -""" - -postprocessor = [] -""" -postprocessorHooks are functions which act on the output stream as a monolithic entity. -Each postprocessorHook is called exactly once per source file, after assembly and before output. - -Registering a `postprocessorHook` shall be done through the `registerPostprocessorHook` function. - -Their signature is as follows: -```py -hook(): -""" - -PC = 0 #Incremented by each instruction, incremented in words NOT bytes -labels = {} #Label name and its PC location -""" -`labels` are a label name, followed by a the address of the label relative to the loadaddr -""" -jumps = {} #PC location of jump and its corresponding label -""" -`jumps` are the address of a jump instruction and its corresponding label -During jump resolution, each jump in jumps is modified with a relative offset -Example jump: -{0: "loop"} -""" -output = [] #Output hex - -def asmMain(asm_file, outfile=None, silent=False): - line_number = 0 - global PC #Get PC - - outFP = open(outfile, 'w') if outfile else None - - if not asm_file: - #Provide a prompt for entry - instructions = '' - ins = '' - print('Input assembly. Terminate input with the ".end" directive, or Ctrl+D (EOF).') - while True: - ins = sys.stdin.readline() - if ins == '.end\n' or ins == '': - break - instructions = instructions + ins - else: - with open(asm_file) as fp: - instructions = fp.read() - - - for ins in instructions.splitlines(): - #Strip leading and trailing whitespace - ins = ins.strip() - ins = re.split(r'\s*[/;]', ins)[0] #Remove comments - #Skip empty lines or lines beginning with a comment - if len(ins) == 0 or ins.startswith((';', '//')): - continue - - #Handle .directives - if ins.startswith('.'): - if ins.startswith(".define"): - registerDefine(ins) - #Allow passing the .end directive in input files, for compatibility with stdin input - if ins.startswith(".end"): - break - continue - - #Handle preprocessor substitution hooks - for hook in preprocessor: - ins = hook(ins) - - #Handle label registration - if ':' in ins: - try: - registerLabel(ins) - except RedefinedLabelError as exp: - print('Label "' + exp.label + '" at line number ' + str(line_number + 1) + ' already defined') - sys.exit(-1) - else: - try: - assemble(ins) - except AssemblyError as exp: - ins = highlight(ins, exp.name) - print(f'{exp.type} found on line {line_number + 1}: "{ins}"\n{exp.reason}') - sys.exit(-1) - - line_number += 1 - - #Handle postprocessor hooks. - #These functions manipulate the raw output data, and perform tasks such as link resolution - for hook in postprocessor: - hook() - - #Output the object as hex - for i in output: - if not silent: - print(hexrep(i), end='', file=sys.stdout)# + ' (' + bitrep(i, 16) + ')') - if outFP: - print(hexrep(i), end='', file=outFP) - if not silent: - print('') #End hex representation with a newline - if outFP: - outFP.close() - -def registerPreprocessorHook(hook: Callable): - if hook not in preprocessor: - preprocessor.append(hook) - -def registerPostprocessorHook(hook: Callable): - if hook not in postprocessor: - postprocessor.append(hook) - -def processDirectives(ins: str) -> str: - pass - -def resolveJumps(): - """Resolve pending jumps in the jumps list""" - global labels, jumps, output - #Resolve jump labels - for pc, label in jumps.items(): - try: - labelpos = labels[label] - except KeyError: - print(f'Label "{label}" does not exist, but a jump instruction attempts to jump to it') - sys.exit(-1) - #Modify the jump instruction - #Get in little-endian format - ins = hexrep(output[pc]) - ins = int(ins[2:4] + ins[0:2], 16) - ins = [bit for bit in bitrep(ins, 16)] - offset = (labelpos - pc) * 2 #Words versus bytes - #Jump offsets are multiplied by two, added by two (PC increment), and sign extendedB - ins[6:] = bitrep((offset - 2) // 2, 10) - #Output again in little endian - strword = hexrep(int(''.join(str(e) for e in ins), 2), 4) - output[pc] = int(strword[2:] + strword[0:2], 16) - -#TODO: Resolve labels in calls - -def registerLabel(ins: str): - """Registers a label for later replacement""" - global labels #Get labels - global PC #Get PC - label, addr = ins.split(sep=':') - if label in labels: - raise RedefinedLabelError(label) - labels[label] = int(addr) if addr != '' else PC - -# -- Defines -- -def resolveDefines(ins: str) -> str: - global defines - for define in defines: - ins = ins.replace(define, defines[define]) - return ins - -def registerDefine(ins: str): - """ - Registers a define for replacement on subsequent lines - A define is of format - ```asm - .define identifier text... - """ - global defines, preprocessor - if 'defines' not in globals(): - defines = {} - #Define is of format .define [identifier] [any text] - #Space(s) not required, but if spaces are not used, ':' or '=' must be used in its place - define: tuple = re.match(r'.define\s*(\w+)[\s:=]+(.*)\s*', ins).groups() - if define != (): - label, replacement = define - defines[label] = replacement - registerPreprocessorHook(resolveDefines) - -def registerJumpInstruction(PC, label): - """Defer jump offset calculation until labels are defined""" - global jumps #Get jump instructions - jumps[PC] = label - registerPostprocessorHook(resolveJumps) - -def assemble(ins): - """Assemble a single instruction, and append results to the output stream.""" - opcode, notUsed = getOpcode(ins) - if opcode in jumpOpcodes: - return assembleJumpInstruction(ins) - elif opcode in oneOpOpcodes: - return assembleOneOpInstruction(ins) - elif opcode in twoOpOpcodes: - return assembleTwoOpInstruction(ins) - elif opcode in emulatedOpcodes: - return assembleEmulatedInstruction(ins) - else: - raise OpcodeError(opcode) - -def assembleEmulatedInstruction(ins): - """Assembles a zero- or one-operand 'emulated' instruction.""" - #Emulated instructions are either zero or one operand instructions. - opcode, notUsed = getOpcode(ins) - if '{reg}' in emulatedOpcodes[opcode]: - register = ins[ins.find(' ') + 1 : ] - ins = emulatedOpcodes[opcode].format(reg=register) - else: - ins = emulatedOpcodes[opcode] - return assemble(ins) - -def assembleOneOpInstruction(ins): - """Assembles a one-operand (format I) instruction.""" - out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - out[0:6] = '000100' #One op identifier - - opcode, byteMode = getOpcode(ins) - out[6:9] = bitrep(oneOpOpcodes.index(opcode), 3) - out[9] = bitrep(byteMode, 1) - - #Figure out where the operand is - start = ins.find(' ') + 1 - reg = ins[start :] - - #We need to provide the opcode here to detect the push bug; see the function itself - extensionWord, adrmode, regID = assembleRegister(reg, opcode=opcode) - - out[10:12] = bitrep(adrmode, 2) - out[12:] = bitrep(regID, 4) - appendWord(int(''.join(str(e) for e in out), 2)) - if extensionWord: - appendWord(int(extensionWord, 16)) - -def assembleTwoOpInstruction(ins): - """Assembles a two-operand (format III) instruction.""" - out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - - opcode, byteMode = getOpcode(ins) - out[0:4] = bitrep(twoOpOpcodes.index(opcode), 4) - out[9] = bitrep(byteMode, 1) - - #Find the location of the first operand - start = ins.find(' ') + 1 - end = ins.find(',') - regSrc = ins[start : end] - - extensionWordSrc, adrmodeSrc, regIDSrc = assembleRegister(regSrc) - - out[10:12] = bitrep(adrmodeSrc, 2) - out[4:8] = bitrep(regIDSrc, 4) - - #Figure out where the comment is - start = end + 2 #Right after the comma, and the space after the comma - regDest = ins[start :] - - extensionWordDest, adrmodeDest, regIDDest = assembleRegister(regDest, isDestReg = True) - - out[8] = bitrep(adrmodeDest, 1) - out[12:] = bitrep(regIDDest, 4) - - appendWord(int(''.join(str(e) for e in out), 2)) - if extensionWordSrc: - appendWord(int(extensionWordSrc, 16)) - if extensionWordDest: - appendWord(int(extensionWordDest, 16)) - -def assembleJumpInstruction(ins): - """Assembles a jump instruction. If the offset is supplied, it is assembled - immediately. Otherwise, if a label is provided, resolution of the offset is delayed - so that all labels can be read (including those further ahead in the instruction stream).""" - out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - out[0:3] = '001' #Jump identifier - opcode, byteMode = getOpcode(ins) - - if byteMode: #Cannot have "jmp.b", how does that even make sense - raise OpcodeError(opcode + '.b') - - out[3:6] = bitrep(jumpOpcodes.index(opcode), 3) - - #Figure out where the operand is - start = ins.find(' ') + 1 - dest = ''.join(ins[start :].split()) #Remove whitespace - - #Immediate offset - char1 = dest[0] - #Is this a number? - if re.match(r'[+\-]?[0x|0b]?[0-9A-Fa-f]+', dest): - offset = int(dest, 16) - if offset % 2 != 0: - raise JumpOffsetError(dest, "Jump offset cannot be odd.") - if offset <= -0x3fe or offset >= 0x400: - raise JumpOffsetError(dest, "Jump offset out of range. Range is -3fe bytes through +400 bytes.") - #Jump offsets are multiplied by two, added by two (PC increment), and sign extended - out[6:] = bitrep((offset - 2) // 2, 10) - else: - registerJumpInstruction(PC, dest) - - appendWord(int(''.join(str(e) for e in out), 2)) - - - -def getRegister(registerName: str): - """Decodes special register names (or normal register names).""" - registerName = registerName.strip().lower() #Strip leading and trailing whitespace, and convert to lowercase - specialRegisterNames = {'pc': 0, 'sp': 1, 'sr': 2, 'cg': 3} - if registerName in specialRegisterNames: - return specialRegisterNames[registerName] - elif registerName.startswith('r'): - #FIXME: this allows registers with any integer name - return int(registerName[1:]) #Remove 'r' - else: - raise RegisterError(registerName) - -def getOpcode(ins: str): - """Returns the opcode and whether byte mode is being used.""" - #Split the opcode on characters that can't be used in an identifier - #Example: [mov].b r15, r15 - opcode = re.split(r'[\.\W]', ins)[0] - byteMode = False - if '.b' in ins: - byteMode = True - return opcode, byteMode - -def appendWord(word: int): - """Add a word to the output instruction stream, handling little endian format.""" - global PC #Get PC - global output #Get output - #Append in little-endian format - strword = hexrep(word, 4) - output.append(int(strword[2:] + strword[0:2], 16)) - PC += 1 - -def assembleRegister(reg: str, opcode=None, isDestReg = False): - """Assembles an operand, returning the extension word used (if applicable), - the addressing mode, and the register ID.""" - extensionWord = None - adrmode = 0 - regID = 0 - - if '(' in reg: #Indexed mode (mode 1) - extensionWord = reg[0 : reg.find('(')] - adrmode = 1 - regID = getRegister(reg[reg.find('(') + 1 : reg.find(')')]) - elif '@' in reg and '+' in reg: #Indirect with post-increment mode (mode 3) - #Destinations don't support indirect or indirect + post-increment. - if isDestReg: - raise AddressingModeError(reg, - 'Cannot use indirect with post-increment form for destination register.') - adrmode = 3 - regID = getRegister(reg[reg.find('@') + 1 : reg.find('+')]) - elif '@' in reg: #Indirect mode (mode 2) - #Destinations don't support indirect or indirect + post-increment. - #Indirect can be faked with an index of 0. What a waste. - if isDestReg: - adrmode = 1 - extensionWord = 0 - else: - adrmode = 2 - regID = getRegister(reg[reg.find('@') + 1 : ]) - elif '#' in reg: #Use PC to specify an immediate constant - if isDestReg: - raise AddressingModeError(reg, - 'Because immediates are encoded as @pc+, immediates cannot be used for ' + - 'destinations.\nConsider using &dest absolute addressing form instead.') - adrmode = 3 - regID = 0 - constant = reg[reg.find('#') + 1 :].strip() - - #This might be an immediate constant supported by the hardware - - #A CPU bug prevents push #4 and push #8 with r2/SR encoding from working, - #so one must simply use a 16-bit immediate there (what a waste, again) - if constant == '4' and opcode != 'push': - regID = 2 - adrmode = 2 - elif constant == '8' and opcode != 'push': - regID = 2 - adrmode = 3 - elif constant == '0': - regID = 3 - adrmode = 0 - elif constant == '1': - regID = 3 - adrmode = 1 - elif constant == '2': - regID = 3 - adrmode = 2 - elif constant == '-1' or constant.lower() == '0xffff': - regID = 3 - adrmode = 3 - else: - extensionWord = constant - elif '&' in reg: #Direct addressing. An extension word is fetched and used as the raw address. - regID = 2 - adrmode = 1 - extensionWord = reg[reg.find('&') + 1 : ] - else: #Regular register access (mode 0) - adrmode = 0 - regID = getRegister(reg) - - return extensionWord, adrmode, regID diff --git a/25-Halifax/halifax.py b/25-Halifax/halifax.py index 4a0f76a..62b367a 100644 --- a/25-Halifax/halifax.py +++ b/25-Halifax/halifax.py @@ -2,7 +2,8 @@ import re, os, sys from hashlib import sha256 -from assemble import asmMain +# This program uses an extended version of Swiftloke's brilliant MSProbe to assemble the payload +from MSProbe.assemble import asmMain # match this many hexadigits # must be corroborated within the script @@ -16,14 +17,16 @@ if len(sys.argv) > 1: shellcode_out = f'{shellcode_asm}.tmp' -#Compile shellcode w/ msprobe +# Compile shellcode w/ msprobe asmMain(shellcode_asm, shellcode_out, silent=True) +# Read compiled output with open(shellcode_out) as file: shellcode = file.readline() - shellcode_len = len(bytes.fromhex(shellcode)); os.remove(shellcode_out) +shellcode_len = len(bytes.fromhex(shellcode)); +# Print formatted payload as hex print(f"6000{shellcode_len:x}{shellcode}") def main():