From 19e385a861934e6dba2d8e9c2e3186842a71b75f Mon Sep 17 00:00:00 2001
From: Val <val@soft.fish>
Date: Wed, 15 Mar 2023 23:57:47 -0500
Subject: [PATCH] MSProbe: As git submodule (this is so nice! Why did I not
 think of it?)

---
 .gitignore             |   6 +-
 .gitmodules            |   3 +
 25-Halifax/MSProbe     |   1 +
 25-Halifax/assemble.py | 532 -----------------------------------------
 25-Halifax/halifax.py  |   9 +-
 5 files changed, 12 insertions(+), 539 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 25-Halifax/MSProbe
 delete mode 100644 25-Halifax/assemble.py

diff --git a/.gitignore b/.gitignore
index 4b10a4d..fda950b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,10 +6,8 @@ Saves
 **/obj
 **/*.out
 
-# MSProbe - https://github.com/Swiftloke/MSProbe
-MSProbe*
-# 25-Halifax uses a modified MSProbe assemble.py to assemble the binary. ~~It's All Rights Reserved, so I can't distribute.~~ Distributed with permission from the author.
-#assemble.py
+# MSProbe-insgen
+MSProbe-insgen*
 
 # Halifax binary dumps
 25-Halifax/*.bin
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..6d8e528
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "25-Halifax/MSProbe"]
+	path = 25-Halifax/MSProbe
+	url = https://github.com/ValShaped/MSProbe.git
diff --git a/25-Halifax/MSProbe b/25-Halifax/MSProbe
new file mode 160000
index 0000000..f6644d4
--- /dev/null
+++ b/25-Halifax/MSProbe
@@ -0,0 +1 @@
+Subproject commit f6644d4010a987674461cca8ce039af59dd55d2e
diff --git a/25-Halifax/assemble.py b/25-Halifax/assemble.py
deleted file mode 100644
index b9b1a26..0000000
--- a/25-Halifax/assemble.py
+++ /dev/null
@@ -1,532 +0,0 @@
-# Taken from https://github.com/ValShaped/MSProbe
-# Forked from https://github.com/Swiftloke/MSProbe
-# © 2018-2023 Swiftloke
-
-import sys
-import pdb
-import re
-
-from typing import Callable
-
-jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn', 'jge', 'jl', 'jmp']
-twoOpOpcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and']
-oneOpOpcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti']
-emulatedOpcodes = {
-'ret' : 'mov @sp+, pc',
-'clrc' : 'bic #1, sr',
-'setc' : 'bis #1, sr',
-'clrz' : 'bic #2, sr',
-'setz' : 'bis #2, sr',
-'clrn' : 'bic #4, sr',
-'setn' : 'bis #4, sr',
-'dint' : 'bic #8, sr',
-'eint' : 'bis #8, sr',
-'nop'  : 'mov r3, r3', #Any register would do the same
-'br'   : 'mov {reg}, pc',
-'pop'  : 'mov @sp+, {reg}',
-'rla'  : 'add {reg}, {reg}',
-'rlc'  : 'addc {reg}, {reg}',
-'inv'  : 'xor #0xffff, {reg}',
-'clr'  : 'mov #0, {reg}',
-'tst'  : 'cmp #0, {reg}',
-'dec'  : 'sub #1, {reg}',
-'decd' : 'sub #2, {reg}',
-'inc'  : 'add #1, {reg}',
-'incd' : 'add #2, {reg}',
-'adc'  : 'addc #0, {reg}',
-'dadc' : 'dadd #0, {reg}',
-'sbc'  : 'subc #0, {reg}',
-'jnc'  : 'jlo {reg}', #jlo, jhs are aliases of jnc, jc
-'jnz'  : 'jne {reg}', #jnz, jz are aliases of jne, jeq
-'jc'   : 'jhs {reg}',
-'jz'   : 'jeq {reg}',
-}
-
-def bitrep(number, bits = 16):
-	"""Converts to binary form, fixing leading zeroes."""
-	mask = int('0b' + '1' * bits, 2)
-	binstr = str(bin(number & mask))[2:]
-	#negative = binstr[0] == '-'
-	bitcount = len(binstr)
-	leading0s = bits - bitcount
-	return ('0' * leading0s) + binstr
-
-def hexrep(number, zeroes = 4):
-	"""Converts to hex form, fixing leading zeroes."""
-	mask = int('0b' + '1' * (zeroes * 4), 2)
-	hexstr = hex(number & mask)[2:]
-	hexcount = len(hexstr)
-	leading0s = zeroes - hexcount
-	return ('0' * leading0s) + hexstr
-
-def highlight(string: str, substring: str) -> str:
-	"""Highlight a substring in a string"""
-	return string.replace(substring, f"\033[4m{substring}\033[0m") if substring else string
-
-class AssemblyError(Exception):
-	"""
-	The base class for all Assembly Exceptions
-	"""
-	def __init__(self, name: str, reason: str) -> None:
-		self.type = "Improperly defined AssemblyError"
-		self.name = name
-		self.reason = reason
-
-class OpcodeError(AssemblyError):
-	"""
-	`OpcodeError` is raised when an opcode mnemonic is not found in the opcode map
-	"""
-	def __init__(self, opcode, reason = "Opcode not found in opcode map."):
-		super().__init__(name=opcode, reason=reason)
-		self.type = "Invalid opcode mnemonic"
-
-class RedefinedLabelError(AssemblyError):
-	"""
-	`RedefinedLabelError` is raised when a label is defined multiple times in the same source file.
-	Since labels are resolved after compilation, it cannot be known whether you intend to reference a past
-	or future definition of a label.
-	"""
-	def __init__(self, label, reason = "Label already defined."):
-		super().__init__(name=label, reason=reason)
-		self.type = "Redefined Label"
-
-class UndefinedLabelError(AssemblyError):
-	"""
-	`UndefinedLabelError` is raised when a label used in a jump instruction is not defined in the source
-	"""
-	def __init__(self, operand: str, reason: str):
-		super().__init__(name=operand, reason=reason)
-		self.type = "Undefined label"
-
-class AddressingModeError(AssemblyError):
-	"""
-	`AddressingModeError` is raised when the operand of an instruction is specified with an
-	unrepresentable addressing mode.
-	"""
-	def __init__(self, operand: str, reason: str):
-		super().__init__(name=operand, reason=reason)
-		self.type = "Invalid addressing mode"
-
-class JumpOffsetError(AssemblyError):
-	"""
-	`JumpOffsetError` is raised when a jump offset cannot be encoded.
-	Jump offsets are a 12 bit signed integer representing the number of processor words to jump.
-	As such, they can only encode jump offsets from -0x3fe to +0x400
-	"""
-	def __init__(self, offset: str, reason: str):
-		super().__init__(name=offset, reason=reason)
-		self.type = "Invalid jump offset"
-
-class RegisterError(AssemblyError):
-	"""
-	`RegisterError` is raised when a register isn't one of
-	[`pc`, `sp`, `sr`, `cg`, `r0`, ..., `r15`]
-	"""
-	def __init__(self, register: str, reason: str = "Valid registers are pc, sp, sr, cg, or r0-r15."):
-		super().__init__(name=register, reason=reason)
-		self.type = "Invalid register mnemonic"
-
-preprocessor = []
-"""
-`preprocessorHooks` are functions which take a line from the source file, and return a line.
-All registered hooks are called for each line of the source file.
-
-Registering a `preprocessorHook` shall be done through the `registerPreprocessorHook` function.
-
-Their signature is as follows:
-```py
-hook(instruction_line: str) -> str:
-```
-"""
-
-postprocessor = []
-"""
-postprocessorHooks are functions which act on the output stream as a monolithic entity.
-Each postprocessorHook is called exactly once per source file, after assembly and before output.
-
-Registering a `postprocessorHook` shall be done through the `registerPostprocessorHook` function.
-
-Their signature is as follows:
-```py
-hook():
-"""
-
-PC = 0  #Incremented by each instruction, incremented in words NOT bytes
-labels = {} #Label name and its PC location
-"""
-`labels` are a label name, followed by a the address of the label relative to the loadaddr
-"""
-jumps = {} #PC location of jump and its corresponding label
-"""
-`jumps` are the address of a jump instruction and its corresponding label
-During jump resolution, each jump in jumps is modified with a relative offset
-Example jump:
-{0: "loop"}
-"""
-output = [] #Output hex
-
-def asmMain(asm_file, outfile=None, silent=False):
-	line_number = 0
-	global PC #Get PC
-
-	outFP = open(outfile, 'w') if outfile else None
-
-	if not asm_file:
-		#Provide a prompt for entry
-		instructions = ''
-		ins = ''
-		print('Input assembly. Terminate input with the ".end" directive, or Ctrl+D (EOF).')
-		while True:
-			ins = sys.stdin.readline()
-			if ins == '.end\n' or ins == '':
-				break
-			instructions = instructions + ins
-	else:
-		with open(asm_file) as fp:
-			instructions = fp.read()
-
-
-	for ins in instructions.splitlines():
-		#Strip leading and trailing whitespace
-		ins = ins.strip()
-		ins = re.split(r'\s*[/;]', ins)[0] #Remove comments
-		#Skip empty lines or lines beginning with a comment
-		if len(ins) == 0 or ins.startswith((';', '//')):
-			continue
-
-		#Handle .directives
-		if ins.startswith('.'):
-			if ins.startswith(".define"):
-				registerDefine(ins)
-			#Allow passing the .end directive in input files, for compatibility with stdin input
-			if ins.startswith(".end"):
-				break
-			continue
-
-		#Handle preprocessor substitution hooks
-		for hook in preprocessor:
-			ins = hook(ins)
-
-		#Handle label registration
-		if ':' in ins:
-			try:
-				registerLabel(ins)
-			except RedefinedLabelError as exp:
-				print('Label "' + exp.label + '" at line number ' + str(line_number + 1) + ' already defined')
-				sys.exit(-1)
-		else:
-			try:
-				assemble(ins)
-			except AssemblyError as exp:
-				ins = highlight(ins, exp.name)
-				print(f'{exp.type} found on line {line_number + 1}: "{ins}"\n{exp.reason}')
-				sys.exit(-1)
-
-		line_number += 1
-
-	#Handle postprocessor hooks.
-	#These functions manipulate the raw output data, and perform tasks such as link resolution
-	for hook in postprocessor:
-		hook()
-
-	#Output the object as hex
-	for i in output:
-		if not silent:
-			print(hexrep(i), end='', file=sys.stdout)# + ' (' + bitrep(i, 16) + ')')
-		if outFP:
-			print(hexrep(i), end='', file=outFP)
-	if not silent:
-		print('') #End hex representation with a newline
-	if outFP:
-		outFP.close()
-
-def registerPreprocessorHook(hook: Callable):
-	if hook not in preprocessor:
-		preprocessor.append(hook)
-
-def registerPostprocessorHook(hook: Callable):
-	if hook not in postprocessor:
-		postprocessor.append(hook)
-
-def processDirectives(ins: str) -> str:
-	pass
-
-def resolveJumps():
-	"""Resolve pending jumps in the jumps list"""
-	global labels, jumps, output
-	#Resolve jump labels
-	for pc, label in jumps.items():
-		try:
-			labelpos = labels[label]
-		except KeyError:
-			print(f'Label "{label}" does not exist, but a jump instruction attempts to jump to it')
-			sys.exit(-1)
-		#Modify the jump instruction
-		#Get in little-endian format
-		ins = hexrep(output[pc])
-		ins = int(ins[2:4] + ins[0:2], 16)
-		ins = [bit for bit in bitrep(ins, 16)]
-		offset = (labelpos - pc) * 2 #Words versus bytes
-		#Jump offsets are multiplied by two, added by two (PC increment), and sign extendedB
-		ins[6:] = bitrep((offset - 2) // 2, 10)
-		#Output again in little endian
-		strword = hexrep(int(''.join(str(e) for e in ins), 2), 4)
-		output[pc] = int(strword[2:] + strword[0:2], 16)
-
-#TODO: Resolve labels in calls
-
-def registerLabel(ins: str):
-	"""Registers a label for later replacement"""
-	global labels #Get labels
-	global PC #Get PC
-	label, addr = ins.split(sep=':')
-	if label in labels:
-		raise RedefinedLabelError(label)
-	labels[label] = int(addr) if addr != '' else PC
-
-# -- Defines --
-def resolveDefines(ins: str) -> str:
-	global defines
-	for define in defines:
-		ins = ins.replace(define, defines[define])
-	return ins
-
-def registerDefine(ins: str):
-	"""
-	Registers a define for replacement on subsequent lines
-	A define is of format
-	```asm
-	.define identifier text...
-	"""
-	global defines, preprocessor
-	if 'defines' not in globals():
-		defines = {}
-	#Define is of format .define [identifier] [any text]
-	#Space(s) not required, but if spaces are not used, ':' or '=' must be used in its place
-	define: tuple = re.match(r'.define\s*(\w+)[\s:=]+(.*)\s*', ins).groups()
-	if define != ():
-		label, replacement = define
-		defines[label] = replacement
-		registerPreprocessorHook(resolveDefines)
-
-def registerJumpInstruction(PC, label):
-	"""Defer jump offset calculation until labels are defined"""
-	global jumps #Get jump instructions
-	jumps[PC] = label
-	registerPostprocessorHook(resolveJumps)
-
-def assemble(ins):
-	"""Assemble a single instruction, and append results to the output stream."""
-	opcode, notUsed = getOpcode(ins)
-	if opcode in jumpOpcodes:
-		return assembleJumpInstruction(ins)
-	elif opcode in oneOpOpcodes:
-		return assembleOneOpInstruction(ins)
-	elif opcode in twoOpOpcodes:
-		return assembleTwoOpInstruction(ins)
-	elif opcode in emulatedOpcodes:
-		return assembleEmulatedInstruction(ins)
-	else:
-		raise OpcodeError(opcode)
-
-def assembleEmulatedInstruction(ins):
-	"""Assembles a zero- or one-operand 'emulated' instruction."""
-	#Emulated instructions are either zero or one operand instructions.
-	opcode, notUsed = getOpcode(ins)
-	if '{reg}' in emulatedOpcodes[opcode]:
-		register = ins[ins.find(' ') + 1 : ]
-		ins = emulatedOpcodes[opcode].format(reg=register)
-	else:
-		ins = emulatedOpcodes[opcode]
-	return assemble(ins)
-
-def assembleOneOpInstruction(ins):
-	"""Assembles a one-operand (format I) instruction."""
-	out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-	out[0:6] = '000100' #One op identifier
-
-	opcode, byteMode = getOpcode(ins)
-	out[6:9] = bitrep(oneOpOpcodes.index(opcode), 3)
-	out[9] = bitrep(byteMode, 1)
-
-	#Figure out where the operand is
-	start = ins.find(' ') + 1
-	reg = ins[start :]
-
-	#We need to provide the opcode here to detect the push bug; see the function itself
-	extensionWord, adrmode, regID = assembleRegister(reg, opcode=opcode)
-
-	out[10:12] = bitrep(adrmode, 2)
-	out[12:] = bitrep(regID, 4)
-	appendWord(int(''.join(str(e) for e in out), 2))
-	if extensionWord:
-		appendWord(int(extensionWord, 16))
-
-def assembleTwoOpInstruction(ins):
-	"""Assembles a two-operand (format III) instruction."""
-	out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-
-	opcode, byteMode = getOpcode(ins)
-	out[0:4] = bitrep(twoOpOpcodes.index(opcode), 4)
-	out[9] = bitrep(byteMode, 1)
-
-	#Find the location of the first operand
-	start = ins.find(' ') + 1
-	end = ins.find(',')
-	regSrc = ins[start : end]
-
-	extensionWordSrc, adrmodeSrc, regIDSrc = assembleRegister(regSrc)
-
-	out[10:12] = bitrep(adrmodeSrc, 2)
-	out[4:8] = bitrep(regIDSrc, 4)
-
-	#Figure out where the comment is
-	start = end + 2 #Right after the comma, and the space after the comma
-	regDest = ins[start :]
-
-	extensionWordDest, adrmodeDest, regIDDest = assembleRegister(regDest, isDestReg = True)
-
-	out[8] = bitrep(adrmodeDest, 1)
-	out[12:] = bitrep(regIDDest, 4)
-
-	appendWord(int(''.join(str(e) for e in out), 2))
-	if extensionWordSrc:
-		appendWord(int(extensionWordSrc, 16))
-	if extensionWordDest:
-		appendWord(int(extensionWordDest, 16))
-
-def assembleJumpInstruction(ins):
-	"""Assembles a jump instruction. If the offset is supplied, it is assembled
-	immediately. Otherwise, if a label is provided, resolution of the offset is delayed
-	so that all labels can be read (including those further ahead in the instruction stream)."""
-	out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-	out[0:3] = '001' #Jump identifier
-	opcode, byteMode = getOpcode(ins)
-
-	if byteMode: #Cannot have "jmp.b", how does that even make sense
-		raise OpcodeError(opcode + '.b')
-
-	out[3:6] = bitrep(jumpOpcodes.index(opcode), 3)
-
-	#Figure out where the operand is
-	start = ins.find(' ') + 1
-	dest = ''.join(ins[start :].split()) #Remove whitespace
-
-	#Immediate offset
-	char1 = dest[0]
-	#Is this a number?
-	if re.match(r'[+\-]?[0x|0b]?[0-9A-Fa-f]+', dest):
-		offset = int(dest, 16)
-		if offset % 2 != 0:
-			raise JumpOffsetError(dest, "Jump offset cannot be odd.")
-		if offset <= -0x3fe or offset >= 0x400:
-			raise JumpOffsetError(dest, "Jump offset out of range. Range is -3fe bytes through +400 bytes.")
-		#Jump offsets are multiplied by two, added by two (PC increment), and sign extended
-		out[6:] = bitrep((offset - 2) // 2, 10)
-	else:
-		registerJumpInstruction(PC, dest)
-
-	appendWord(int(''.join(str(e) for e in out), 2))
-
-
-
-def getRegister(registerName: str):
-	"""Decodes special register names (or normal register names)."""
-	registerName = registerName.strip().lower() #Strip leading and trailing whitespace, and convert to lowercase
-	specialRegisterNames = {'pc': 0, 'sp': 1, 'sr': 2, 'cg': 3}
-	if registerName in specialRegisterNames:
-		return specialRegisterNames[registerName]
-	elif registerName.startswith('r'):
-		#FIXME: this allows registers with any integer name
-		return int(registerName[1:]) #Remove 'r'
-	else:
-		raise RegisterError(registerName)
-
-def getOpcode(ins: str):
-	"""Returns the opcode and whether byte mode is being used."""
-	#Split the opcode on characters that can't be used in an identifier
-	#Example: [mov].b r15, r15
-	opcode = re.split(r'[\.\W]', ins)[0]
-	byteMode = False
-	if '.b' in ins:
-		byteMode = True
-	return opcode, byteMode
-
-def appendWord(word: int):
-	"""Add a word to the output instruction stream, handling little endian format."""
-	global PC #Get PC
-	global output #Get output
-	#Append in little-endian format
-	strword = hexrep(word, 4)
-	output.append(int(strword[2:] + strword[0:2], 16))
-	PC += 1
-
-def assembleRegister(reg: str, opcode=None, isDestReg = False):
-	"""Assembles an operand, returning the extension word used (if applicable),
-	the addressing mode, and the register ID."""
-	extensionWord = None
-	adrmode = 0
-	regID = 0
-
-	if '(' in reg: #Indexed mode (mode 1)
-		extensionWord = reg[0 : reg.find('(')]
-		adrmode = 1
-		regID = getRegister(reg[reg.find('(') + 1 : reg.find(')')])
-	elif '@' in reg and '+' in reg: #Indirect with post-increment mode (mode 3)
-		#Destinations don't support indirect or indirect + post-increment.
-		if isDestReg:
-			raise AddressingModeError(reg,
-				'Cannot use indirect with post-increment form for destination register.')
-		adrmode = 3
-		regID = getRegister(reg[reg.find('@') + 1 : reg.find('+')])
-	elif '@' in reg: #Indirect mode (mode 2)
-		#Destinations don't support indirect or indirect + post-increment.
-		#Indirect can be faked with an index of 0. What a waste.
-		if isDestReg:
-			adrmode = 1
-			extensionWord = 0
-		else:
-			adrmode = 2
-			regID = getRegister(reg[reg.find('@') + 1 : ])
-	elif '#' in reg: #Use PC to specify an immediate constant
-		if isDestReg:
-			raise AddressingModeError(reg,
-				'Because immediates are encoded as @pc+, immediates cannot be used for ' +
-				'destinations.\nConsider using &dest absolute addressing form instead.')
-		adrmode = 3
-		regID = 0
-		constant = reg[reg.find('#') + 1 :].strip()
-
-		#This might be an immediate constant supported by the hardware
-
-		#A CPU bug prevents push #4 and push #8 with r2/SR encoding from working,
-		#so one must simply use a 16-bit immediate there (what a waste, again)
-		if constant == '4' and opcode != 'push':
-			regID = 2
-			adrmode = 2
-		elif constant == '8' and opcode != 'push':
-			regID = 2
-			adrmode = 3
-		elif constant == '0':
-			regID = 3
-			adrmode = 0
-		elif constant == '1':
-			regID = 3
-			adrmode = 1
-		elif constant == '2':
-			regID = 3
-			adrmode = 2
-		elif constant == '-1' or constant.lower() == '0xffff':
-			regID = 3
-			adrmode = 3
-		else:
-			extensionWord = constant
-	elif '&' in reg: #Direct addressing. An extension word is fetched and used as the raw address.
-		regID = 2
-		adrmode = 1
-		extensionWord = reg[reg.find('&') + 1 : ]
-	else: #Regular register access (mode 0)
-		adrmode = 0
-		regID = getRegister(reg)
-
-	return extensionWord, adrmode, regID
diff --git a/25-Halifax/halifax.py b/25-Halifax/halifax.py
index 4a0f76a..62b367a 100644
--- a/25-Halifax/halifax.py
+++ b/25-Halifax/halifax.py
@@ -2,7 +2,8 @@
 
 import re, os, sys
 from hashlib import sha256
-from assemble import asmMain
+# This program uses an extended version of Swiftloke's brilliant MSProbe to assemble the payload
+from MSProbe.assemble import asmMain
 
 # match this many hexadigits
 # must be corroborated within the script
@@ -16,14 +17,16 @@ if len(sys.argv) > 1:
 
 shellcode_out = f'{shellcode_asm}.tmp'
 
-#Compile shellcode w/ msprobe
+# Compile shellcode w/ msprobe
 asmMain(shellcode_asm, shellcode_out, silent=True)
 
+# Read compiled output
 with open(shellcode_out) as file:
     shellcode = file.readline()
-    shellcode_len = len(bytes.fromhex(shellcode));
 os.remove(shellcode_out)
+shellcode_len = len(bytes.fromhex(shellcode))  # payload size in bytes, not hex digits
 
+# Print formatted payload as hex
 print(f"6000{shellcode_len:x}{shellcode}")
 
 def main():