Integrate Parser (#4)

Integrate parser
2025-02-04 10:38:42 +00:00 · 2021-10-26 12:48:04 -05:00
parent 9db476dd39
commit d105544596
15 changed files with 1234 additions and 16 deletions
--- a/.github/workflows/vscode-test.yaml
+++ b/.github/workflows/vscode-test.yaml
@@ -0,0 +1,26 @@
+name: "@vscode/test-electron"
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [macos-11, ubuntu-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install Node.js
+        uses: actions/setup-node@v1
+        with:
+          node-version: 10.x
+      - run: npm ci
+      - run: xvfb-run -a npm test
+        if: runner.os == 'Linux'
+      - run: npm test
+        if: runner.os != 'Linux'
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -23,7 +23,7 @@
 			"request": "launch",
 			"args": [
 				"--extensionDevelopmentPath=${workspaceFolder}",
-				"--extensionTestsPath=${workspaceFolder}/out/test/suite/index"
+				"--extensionTestsPath=${workspaceFolder}/out/test/suites/index"
 			],
 			"outFiles": [
 				"${workspaceFolder}/out/test/**/*.js"
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -1,9 +1,14 @@
 import * as vscode from 'vscode';
+import * as pl from './pylex';
+
+let parser: pl.Parser = new pl.Parser();

 export function activate(context: vscode.ExtensionContext) {
  console.log('Congratulations, your extension "mind-reader" is now active!');
  vscode.window.showInformationMessage('Mind_Reader is loaded!');

+  parser.parse('Beep Boop');
+
  // Increase Font Scale
  context.subscriptions.push(
    vscode.commands.registerCommand('mind-reader.increaseFontScale', () => {
--- a/src/pylex/index.ts
+++ b/src/pylex/index.ts
@@ -0,0 +1,7 @@
+// expose parser by default
+export {default as Parser} from './parser';
+export {default as LineToken} from './token';
+export {default as Lexer} from './lexer';
+export {default as LexNode} from './node';
+export {TabInfo as TabInfo} from './token';
+
--- a/src/pylex/lexer.ts
+++ b/src/pylex/lexer.ts
@@ -0,0 +1,214 @@
+import { LineToken } from '.';
+import { Symbol, EOFTOKEN, TabInfo } from './token';
+
+type Rule = {
+  pattern: RegExp,
+  type: Symbol,
+};
+
+/**
+ * List of recognition rules, in order of priority: the first rule whose
+ * pattern matches a line decides that line's token.
+ * `pattern` is the regular expression used to recognize the token;
+ * `type` is the token type produced on a match.
+ */
+const rules: Rule[] = [
+  {
+    pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/,
+    type: Symbol.FUNCTION
+  },
+  {
+    pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
+    type: Symbol.CLASS
+  },
+  {
+    pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
+    type: Symbol.IF
+  },
+  {
+    pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
+    type: Symbol.ELIF
+  },
+  {
+    pattern: /^\s*else\s*:/,
+    type: Symbol.ELSE
+  },
+  {
+    pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
+    type: Symbol.FOR
+  },
+  {
+    pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
+    type: Symbol.WHILE
+  },
+  {
+    pattern: /^\s*try\s*:/,
+    type: Symbol.TRY
+  },
+  {
+    pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
+    type: Symbol.EXCEPT
+  },
+  {
+    pattern: /^\s*finally\s*:\s*$/,
+    type: Symbol.FINALLY
+  },
+  {
+    pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
+    type: Symbol.WITH
+  },
+];
+
+/**
+ * Line-By-Line Lexer
+ */
+export default class Lexer {
+  private textLines: string[] = []; // array of text lines
+  private pos: number = 0;
+  private _currToken: LineToken = EOFTOKEN;
+
+  /**
+   * Calculates indentation level for a line.
+   *
+   * With hard tabs (`tabFmt.hard`), every leading whitespace character
+   * counts as one level. With soft tabs, the number of leading whitespace
+   * characters is divided by `tabFmt.size` and rounded up (so, tabSize+1
+   * spaces is 2 levels, 2*tabSize+1 is 3, etc.)
+   *
+   * @param `text` The line of text.
+   * @param `tabFmt` A tab information descriptor.
+   * @return The indent of `text` with consideration for `tabFmt`.
+   */
+  static getIndent(text: string, tabFmt: TabInfo): number {
+    // trimStart() is the standard name; trimLeft() is only a deprecated alias
+    const leadingSpace: number = text.length - text.trimStart().length;
+
+    if (tabFmt.hard) {
+      // used tabs: one whitespace character per level
+      return leadingSpace;
+    }
+
+    // use spaces: a partially filled tab stop still counts as a level
+    return Math.ceil(leadingSpace / tabFmt.size);
+  }
+
+  /**
+   * @param `text` The text to lex.
+   * @param `tabFmt` A tab information descriptor. Missing fields fall back
+   * to 4 wide expanded (soft) tabs.
+   */
+  constructor(text?: string, private tabFmt?: TabInfo) {
+    // default is 4 wide expanded tabs
+    this.tabFmt = {
+      ...{size: 4, hard: false},
+      ...tabFmt
+    };
+
+    if (text) {
+      // normalize linefeeds; a string pattern would only replace the
+      // first occurrence, so use a global regex to catch every CRLF
+      text = text.replace(/\r\n/g, '\n');
+    }
+    this.restart(text);
+  }
+
+  /**
+   * Restart lexer with new text.
+   *
+   * If `text` is empty or undefined, the position is reset and the current
+   * token becomes EOFTOKEN; the previously loaded lines are left untouched.
+   *
+   * @param `text` The new text to lex.
+   */
+  restart(text?: string): void {
+    this.pos = 0;
+    this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN
+    if (text) {
+      // accept both LF and CRLF line endings, so text handed directly to
+      // restart() is split the same way as text given to the constructor
+      this.textLines = text.split(/\r?\n/);
+      this.next(); // advance to the first token
+    }
+  }
+
+  /**
+   * @return the current {@link LineToken}.
+   */
+  currToken(): LineToken { return this._currToken; }
+
+  /**
+   * Move forward to the next token in the stream. Lines that are empty,
+   * whitespace-only, or comment-only produce no token and are skipped.
+   *
+   * @return The new current token, after advancing
+   */
+  next(): LineToken {
+    const pastEnd: boolean = this.pos > this.textLines.length;
+    if (pastEnd && this._currToken === EOFTOKEN) {
+      throw new Error('Cannot advance past end');
+    }
+
+    // Walk the remaining lines until one of them yields a token
+    while (this.pos < this.textLines.length) {
+      const linenr: number = this.pos;
+      const line: string = this.textLines[linenr];
+      const indent: number = Lexer.getIndent(line, this.tabFmt!);
+
+      // The first rule whose pattern matches decides the token
+      let found: LineToken | undefined = undefined;
+      for (const rule of rules) {
+        const match: RegExpMatchArray | null = line.match(rule.pattern);
+        if (match) {
+          // patterns without a named group carry no attribute
+          found = match.groups
+            ? new LineToken(rule.type, linenr, indent, match.groups["attr"])
+            : new LineToken(rule.type, linenr, indent);
+          break;
+        }
+      }
+
+      // No construct here: a line with real content becomes an INDENT token,
+      // while whitespace, comment, or empty lines yield nothing
+      if (found === undefined && !/^\s*(#.*)?$/.test(line)) {
+        found = new LineToken(Symbol.INDENT, linenr, indent);
+      }
+
+      // Either way this line has been consumed
+      this.pos = linenr + 1;
+      if (found !== undefined) {
+        this._currToken = found;
+        return this.currToken();
+      }
+    }
+
+    // Ran out of lines, must be EOF
+    this._currToken = EOFTOKEN;
+    this.pos++;
+    return this.currToken();
+  }
+
+  /**
+   * Move backwards in the token stream
+   *
+   * @param `n` The number of positions to retract.
+   * @return The new current token after retracting.
+   * @throws RangeError if `n` is not positive, or if retracting `n`
+   * tokens would move before the first token. The lexer position is left
+   * unchanged when this happens.
+   */
+  retract(n: number = 1): LineToken {
+    // Validate the argument first so a bad distance is always reported as such
+    if (n <= 0) {
+      throw new RangeError('Retract distance must be positive');
+    }
+
+    if (this.pos - 1 - n < 0) {
+      // -1 because this.pos is currently on the next token
+      throw new RangeError('Cannot retract past start');
+    }
+
+    // empty, whitespace-only, or comment-only line: never produces a token
+    const skippable: RegExp = /^\s*(#.*)?$/;
+
+    // Step back over n + 1 token lines: n to retract, plus one more because
+    // the closing next() re-reads the line we land on. Work on a local copy
+    // so that a failed retract does not corrupt this.pos.
+    let target: number = this.pos;
+    for (let remaining = n + 1; remaining > 0; remaining--) {
+      target--;
+      // Skip empty lines; positions at or past the end hold no line to skip
+      while (target >= 0 && target < this.textLines.length && skippable.test(this.textLines[target])) {
+        target--;
+      }
+      if (target < 0) {
+        // only skippable lines precede the current token
+        throw new RangeError('Cannot retract past start');
+      }
+    }
+
+    this.pos = target;
+    return this.next();
+  }
+}
--- a/src/pylex/node.ts
+++ b/src/pylex/node.ts
@@ -0,0 +1,82 @@
+import * as vscode from 'vscode';
+
+import LineToken from './token';
+
+/**
+ * A node in a Parse tree.
+ */
+export default class LexNode extends vscode.TreeItem {
+
+  /**
+   * @param `label` A human-readable string describing this item
+   * @param `collapsibleState` {@link TreeItemCollapsibleState} of the tree item.
+   * @param `token` The token at this node.
+   * @param `_children` The children in this node's subtree.
+   * @param `_parent` The parent node of this node.
+   */
+  constructor(
+    public readonly label: string,
+    public readonly collapsibleState: vscode.TreeItemCollapsibleState,
+    public readonly token: LineToken | null,
+    private _children: LexNode[] | null = null,
+    private _parent: LexNode | null = null,
+  ) {
+    super(label, collapsibleState);
+    this.tooltip = this.label;
+    if (this.token && this.token.linenr >= 0) {
+      this.tooltip += `: ${this.token.linenr+1}`;
+    }
+  }
+
+  /**
+   * @return The children of this node.
+   */
+  children(): LexNode[] | null {
+    return this._children;
+  }
+
+  /**
+   * @return The parent of this node.
+   */
+  parent(): LexNode | null {
+    return this._parent;
+  }
+
+  /**
+   * Adopt child nodes.
+   *
+   * The passed nodes are not modified: each one is copied into a new node
+   * whose parent is this node, and the copies are appended to this node's
+   * children.
+   *
+   * @param `children` Array of nodes to adopt.
+   */
+  adopt(children: LexNode[]): void {
+    const parentedChildren = children.map(c => new LexNode(
+      c.label,
+      c.collapsibleState,
+      c.token,
+      c.children(),
+      this
+    ));
+
+    // Append to any existing children. The re-parented copies must be used
+    // in both cases; the originals do not point back to this node.
+    this._children = this._children
+      ? this._children.concat(parentedChildren)
+      : parentedChildren;
+  }
+
+  /**
+   * Build the chain of nodes leading from this node up to the tree root.
+   *
+   * @return Copies of this node and each of its ancestors, ordered from this node to the root.
+   */
+  rootPath(): LexNode[] {
+    // The path always starts with a copy of this node
+    const self = new LexNode(this.label, this.collapsibleState, this.token, this._children, this._parent);
+    if (!this._parent) {
+      // no parent: this node is the root, the path ends here
+      return [self];
+    }
+    return [self].concat(this._parent.rootPath());
+  }
+}
--- a/src/pylex/parser.ts
+++ b/src/pylex/parser.ts
@@ -0,0 +1,133 @@
+import * as vscode from 'vscode';
+
+import { EOFTOKEN, Symbol, TabInfo } from './token';
+import Lexer from './lexer';
+import LexNode from './node';
+
+/**
+ * A parse tree generator
+ */
+export default class Parser {
+  private lexer: Lexer;
+  private currIndent: number;
+  private root: LexNode; // Root of syntax tree
+
+  /**
+   * @param `text` Text to parse.
+   * @param `tabFmt` A tab information descriptor
+   */
+  constructor (private text?: string, private tabFmt?: TabInfo) {}
+
+  /**
+   * Parse the passed text.
+   *
+   * @param `text` Text to parse. If undefined, use current value of `this.text`
+   * @param `tabFmt` A tab information descriptor. If omitted, use current value of `this.tabFmt`
+   * @return A parse tree representing `text`.
+   */
+  parse(text?: string, tabFmt?: TabInfo): LexNode {
+    // Save explicitly passed values; otherwise keep what is stored, which
+    // might still be undefined. `text` is compared against undefined so
+    // that an empty string is parsed as empty input instead of silently
+    // falling back to the previously parsed text.
+    if (text !== undefined) {
+      this.text = text;
+    }
+    if (tabFmt) {
+      this.tabFmt = tabFmt;
+    }
+
+    // initialize root
+    this.lexer = new Lexer(this.text, this.tabFmt);
+    this.root = new LexNode(
+      "root",
+      vscode.TreeItemCollapsibleState.None,
+      null,
+      null,
+      null
+    );
+
+    // parse children
+    this.currIndent = 0;
+    const children = this._parse(this.root);
+
+    if (children.length > 0) {
+      this.root.adopt(children);
+    }
+    return this.root;
+  }
+
+  /**
+   * Recursively collect the block nodes found at the current indent level.
+   * Consumes tokens from the lexer until EOF or until a token is indented
+   * less than `this.currIndent`.
+   *
+   * @param `parent` The node recorded as parent of each block created here.
+   * @return The blocks found, in the order they were lexed.
+   */
+  private _parse(parent: LexNode | null): LexNode[] {
+    const blocks: LexNode[] = [];
+    for (let tok = this.lexer.currToken(); tok !== EOFTOKEN; tok = this.lexer.currToken()) {
+      if (tok.indentLevel < this.currIndent) {
+        // dedent: unwind a single level of recursion per return
+        this.currIndent--;
+        return blocks;
+      }
+
+      if (tok.type === Symbol.INDENT) {
+        // plain code line, stays inside the block being parsed
+        this.lexer.next();
+        continue;
+      }
+
+      // a construct opens a new block here
+      const suffix = tok.attr === undefined ? "" : " " + tok.attr;
+      const blockRoot = new LexNode(
+        tok.type + suffix,
+        vscode.TreeItemCollapsibleState.None,
+        tok,
+        null,
+        parent
+      );
+      this.lexer.next();
+      this.currIndent++;
+
+      // Everything nested below the construct is gathered recursively
+      const nested = this._parse(blockRoot);
+      if (nested.length > 0) {
+        blockRoot.adopt(nested);
+      }
+      blocks.push(blockRoot);
+    }
+    return blocks;
+  }
+
+  /**
+   * Get an array of LexNodes representing the rootpath of LexNodes from the
+   * passed line number to the root of the document. A list of "this" inside
+   * "that" inside ... inside the document root.
+   *
+   * @param `lineNumber` The line number to query context for.
+   * @return An array of LexNodes for the root path containing `lineNumber`.
+   * Empty if nothing has been parsed, the tree has no blocks, or
+   * `lineNumber` lies before the first block.
+   */
+  context(lineNumber: number): LexNode[] {
+    // nothing parsed yet, or the document contains no blocks
+    if (!this.root || !this.root.children()) {
+      return [];
+    }
+
+    // Returns the deepest LexNode that starts at or before the queried
+    // line number, or undefined if every child starts after it.
+    const find = (node: LexNode): LexNode | undefined => {
+      const kids = node.children();
+      if (!kids) {
+        return undefined;
+      }
+
+      // Children are pushed in the order they were lexed, so the last
+      // one that does not start after the line is the candidate.
+      let candidate: LexNode | undefined = undefined;
+      for (const child of kids) {
+        if (child.token && lineNumber < child.token.linenr) {
+          break;
+        }
+        candidate = child;
+      }
+
+      if (!candidate) {
+        return undefined;
+      }
+
+      // Prefer a nested block when one applies, otherwise the candidate itself
+      const deeper = find(candidate);
+      return deeper ? deeper : candidate;
+    };
+
+    const target = find(this.root);
+    return target ? target.rootPath() : [];
+  }
+}
--- a/src/pylex/token.ts
+++ b/src/pylex/token.ts
@@ -0,0 +1,66 @@
+/* eslint-disable @typescript-eslint/naming-convention */
+/* ^ allow uppercase enum */
+
+/**
+ * LineToken Symbol Types
+ */
+export enum Symbol {
+  FUNCTION = "function",
+  CLASS = "class",
+  IF = "if",
+  ELSE = "else",
+  ELIF = "elif",
+  FOR = "for",
+  WHILE = "while",
+  TRY = "try",
+  EXCEPT = "except",
+  FINALLY = "finally",
+  WITH = "with",
+  INDENT = "INDENT", // Indent token, default if not EOF, only contains indent information
+  EOF = "EOF"
+}
+
+/**
+ * @typedef {Object} TabInfo
+ * @prop {number} size // The width of a tab in spaces
+ * @prop {boolean} hard // Whether to use literal tab characters
+ */
+export type TabInfo = {
+  size: number,
+  hard: boolean,
+};
+
+/**
+ * A token for a line in a Python file.
+ * All fields are readonly and set once by the constructor.
+ */
+export default class LineToken {
+
+  /**
+   * @param `type` The type of token for this line.
+   * @param `linenr` The line number (0-indexed)
+   * @param `indentLevel` The level of indentation.
+   * @param `attr` Additional item for tokens that might need it.
+   */
+  constructor(
+    public readonly type: Symbol,
+    public readonly linenr: number,
+    public readonly indentLevel: number,
+    public readonly attr?: any // Any additional things a token might need (class name, control condition)
+  ) { }
+
+  /**
+   * @return A string representation of the token. The line number is
+   * printed 1-based (`linenr` + 1).
+   */
+  toString(): string {
+    return this.type + ", linenr:" + (this.linenr+1) + ", indentLevel: " + this.indentLevel + ", attr: " + this.attr;
+  }
+}
+
+/**
+ * The End-Of-File token
+ *
+ * EOFTOKEN is returned when `next()` is called
+ * while the lexer is on the last token in the stream.
+ */
+const EOFTOKEN = new LineToken(Symbol.EOF, -1, -1);
+export { EOFTOKEN };
--- a/src/test/runTest.ts
+++ b/src/test/runTest.ts
@@ -5,10 +5,12 @@ import { runTests } from '@vscode/test-electron';
 async function main() {
  try {
    // The folder containing package.json
+    // Passed to `--extensionDevelopmentPath`
    const extensionDevelopmentPath: string = path.resolve(__dirname, '../../');

    // The path to the test runner script
-    const extensionTestsPath: string = path.resolve(__dirname, './suite/index');
+    // Passed to `--extensionTestsPath`
+    const extensionTestsPath: string = path.resolve(__dirname, './suites/index');

    // Download VS Code, unzip it and run the integration test
    await runTests({ extensionDevelopmentPath, extensionTestsPath });
--- a/src/test/suite/extension.test.ts
+++ b/src/test/suite/extension.test.ts
@@ -1,13 +0,0 @@
-import * as assert from 'assert';
-import * as vscode from 'vscode';
-import { after } from 'mocha';
-
-suite('Dummy Test Suite', () => {
-  after(() => {
-    vscode.window.showInformationMessage('All tests passed!');
-  });
-
-  test('Dummy Test', () => {
-    assert.strictEqual(0 === 0, true);
-  });
-});
--- a/src/test/suites/index.ts
+++ b/src/test/suites/index.ts
--- a/src/test/suites/pylex/lexer.test.ts
+++ b/src/test/suites/pylex/lexer.test.ts
@@ -0,0 +1,279 @@
+import * as assert from 'assert';
+import * as vscode from 'vscode';
+import { after } from 'mocha';
+
+import Lexer from '../../../pylex/lexer';
+import LineToken from '../../../pylex/token';
+import { EOFTOKEN, Symbol } from '../../../pylex/token';
+
+suite('Lexer Test Suite', () => {
+  after(() => {
+    vscode.window.showInformationMessage('All tests passed!');
+  });
+
+  // No text at all: the lexer starts out on EOFTOKEN
+  test('Undefined', () => {
+    let l: Lexer = new Lexer(undefined);
+    assert.deepStrictEqual(l.currToken(), EOFTOKEN);
+  });
+
+  // Zero-length text: the lexer starts out on EOFTOKEN as well
+  test('Empty String', () => {
+    let l: Lexer = new Lexer('');
+    assert.deepStrictEqual(l.currToken(), EOFTOKEN);
+  });
+
+  test('Whitespace', () => {
+    let l: Lexer = new Lexer('  \t\t'.repeat(4).repeat(4));
+    assert.deepStrictEqual(l.currToken(), EOFTOKEN);
+  });
+
+  test('Non-Whitespace with no construct', () => {
+    let l: Lexer = new Lexer('foobar');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 0, 0));
+  });
+
+  test('getIndent() accuracy, spaces', () => {
+    for (var i = 0; i < 100; i++) {
+      let l: Lexer = new Lexer('    '.repeat(i) + 'foobar');
+      assert.strictEqual(l.currToken().indentLevel, i);
+    }
+  });
+
+  test('getIndent() accuracy, tabs', () => {
+    for (var i = 0; i < 100; i++) {
+      let l: Lexer = new Lexer('\t'.repeat(i) + 'foobar', {size: 4, hard: true});
+      assert.strictEqual(l.currToken().indentLevel, i);
+    }
+  });
+
+  test('getIndent() accuracy, spaces with incomplete tab', () => {
+    for (var i = 0; i < 100; i++) {
+      for (var j = 1; j <= 3; j++) {
+        let l: Lexer = new Lexer('    '.repeat(i) + ' '.repeat(j) + 'foobar', {size: 4, hard: false});
+        assert.strictEqual(l.currToken().indentLevel, i+1);
+      }
+    }
+  });
+
+  test('class definition', () => {
+    let l: Lexer = new Lexer('class Foobar(object):');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.CLASS, 0, 0, 'Foobar'));
+  });
+
+  test('function definition', () => {
+    let l: Lexer = new Lexer('def Barbaz(this, that, andTheOther):');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FUNCTION, 0, 0, 'Barbaz'));
+  });
+
+  test('if statement', () => {
+    let l: Lexer = new Lexer('if True and bar == baz:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'True and bar == baz'));
+  });
+
+  test('elif statement', () => {
+    let l: Lexer = new Lexer('elif name == "bar" and True:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.ELIF, 0, 0, 'name == "bar" and True'));
+  });
+
+  test('else statement', () => {
+    let l: Lexer = new Lexer('else:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.ELSE, 0, 0));
+  });
+
+  test('for loop', () => {
+    let l: Lexer = new Lexer('for pickle in pickleJars:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FOR, 0, 0, 'pickle in pickleJars'));
+  });
+
+  test('while loop', () => {
+    let l: Lexer = new Lexer('while numCookies < capacity:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.WHILE, 0, 0, 'numCookies < capacity'));
+  });
+
+  test('try statement', () => {
+    let l: Lexer = new Lexer('try:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.TRY, 0, 0));
+  });
+
+  test('except statement with attr', () => {
+    let l: Lexer = new Lexer('except NameError:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.EXCEPT, 0, 0, 'NameError'));
+  });
+
+  test('except statement with no attr', () => {
+    let l: Lexer = new Lexer('except:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.EXCEPT, 0, 0));
+  });
+
+  test('finally statement', () => {
+    let l: Lexer = new Lexer('finally:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FINALLY, 0, 0));
+  });
+
+  test('with statement', () => {
+    let l: Lexer = new Lexer('with open(file) as f:');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.WITH, 0, 0, 'open(file) as f'));
+  });
+
+  test('restart()', () => {
+    let l: Lexer = new Lexer('with open(file as f:');
+    l.restart('if is_old():');
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'is_old()'));
+  });
+
+  test('next() ignores empty lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      '',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    l.next();
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1));
+  });
+
+  test('retract() ignores empty lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      '',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    l.next();
+
+    l.retract();
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()'));
+  });
+
+  test('next() ignores whitespace lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      ' \t \t   ',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    l.next();
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1));
+  });
+
+  // retract() must step over a whitespace-only line between two tokens
+  test('retract() ignores whitespace lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      ' \t  \t   ',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    // Advance to end of input
+    // Eliminates dependence on next()
+    // skipping whitespace
+    do {} while (l.next() !== EOFTOKEN);
+
+    l.retract(); // retract past EOFTOKEN, back onto the last line's token
+    l.retract(); // retract over the whitespace line, back onto the `if`
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()'));
+  });
+
+  test('next() ignores comment lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      ' \t # I hate testing \t',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    l.next();
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1));
+  });
+
+  // retract() must step over a comment-only line between two tokens
+  test('retract() ignores comment lines', () => {
+    let lines: string[] = [
+      'if wurst_available():',
+      ' \t # \t',
+      '    eat_wurst()'
+    ];
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    // Advance to end of input
+    // Eliminates dependence on next()
+    // skipping comment
+    do {} while (l.next() !== EOFTOKEN);
+
+    l.retract(); // retract past EOFTOKEN, back onto the last line's token
+    l.retract(); // retract over the comment line, back onto the `if`
+
+    assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()'));
+  });
+
+  test('next() out of range', () => {
+    let l: Lexer = new Lexer('foo = zaboomafoo');
+    l.next();
+    assert.throws(() => l.next());
+  });
+
+  test('retract() out of range', () => {
+    let l: Lexer = new Lexer('next_token = lexer.next()');
+    assert.throws(() => l.retract());
+  });
+
+  test('retract() validate argument', () => {
+    let l: Lexer = new Lexer();
+
+    // Negative
+    assert.throws(() => l.retract(-1));
+
+    // Zero, it doesn't make sense to retract 0 :P
+    assert.throws(() => l.retract(0));
+
+  });
+
+  test('retract() 1-100', () => {
+    let lines: string[] = Array.from(Array(100), (_, i) => 'line' + i);
+    let reference: LineToken[] = lines.map((_, i) => {
+      return new LineToken(Symbol.INDENT, i, 0);
+    });
+
+    for (var i = 0; i < 100; i++) {
+      let l: Lexer = new Lexer(lines.join('\n'));
+
+      // advance to EOF
+      do {} while (l.next() !== EOFTOKEN);
+
+      // try retract
+      l.retract(i+1);
+
+      assert.deepStrictEqual(l.currToken(), reference[99-i]);
+    }
+  });
+
+  test('2 full lex and retract passes', () => {
+    let lines: string[] = Array.from(Array(100), (_, i)=> 'line' + i);
+    let reference: LineToken[] = lines.map((_, i) => {
+      return new LineToken(Symbol.INDENT, i, 0);
+    });
+
+    let l: Lexer = new Lexer(lines.join('\n'));
+
+    // Twice
+    for (var _ of [0,1]) {
+      // advance to EOF
+      for (var i = 0; i < lines.length; i++) {
+        assert.deepStrictEqual(l.currToken(), reference[i]);
+        l.next();
+      }
+
+      // retract to start
+      for (var i = lines.length - 1; i >= 0; i--) {
+        l.retract();
+        assert.deepStrictEqual(l.currToken(), reference[i]);
+      }
+    }
+  });
+});
--- a/src/test/suites/pylex/node.test.ts
+++ b/src/test/suites/pylex/node.test.ts
@@ -0,0 +1,121 @@
+import * as assert from 'assert';
+import * as vscode from 'vscode';
+import { after } from 'mocha';
+import { deparent } from '../../util';
+
+import LineToken from '../../../pylex/token';
+import { Symbol } from '../../../pylex/token';
+import LexNode from '../../../pylex/node';
+
+suite('LexNode Test Suite', () => {
+  after(() => {
+    vscode.window.showInformationMessage('All tests passed!');
+  });
+
+  test('children() of leaf', () => {
+    let n: LexNode = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.INDENT, 0, 0));
+    assert.strictEqual(n.children(), null);
+  });
+
+  // children() must hand back exactly the nodes given to the constructor
+  test('children() of internal node', () => {
+    let children: LexNode[] = [
+      new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)),
+      new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)),
+      new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)),
+      new LexNode('leafLexNode4', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 5, 1)),
+      new LexNode('leafLexNode5', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1))
+    ];
+
+    let parent: LexNode = new LexNode(
+      'internalLexNode',
+      vscode.TreeItemCollapsibleState.None,
+      new LineToken(Symbol.FUNCTION, 0, 0, 'foobar'),
+      children
+    );
+
+    assert.notStrictEqual(parent.children(), null);
+    // compare lengths: a strict comparison against a fresh [] can never fail
+    assert.notStrictEqual(parent.children()!.length, 0);
+    assert.strictEqual(parent.children()!.length, children.length);
+    // the assertion has to sit inside the loop body to check every child
+    for (let i = 0; i < children.length; i++) {
+      assert.strictEqual(parent.children()![i], children[i]);
+    }
+  });
+
+  test('adopt() to empty', () => {
+    let children: LexNode[] = [
+      new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)),
+      new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)),
+      new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)),
+      new LexNode('leafLexNode4', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 5, 1)),
+      new LexNode('leafLexNode5', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1))
+    ];
+
+    let testParent: LexNode = new LexNode(
+      'internalLexNode',
+      vscode.TreeItemCollapsibleState.None,
+      new LineToken(Symbol.FUNCTION, 1, 0, 'foobar')
+    );
+
+    let referenceParent: LexNode = new LexNode(
+      'internalLexNode',
+      vscode.TreeItemCollapsibleState.None,
+      new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'),
+      children
+    );
+
+    // re-create the reference children with referenceParent as their parent
+    referenceParent = new LexNode(
+      referenceParent.label,
+      referenceParent.collapsibleState,
+      referenceParent.token,
+      referenceParent.children()!.map(c => new LexNode(c.label, c.collapsibleState, c.token, null, referenceParent))
+    );
+
+    testParent.adopt(children);
+
+    assert.deepStrictEqual(deparent(testParent), deparent(referenceParent));
+  });
+
+  test('adopt() to !empty', () => {
+    let children1: LexNode[] = [
+      new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)),
+    ];
+
+    let children2: LexNode[] = [
+      new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)),
+      new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)),
+    ];
+
+    let testParent: LexNode = new LexNode(
+      'internalLexNode',
+      vscode.TreeItemCollapsibleState.None,
+      new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'),
+      children1,
+    );
+
+    let referenceParent: LexNode = new LexNode(
+      'internalLexNode',
+      vscode.TreeItemCollapsibleState.None,
+      new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'),
+      children1.concat(children2),
+    );
+
+    testParent.adopt(children2);
+
+    assert.deepStrictEqual(deparent(testParent), deparent(referenceParent));
+  });
+
+  test('tooltip without line number', () => {
+    let testTooltip: string | vscode.MarkdownString | undefined = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, -1, -11)).tooltip;
+    let referenceTooltip: string = "leafLexNode";
+    assert.notStrictEqual(testTooltip, undefined);
+    assert.strictEqual(testTooltip, referenceTooltip);
+  });
+
+  test('tooltip with line number', () => {
+    let testTooltip: string | vscode.MarkdownString | undefined = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1)).tooltip;
+    let referenceTooltip: string = "leafLexNode: 7"; // 7 because it's 0 indexed in memory, but editor lines start at 1
+    assert.notStrictEqual(testTooltip, undefined);
+    assert.strictEqual(testTooltip, referenceTooltip);
+  });
+});
--- a/src/test/suites/pylex/parser.test.ts
+++ b/src/test/suites/pylex/parser.test.ts
@@ -0,0 +1,239 @@
+import * as assert from 'assert';
+import * as vscode from 'vscode';
+import { after } from 'mocha';
+import { deparent, root } from '../../util';
+
+import Parser from '../../../pylex/parser';
+import LexNode from '../../../pylex/node';
+import LineToken from '../../../pylex/token';
+import { Symbol } from '../../../pylex/token';
+
+/** One table-driven parser case: the source lines to parse and the tree expected back. */
+type ParserTest = {
+  /** Human-readable title of the case. */
+  name: string;
+  /** Source text, one array element per line. */
+  input: string[];
+  /** Tree the parser is expected to produce for `input`. */
+  output: LexNode;
+};
+
+/**
+ * Builds one expected LexNode. Every node in the table below uses
+ * TreeItemCollapsibleState.None; `children` is only forwarded to the
+ * constructor when it is supplied.
+ */
+function expectedNode(label: string, token: LineToken, children?: LexNode[]): LexNode {
+  const state = vscode.TreeItemCollapsibleState.None;
+  return children === undefined
+    ? new LexNode(label, state, token)
+    : new LexNode(label, state, token, children);
+}
+
+/**
+ * Parser cases. LineToken arguments are (symbol, line index, indent level,
+ * optional attribute text); line indices are 0-based positions in `input`.
+ */
+const tests: ParserTest[] = [
+  // --- inputs that yield an empty tree ---
+  {
+    name: 'No Input',
+    input: [],
+    output: root(null),
+  },
+  {
+    name: 'Single line without construct',
+    input: ['foo = "Yellow M&Ms make me angry >:('],
+    output: root(null),
+  },
+
+  // --- a single construct ---
+  {
+    name: 'Single line with construct',
+    input: ['for x of y:'],
+    output: root([
+      expectedNode('for x of y', new LineToken(Symbol.FOR, 0, 0, 'x of y')),
+    ]),
+  },
+
+  // --- flat sequences ---
+  {
+    name: 'Sequential lines, without construct',
+    input: [
+      'bar = "Blue M&Ms make me happy <:)"',
+      'reba = "A hard working gal"',
+    ],
+    output: root(null),
+  },
+  {
+    name: 'Sequential lines, with, then without construct',
+    input: [
+      'if radioshack:',
+      '    print radioshack.hours',
+      'billy = "Scrubbly Bubbles!"',
+    ],
+    output: root([
+      expectedNode('if radioshack', new LineToken(Symbol.IF, 0, 0, 'radioshack')),
+    ]),
+  },
+  {
+    name: 'Sequential lines, without, then with construct',
+    input: [
+      'billy = "Scrubbly Bubbles!"',
+      'if radioshack:',
+      '    print radioshack.hours',
+    ],
+    output: root([
+      expectedNode('if radioshack', new LineToken(Symbol.IF, 1, 0, 'radioshack')),
+    ]),
+  },
+  {
+    name: 'Sequential lines with constructs',
+    input: [
+      'if yummy:',
+      '    print("HOoray!")',
+      'elif just_ok:',
+      '    print("Do you have anything else?")',
+      'else:',
+      '    print("You really eat this?")',
+    ],
+    output: root([
+      expectedNode('if yummy', new LineToken(Symbol.IF, 0, 0, 'yummy')),
+      expectedNode('elif just_ok', new LineToken(Symbol.ELIF, 2, 0, 'just_ok')),
+      expectedNode('else', new LineToken(Symbol.ELSE, 4, 0)),
+    ]),
+  },
+
+  // --- nested blocks ---
+  {
+    name: 'Singly Nested Block',
+    input: [
+      'if yummy:',
+      '    if in_my_tummy:',
+      '        exclaim("Scrumdiddlyumptious!")',
+    ],
+    output: root([
+      expectedNode('if yummy', new LineToken(Symbol.IF, 0, 0, 'yummy'), [
+        expectedNode('if in_my_tummy', new LineToken(Symbol.IF, 1, 1, 'in_my_tummy')),
+      ]),
+    ]),
+  },
+  {
+    name: 'Singly Nested Block, then Block',
+    input: [
+      'if yummy:',
+      '    if in_my_tummy:',
+      '        exclaim("Scrumdiddlyumptious!")',
+      'else:',
+      '    exclaim("DAESGUSTEN~)"',
+    ],
+    output: root([
+      expectedNode('if yummy', new LineToken(Symbol.IF, 0, 0, 'yummy'), [
+        expectedNode('if in_my_tummy', new LineToken(Symbol.IF, 1, 1, 'in_my_tummy')),
+      ]),
+      expectedNode('else', new LineToken(Symbol.ELSE, 3, 0)),
+    ]),
+  },
+  {
+    name: 'Doubly Nested Block',
+    input: [
+      'if yummy:',
+      '    if in_my_tummy:',
+      '        if looks_like_a_mummy:',
+      '            print("you have a spot on your tummy"',
+      'else:',
+      '    print("Food is food...")',
+    ],
+    output: root([
+      expectedNode('if yummy', new LineToken(Symbol.IF, 0, 0, 'yummy'), [
+        expectedNode('if in_my_tummy', new LineToken(Symbol.IF, 1, 1, 'in_my_tummy'), [
+          expectedNode('if looks_like_a_mummy', new LineToken(Symbol.IF, 2, 2, 'looks_like_a_mummy')),
+        ]),
+      ]),
+      expectedNode('else', new LineToken(Symbol.ELSE, 4, 0)),
+    ]),
+  },
+  {
+    name: 'Doubly Nested Block, with multiple indent resets',
+    input: [
+      'if yummy:',
+      '    if in_my_tummy:',
+      '        if looks_like_a_mummy:',
+      '            print("you have a spot on your tummy"',
+      '        else:',
+      '            print("eek! a zombie!)',
+      '    elif in_my_mouth:',
+      '        print("ill be in my tummy soon!"',
+      'else:',
+      '    print("Food is food...")',
+    ],
+    output: root([
+      expectedNode('if yummy', new LineToken(Symbol.IF, 0, 0, 'yummy'), [
+        expectedNode('if in_my_tummy', new LineToken(Symbol.IF, 1, 1, 'in_my_tummy'), [
+          expectedNode('if looks_like_a_mummy', new LineToken(Symbol.IF, 2, 2, 'looks_like_a_mummy')),
+          expectedNode('else', new LineToken(Symbol.ELSE, 4, 2)),
+        ]),
+        expectedNode('elif in_my_mouth', new LineToken(Symbol.ELIF, 6, 1, 'in_my_mouth')),
+      ]),
+      expectedNode('else', new LineToken(Symbol.ELSE, 8, 0)),
+    ]),
+  },
+];
+
+/**
+ * Runs every entry of `tests` as its own mocha test: the input lines are
+ * joined with '\n', parsed, passed through deparent(), and compared with
+ * the expected tree.
+ */
+suite('Parser Test Suite', () => {
+  // NOTE(review): mocha runs `after` hooks even when tests have failed, so
+  // this message only signals that the suite finished.
+  after(() => {
+    vscode.window.showInformationMessage('All tests passed!');
+  });
+
+  // `const` in for...of creates a fresh binding on each iteration, so every
+  // test callback captures its own case without a manual copy.
+  for (const currTest of tests) {
+    test(currTest.name, () => {
+      const source = currTest.input.join('\n');
+      const result: LexNode = deparent(new Parser(source).parse());
+
+      assert.deepStrictEqual(result, currTest.output);
+    });
+  }
+});
--- a/src/test/util.ts
+++ b/src/test/util.ts
@@ -0,0 +1,57 @@
+import * as vscode from 'vscode';
+
+import LexNode from '../pylex/node';
+
+/**
+ * TODO: Eliminate need for me.
+ *
+ * Rebuilds a LexNode tree out of fresh nodes. Each new node copies the
+ * original's label, collapsible state and token; children are rebuilt
+ * recursively. Passing `null` returns `null`.
+ *
+ * Exists so tests can compare trees without having to walk the circular
+ * parent-child relationship by hand.
+ */
+function deparent(root: null): null;
+function deparent(root: LexNode): LexNode;
+function deparent(root: any): any {
+  if (root === null) {
+    return null;
+  }
+
+  const kids = root.children();
+  if (kids === null) {
+    // Leaf: children and parent are both passed explicitly as null.
+    return new LexNode(root.label, root.collapsibleState, root.token, null, null);
+  }
+
+  // Interior node: rebuild each child; the parent argument is left out.
+  return new LexNode(root.label, root.collapsibleState, root.token, kids.map(deparent));
+}
+
+/**
+ * Wraps a list of LexNodes (or `null`) in a node labelled "root" with a
+ * null token and a null parent, so expected trees can be written in the
+ * same rooted form the parser tests compare against.
+ *
+ * Needed because the parent-child relationship can't otherwise be
+ * modeled exhaustively when spelling out expected output.
+ */
+function root(nodes: LexNode[] | null): LexNode {
+  const label = "root";
+  const state = vscode.TreeItemCollapsibleState.None;
+
+  return new LexNode(label, state, null, nodes, null);
+}
+
+
+export {
+  deparent,
+  root,
+};