From d105544596714bce01f8373d3f2b11d797e0ca01 Mon Sep 17 00:00:00 2001 From: Jake Grossman Date: Tue, 26 Oct 2021 12:48:04 -0500 Subject: [PATCH] Integrate Parser (#4) Integrate parser --- .github/workflows/vscode-test.yaml | 26 +++ .vscode/launch.json | 2 +- src/extension.ts | 5 + src/pylex/index.ts | 7 + src/pylex/lexer.ts | 214 ++++++++++++++++++++ src/pylex/node.ts | 82 ++++++++ src/pylex/parser.ts | 133 +++++++++++++ src/pylex/token.ts | 66 +++++++ src/test/runTest.ts | 4 +- src/test/suite/extension.test.ts | 13 -- src/test/{suite => suites}/index.ts | 2 +- src/test/suites/pylex/lexer.test.ts | 279 +++++++++++++++++++++++++++ src/test/suites/pylex/node.test.ts | 121 ++++++++++++ src/test/suites/pylex/parser.test.ts | 239 +++++++++++++++++++++++ src/test/util.ts | 57 ++++++ 15 files changed, 1234 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/vscode-test.yaml create mode 100644 src/pylex/index.ts create mode 100644 src/pylex/lexer.ts create mode 100644 src/pylex/node.ts create mode 100644 src/pylex/parser.ts create mode 100644 src/pylex/token.ts delete mode 100644 src/test/suite/extension.test.ts rename src/test/{suite => suites}/index.ts (99%) create mode 100644 src/test/suites/pylex/lexer.test.ts create mode 100644 src/test/suites/pylex/node.test.ts create mode 100644 src/test/suites/pylex/parser.test.ts create mode 100644 src/test/util.ts diff --git a/.github/workflows/vscode-test.yaml b/.github/workflows/vscode-test.yaml new file mode 100644 index 0000000..3e11b5e --- /dev/null +++ b/.github/workflows/vscode-test.yaml @@ -0,0 +1,26 @@ +name: "@vscode/test-electron" + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + strategy: + matrix: + os: [macos-11, ubuntu-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Node.js + uses: actions/setup-node@v1 + with: + node-version: 10.x + - run: npm ci + - run: xvfb-run -a npm test + 
if: runner.os == 'Linux' + - run: npm test + if: runner.os != 'Linux' diff --git a/.vscode/launch.json b/.vscode/launch.json index 670d6e6..ff9c50d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,7 +23,7 @@ "request": "launch", "args": [ "--extensionDevelopmentPath=${workspaceFolder}", - "--extensionTestsPath=${workspaceFolder}/out/test/suite/index" + "--extensionTestsPath=${workspaceFolder}/out/test/suites/index" ], "outFiles": [ "${workspaceFolder}/out/test/**/*.js" diff --git a/src/extension.ts b/src/extension.ts index c8f6e4b..4f3b129 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -1,9 +1,14 @@ import * as vscode from 'vscode'; +import * as pl from './pylex'; + +let parser: pl.Parser = new pl.Parser(); export function activate(context: vscode.ExtensionContext) { console.log('Congratulations, your extension "mind-reader" is now active!'); vscode.window.showInformationMessage('Mind_Reader is loaded!'); + parser.parse('Beep Boop'); + // Increase Font Scale context.subscriptions.push( vscode.commands.registerCommand('mind-reader.increaseFontScale', () => { diff --git a/src/pylex/index.ts b/src/pylex/index.ts new file mode 100644 index 0000000..720a23b --- /dev/null +++ b/src/pylex/index.ts @@ -0,0 +1,7 @@ +// expose parser by default +export {default as Parser} from './parser'; +export {default as LineToken} from './token'; +export {default as Lexer} from './lexer'; +export {default as LexNode} from './node'; +export {TabInfo as TabInfo} from './token'; + diff --git a/src/pylex/lexer.ts b/src/pylex/lexer.ts new file mode 100644 index 0000000..02f831c --- /dev/null +++ b/src/pylex/lexer.ts @@ -0,0 +1,214 @@ +import { LineToken } from '.'; +import { Symbol, EOFTOKEN, TabInfo } from './token'; + +type Rule = { + pattern: RegExp, + type: Symbol, +}; + +/** + * List of recognition patterns, in order of priority + * The first item is a recognition pattern, used to recognize the token + * the second item is the token type + */ +const rules: 
Rule[] = [ + { + pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, + type: Symbol.FUNCTION + }, + { + pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, + type: Symbol.CLASS + }, + { + pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, + type: Symbol.IF + }, + { + pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, + type: Symbol.ELIF + }, + { + pattern: /^\s*else\s*:/, + type: Symbol.ELSE + }, + { + pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, + type: Symbol.FOR + }, + { + pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, + type: Symbol.WHILE + }, + { + pattern: /^\s*try\s*:/, + type: Symbol.TRY + }, + { + pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, + type: Symbol.EXCEPT + }, + { + pattern: /^\s*finally\s*:\s*$/, + type: Symbol.FINALLY + }, + { + pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, + type: Symbol.WITH + }, +]; + +/** + * Line-By-Line Lexer + */ +export default class Lexer { + private textLines: string[] = []; // array of text lines + private pos: number = 0; + private _currToken: LineToken = EOFTOKEN; + + /** + * Calculates indentation level for a line. If using soft tabs, + * indent level rounds up (so, tabSize+1 spaces is 2 levels, + * 2*tabSize+1 is 3, etc.) + * + * @param `text` The line of text. + * @param `tabFmt` A tab information descriptor. + * @return The indent of `text` with consideration for `tabFmt`. + */ + static getIndent(text: string, tabFmt: TabInfo): number { + let leadingSpace: number = text.length - text.trimLeft().length; + let indent: number; + if (tabFmt.hard) { + // used tabs + indent = leadingSpace; + } else { + // use spaces + indent = Math.ceil(leadingSpace/tabFmt.size!); + } + + return indent; + } + + /** + * @param `text` The text to lex. 
+ * @param `tabFmt` A tab information descriptor + */ + constructor(text?: string, private tabFmt?: TabInfo) { + // default is 4 wide expanded tabs + this.tabFmt = { + ...{size: 4, hard: false}, + ...tabFmt + }; + + if (text) { + // normalize every CRLF pair (a string pattern would only replace the first) + text = text.replace(/\r\n/g, '\n'); + } + this.restart(text); + } + + /** + * Restart lexer with new text; with no text, the previous input is discarded. + * + * @param `text` The new text to lex. + */ + restart(text?: string): void { + this.pos = 0; + this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN + this.textLines = text ? text.split(/\r?\n/) : []; // never keep lines from earlier input + if (text) { + this.next(); // advance to the first token + } + } + + /** + * @return the current {@link LineToken}. + */ + currToken(): LineToken { return this._currToken; } + + /** + * Advance the position in the token stream. + * + * @return The new current token, after advancing + */ + next(): LineToken { + if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) { + throw new Error('Cannot advance past end'); + } + + // Until a LineToken is found, or EOF + while (this.pos < this.textLines.length) { + let line: string = this.textLines[this.pos]; + let indent: number = Lexer.getIndent(line, this.tabFmt!); + let token: LineToken; + for (var r of rules) { + // Does line match pattern? + let match: RegExpMatchArray | null = line.match(r.pattern); + if (match) { + // Yes... 
+ if (match.groups) { + token = new LineToken(r.type, this.pos, indent, match.groups["attr"]); + } else { + token = new LineToken(r.type, this.pos, indent); + } + + this._currToken = token; + this.pos++; + return this.currToken(); + } + } + // No rules matched + + // Skip this line if it is whitespace, comment, or empty + if (/^\s*(#.*)?$/.test(line)) { + this.pos++; + continue; + } + + // This is an INDENT token + token = new LineToken(Symbol.INDENT, this.pos, indent); + this._currToken = token; + this.pos++; + return this.currToken(); + } + + // Didn't return, must be EOF + this._currToken = EOFTOKEN; + this.pos++; + return this.currToken(); + } + + /** + * Move backwards in the token stream + * + * @param `n` The number of positions to retract. + * @return The new current token after retracting. + */ + retract(n: number = 1): LineToken { + if (this.pos - 1 - n < 0) { + // -1 because this.pos is currently on the next token + throw new RangeError('Cannot retract past start'); + } + + if (n <= 0) { + throw new RangeError('Retract distance must be positive'); + } + + if (this.pos - n === 0) { + // just restart + this.pos = 0; + return this.next(); + } + + let c = n + 1; + while (c > 0) { + this.pos--; + while (/^\s*(#.*)?$/.test(this.textLines[this.pos])) { + // Skip empty lines + this.pos--; + } + c--; + } + return this.next(); + } +} diff --git a/src/pylex/node.ts b/src/pylex/node.ts new file mode 100644 index 0000000..a51010f --- /dev/null +++ b/src/pylex/node.ts @@ -0,0 +1,82 @@ +import * as vscode from 'vscode'; + +import LineToken from './token'; + +/** + * A node in a Parse tree. + */ +export default class LexNode extends vscode.TreeItem { + + /** + * @param `label` A human-readable string describing this item + * @param `collapsibleState` {@link TreeItemCollapsibleState} of the tree item. + * @param `token` The token at this node. + * @param `_children` The children in this node's subtree. + * @param `_parent` The parent node of this node. 
+ */ + constructor( + public readonly label: string, + public readonly collapsibleState: vscode.TreeItemCollapsibleState, + public readonly token: LineToken | null, + private _children: LexNode[] | null = null, + private _parent: LexNode | null = null, + ) { + super(label, collapsibleState); + this.tooltip = this.label; + if (this.token && this.token.linenr >= 0) { + this.tooltip += `: ${this.token.linenr+1}`; + } + } + + /** + * @return The children of this node. + */ + children(): LexNode[] | null { + return this._children; + } + + /** + * @return The parent of this node. + */ + parent(): LexNode | null { + return this._parent; + } + + /** + * Adopt child nodes. + * + * @param `children` Array of nodes to adopt; copies parented to this node are stored. + */ + adopt(children: LexNode[]): void { + let parentedChildren = children.map(c => new LexNode( + c.label, + c.collapsibleState, + c.token, + c.children(), + this + )); + + // Are there any other children? + if (this._children) { + // Yes... + this._children = this._children.concat(parentedChildren); + } else { + // No.... + this._children = parentedChildren; + } + } + + /** + * Return the root path for this node. + * + * @return A path of parent nodes from this node to the root of the tree. 
+ */ + rootPath(): LexNode[] { + if (this._parent) { + return [new LexNode(this.label, this.collapsibleState, this.token, this._children, this._parent)].concat(this._parent.rootPath()); + } else { + return [new LexNode(this.label, this.collapsibleState, this.token, this._children, this._parent)]; + } + + } +} diff --git a/src/pylex/parser.ts b/src/pylex/parser.ts new file mode 100644 index 0000000..6d3c6d3 --- /dev/null +++ b/src/pylex/parser.ts @@ -0,0 +1,133 @@ +import * as vscode from 'vscode'; + +import { EOFTOKEN, Symbol, TabInfo } from './token'; +import Lexer from './lexer'; +import LexNode from './node'; + +/** + * A parse tree generator + */ +export default class Parser { + private lexer: Lexer; + private currIndent: number; + private root: LexNode; // Root of syntax tree + + /** + * @param `text` Text to parse. + * @param `tabFmt` A tab information descriptor + */ + constructor (private text?: string, private tabFmt?: TabInfo) {} + + /** + * Parse the passed text. + * + * @param `text` Text to parse. If undefined, use current value of `this.text` + * @param `tabFmt` A tab information descriptor + * @return A parse tree representing `text`. 
+ */ + parse(text?: string, tabFmt?: TabInfo): LexNode { + if (text) { + // save text + this.text = text; + } else { + // default to this.text + // this might still be undefined + text = this.text; + } + + if (tabFmt) { + // save tabFmt + this.tabFmt = tabFmt; + } else { + // default to this.tabFmt + // this might still be undefined + tabFmt = this.tabFmt; + } + + // initialize root + this.lexer = new Lexer(this.text, this.tabFmt); + this.root = new LexNode( + "root", + vscode.TreeItemCollapsibleState.None, + null, + null, + null + ); + + // parse children + this.currIndent = 0; + const children = this._parse(this.root); + + if (children.length > 0) { + this.root.adopt(children); + } + return this.root; + } + + private _parse(parent: LexNode | null): LexNode[] { + let children: LexNode[] = []; + while (this.lexer.currToken() !== EOFTOKEN) { + if (this.lexer.currToken().indentLevel < this.currIndent) { + // go up 1 level of recursion at a time to unravel properly + this.currIndent--; + return children; + } else if (this.lexer.currToken().type === Symbol.INDENT) { + // regular code, advance and stay in same block + this.lexer.next(); + continue; + } else { + // new block starts here + const label = this.lexer.currToken().type + (this.lexer.currToken().attr === undefined ? "" : " " + this.lexer.currToken().attr); + let blockRoot = new LexNode( + label, + vscode.TreeItemCollapsibleState.None, + this.lexer.currToken(), + null, + parent + ); + this.lexer.next(); + this.currIndent++; + const blockChildren = this._parse(blockRoot); // Recursively parse all child blocks + if (blockChildren.length > 0) { + blockRoot.adopt(blockChildren); + } + children.push(blockRoot); + } + } + return children; + } + + /** + * Get an array of LexNodes representing the rootpath of LexNodes from the + * passed line number to the root of the document. A list of "this" inside + * "that" inside ... inside the document root. + * + * @param `lineNumber` The line number to query context for. 
+ * @return An array of LexNodes for the root path containing `lineNumber` + */ + context(lineNumber: number): LexNode[] { + if (!this.root.children()) { + return []; + } + + // Returns the LexNode that is the parent + // of the queried line number + let find = (root: LexNode): LexNode | undefined => { + let prevChild: LexNode; + for (var child of root.children()!) { + if (lineNumber < child.token!.linenr) { + if (prevChild!.children()) { + return find(prevChild!); + } else { + return prevChild!; + } + } else { + prevChild = child; + } + } + }; + + let target = find(this.root); + return target!.rootPath(); + } +} diff --git a/src/pylex/token.ts b/src/pylex/token.ts new file mode 100644 index 0000000..a726fb3 --- /dev/null +++ b/src/pylex/token.ts @@ -0,0 +1,66 @@ +/* eslint-disable @typescript-eslint/naming-convention */ +/* ^ allow uppercase enum */ + +/** + * LineToken Symbol Types + */ +export enum Symbol { + FUNCTION = "function", + CLASS = "class", + IF = "if", + ELSE = "else", + ELIF = "elif", + FOR = "for", + WHILE = "while", + TRY = "try", + EXCEPT = "except", + FINALLY = "finally", + WITH = "with", + INDENT = "INDENT", // Indent token, default if not EOF, only contains indent information + EOF = "EOF" +} + +/** + * @typedef {Object} TabInfo + * @prop {number} size // The width of a tab in spaces + * @prop {boolean} hard // Whether to use literal tab characters + */ +export type TabInfo = { + size: number, + hard: boolean, +}; + +/** + * A token for a line in a Python file + */ +export default class LineToken { + + /** + * @param `type` The type of token for this line. + * @param `linenr` The line number (0-indexed) + * @param `indentLevel` The level of indentation. + * @param `attr` Additional item for tokens that might need it. 
+ */ + constructor( + public readonly type: Symbol, + public readonly linenr: number, + public readonly indentLevel: number, + public readonly attr?: any // Any additional things a token might need (class name, control conidition) + ) { } + + /** + * @return A string representation of the token + */ + toString(): string { + return this.type + ", linenr:" + (this.linenr+1) + ", indentLevel: " + this.indentLevel + ", attr: " + this.attr; + } +} + +/** + * The End-Of-File token + * + * EOFTOKEN is returned when `next()` is called + * while the lexer is on the last token in the stream. + */ +const EOFTOKEN = new LineToken(Symbol.EOF, -1, -1); +export { EOFTOKEN }; diff --git a/src/test/runTest.ts b/src/test/runTest.ts index 56239d3..0f1b898 100644 --- a/src/test/runTest.ts +++ b/src/test/runTest.ts @@ -5,10 +5,12 @@ import { runTests } from '@vscode/test-electron'; async function main() { try { // The folder containing package.json + // Passed to `--extensionDevelopmentPath` const extensionDevelopmentPath: string = path.resolve(__dirname, '../../'); // The path to the test runner script - const extensionTestsPath: string = path.resolve(__dirname, './suite/index'); + // Passed to `--extensionTestsPath` + const extensionTestsPath: string = path.resolve(__dirname, './suites/index'); // Download VS Code, unzip it and run the integration test await runTests({ extensionDevelopmentPath, extensionTestsPath }); diff --git a/src/test/suite/extension.test.ts b/src/test/suite/extension.test.ts deleted file mode 100644 index f383478..0000000 --- a/src/test/suite/extension.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import * as assert from 'assert'; -import * as vscode from 'vscode'; -import { after } from 'mocha'; - -suite('Dummy Test Suite', () => { - after(() => { - vscode.window.showInformationMessage('All tests passed!'); - }); - - test('Dummy Test', () => { - assert.strictEqual(0 === 0, true); - }); -}); diff --git a/src/test/suite/index.ts b/src/test/suites/index.ts similarity 
index 99% rename from src/test/suite/index.ts rename to src/test/suites/index.ts index 088913c..087487b 100644 --- a/src/test/suite/index.ts +++ b/src/test/suites/index.ts @@ -35,4 +35,4 @@ export function run(): Promise { } }); }); -} +} \ No newline at end of file diff --git a/src/test/suites/pylex/lexer.test.ts b/src/test/suites/pylex/lexer.test.ts new file mode 100644 index 0000000..e0ce5a3 --- /dev/null +++ b/src/test/suites/pylex/lexer.test.ts @@ -0,0 +1,279 @@ +import * as assert from 'assert'; +import * as vscode from 'vscode'; +import { after } from 'mocha'; + +import Lexer from '../../../pylex/lexer'; +import LineToken from '../../../pylex/token'; +import { EOFTOKEN, Symbol } from '../../../pylex/token'; + +suite('Lexer Test Suite', () => { + after(() => { + vscode.window.showInformationMessage('All tests passed!'); + }); + + test('Undefined', () => { + let l: Lexer = new Lexer(undefined); + assert.deepStrictEqual(l.currToken(), EOFTOKEN); + }); + + test('Empty String', () => { + let l: Lexer = new Lexer(''); + assert.deepStrictEqual(l.currToken(), EOFTOKEN); + }); + + test('Whitespace', () => { + let l: Lexer = new Lexer(' \t\t'.repeat(4).repeat(4)); + assert.deepStrictEqual(l.currToken(), EOFTOKEN); + }); + + test('Non-Whitespace with no construct', () => { + let l: Lexer = new Lexer('foobar'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 0, 0)); + }); + + test('getIndent() accuracy, spaces', () => { + for (var i = 0; i < 100; i++) { + let l: Lexer = new Lexer('    '.repeat(i) + 'foobar'); + assert.strictEqual(l.currToken().indentLevel, i); + } + }); + + test('getIndent() accuracy, tabs', () => { + for (var i = 0; i < 100; i++) { + let l: Lexer = new Lexer('\t'.repeat(i) + 'foobar', {size: 4, hard: true}); + assert.strictEqual(l.currToken().indentLevel, i); + } + }); + + test('getIndent() accuracy, spaces with incomplete tab', () => { + for (var i = 0; i < 100; i++) { + for (var j = 1; j <= 3; j++) { + let l: Lexer = new Lexer(' 
'.repeat(i) + ' '.repeat(j) + 'foobar', {size: 4, hard: false}); + assert.strictEqual(l.currToken().indentLevel, i+1); + } + } + }); + + test('class definition', () => { + let l: Lexer = new Lexer('class Foobar(object):'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.CLASS, 0, 0, 'Foobar')); + }); + + test('function definition', () => { + let l: Lexer = new Lexer('def Barbaz(this, that, andTheOther):'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FUNCTION, 0, 0, 'Barbaz')); + }); + + test('if statement', () => { + let l: Lexer = new Lexer('if True and bar == baz:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'True and bar == baz')); + }); + + test('elif statement', () => { + let l: Lexer = new Lexer('elif name == "bar" and True:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.ELIF, 0, 0, 'name == "bar" and True')); + }); + + test('else statement', () => { + let l: Lexer = new Lexer('else:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.ELSE, 0, 0)); + }); + + test('for loop', () => { + let l: Lexer = new Lexer('for pickle in pickleJars:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FOR, 0, 0, 'pickle in pickleJars')); + }); + + test('while loop', () => { + let l: Lexer = new Lexer('while numCookies < capacity:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.WHILE, 0, 0, 'numCookies < capacity')); + }); + + test('try statement', () => { + let l: Lexer = new Lexer('try:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.TRY, 0, 0)); + }); + + test('except statement with attr', () => { + let l: Lexer = new Lexer('except NameError:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.EXCEPT, 0, 0, 'NameError')); + }); + + test('except statement with no attr', () => { + let l: Lexer = new Lexer('except:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.EXCEPT, 0, 0)); + }); + + test('finally statement', () 
=> { + let l: Lexer = new Lexer('finally:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.FINALLY, 0, 0)); + }); + + test('with statement', () => { + let l: Lexer = new Lexer('with open(file) as f:'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.WITH, 0, 0, 'open(file) as f')); + }); + + test('restart()', () => { + let l: Lexer = new Lexer('with open(file as f:'); + l.restart('if is_old():'); + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'is_old()')); + }); + + test('next() ignores empty lines', () => { + let lines: string[] = [ + 'if wurst_available():', + '', + ' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + l.next(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1)); + }); + + test('retract() ignores empty lines', () => { + let lines: string[] = [ + 'if wurst_available():', + '', + ' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + l.next(); + + l.retract(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()')); + }); + + test('next() ignores whitespace lines', () => { + let lines: string[] = [ + 'if wurst_available():', + ' \t \t ', + ' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + l.next(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1)); + }); + + test('retract() ignores whitespace lines', () => { + let lines: string[] = [ + 'if wurst_available():', + ' \t \t ', + ' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + // Advance to end of input + // Eliminates dependence on next() + // skipping whitespace + do {} while (l.next() !== EOFTOKEN); + + l.retract(); // retract past EOFTOKEn + l.retract(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()')); + }); + + test('next() ignores comment lines', () => { + let lines: string[] = [ + 'if wurst_available():', + ' \t # I hate testing \t', + 
' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + l.next(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.INDENT, 2, 1)); + }); + + test('retract() ignores comment lines', () => { + let lines: string[] = [ + 'if wurst_available():', + ' \t # \t', + ' eat_wurst()' + ]; + let l: Lexer = new Lexer(lines.join('\n')); + + // Advance to end of input + // Eliminates dependence on next() + // skipping comment + do {} while (l.next() !== EOFTOKEN); + + l.retract(); // retract past EOFTOKEn + l.retract(); + + assert.deepStrictEqual(l.currToken(), new LineToken(Symbol.IF, 0, 0, 'wurst_available()')); + }); + + test('next() out of range', () => { + let l: Lexer = new Lexer('foo = zaboomafoo'); + l.next(); + assert.throws(() => l.next()); + }); + + test('retract() out of range', () => { + let l: Lexer = new Lexer('next_token = lexer.next()'); + assert.throws(() => l.retract()); + }); + + test('retract() validate argument', () => { + let l: Lexer = new Lexer(); + + // Negative + assert.throws(() => l.retract(-1)); + + // Zero, it doesn't make sense to retract 0 :P + assert.throws(() => l.retract(0)); + + }); + + test('retract() 1-100', () => { + let lines: string[] = Array.from(Array(100), (_, i) => 'line' + i); + let reference: LineToken[] = lines.map((_, i) => { + return new LineToken(Symbol.INDENT, i, 0); + }); + + for (var i = 0; i < 100; i++) { + let l: Lexer = new Lexer(lines.join('\n')); + + // advance to EOF + do {} while (l.next() !== EOFTOKEN); + + // try retract + l.retract(i+1); + + assert.deepStrictEqual(l.currToken(), reference[99-i]); + } + }); + + test('2 full lex and retract passes', () => { + let lines: string[] = Array.from(Array(100), (_, i)=> 'line' + i); + let reference: LineToken[] = lines.map((_, i) => { + return new LineToken(Symbol.INDENT, i, 0); + }); + + let l: Lexer = new Lexer(lines.join('\n')); + + // Twice + for (var _ of [0,1]) { + // advance to EOF + for (var i = 0; i < lines.length; i++) { + 
assert.deepStrictEqual(l.currToken(), reference[i]); + l.next(); + } + + // retract to start + for (var i = lines.length - 1; i >= 0; i--) { + l.retract(); + assert.deepStrictEqual(l.currToken(), reference[i]); + } + } + }); +}); diff --git a/src/test/suites/pylex/node.test.ts b/src/test/suites/pylex/node.test.ts new file mode 100644 index 0000000..85e7fec --- /dev/null +++ b/src/test/suites/pylex/node.test.ts @@ -0,0 +1,121 @@ +import * as assert from 'assert'; +import * as vscode from 'vscode'; +import { after } from 'mocha'; +import { deparent } from '../../util'; + +import LineToken from '../../../pylex/token'; +import { Symbol } from '../../../pylex/token'; +import LexNode from '../../../pylex/node'; + +suite('LexNode Test Suite', () => { + after(() => { + vscode.window.showInformationMessage('All tests passed!'); + }); + + test('children() of leaf', () => { + let n: LexNode = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.INDENT, 0, 0)); + assert.strictEqual(n.children(), null); + }); + + test('children() of internal node', () => { + let children: LexNode[] = [ + new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)), + new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)), + new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)), + new LexNode('leafLexNode4', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 5, 1)), + new LexNode('leafLexNode5', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1)) + ]; + + let parent: LexNode = new LexNode( + 'internalLexNode', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FUNCTION, 0, 0, 'foobar'), + children + ); + + assert.notStrictEqual(parent.children(), null); + assert.notStrictEqual(parent.children(), []); + assert.strictEqual(parent.children()!.length, children.length); + for (var i = 0; i 
< children.length; i++) { + assert.strictEqual(parent.children()![i], children[i]); + } + }); + + test('adopt() to empty', () => { + let children: LexNode[] = [ + new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)), + new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)), + new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)), + new LexNode('leafLexNode4', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 5, 1)), + new LexNode('leafLexNode5', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1)) + ]; + + let testParent: LexNode = new LexNode( + 'internalLexNode', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FUNCTION, 1, 0, 'foobar') + ); + + let referenceParent: LexNode = new LexNode( + 'internalLexNode', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'), + children + ); + + // parentify reference children + referenceParent = new LexNode( + referenceParent.label, + referenceParent.collapsibleState, + referenceParent.token, + referenceParent.children()!.map(c => new LexNode(c.label, c.collapsibleState, c.token, null, referenceParent)) + ); + + testParent.adopt(children); + + assert.deepStrictEqual(deparent(testParent), deparent(referenceParent)); + }); + + test('adopt() to !empty', () => { + let children1: LexNode[] = [ + new LexNode('leafLexNode1', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 2, 1)), + ]; + + let children2: LexNode[] = [ + new LexNode('leafLexNode2', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 3, 1)), + new LexNode('leafLexNode3', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 4, 1)), + ]; + + let testParent: LexNode = new LexNode( + 'internalLexNode', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'), + children1, + 
); + + let referenceParent: LexNode = new LexNode( + 'internalLexNode', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FUNCTION, 1, 0, 'foobar'), + children1.concat(children2), + ); + + testParent.adopt(children2); + + assert.deepStrictEqual(deparent(testParent), deparent(referenceParent)); + }); + + test('tooltip without line number', () => { + let testTooltip: string | vscode.MarkdownString | undefined = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, -1, -11)).tooltip; + let referenceTooltip: string = "leafLexNode"; + assert.notStrictEqual(testTooltip, undefined); + assert.strictEqual(testTooltip, referenceTooltip); + }); + + test('tooltip with line number', () => { + let testTooltip: string | vscode.MarkdownString | undefined = new LexNode('leafLexNode', vscode.TreeItemCollapsibleState.None, new LineToken(Symbol.WHILE, 6, 1)).tooltip; + let referenceTooltip: string = "leafLexNode: 7"; // 7 because it's 0 indexed in memory, but editor lines start at 1 + assert.notStrictEqual(testTooltip, undefined); + assert.strictEqual(testTooltip, referenceTooltip); + }); +}); diff --git a/src/test/suites/pylex/parser.test.ts b/src/test/suites/pylex/parser.test.ts new file mode 100644 index 0000000..a11271b --- /dev/null +++ b/src/test/suites/pylex/parser.test.ts @@ -0,0 +1,239 @@ +import * as assert from 'assert'; +import * as vscode from 'vscode'; +import { after } from 'mocha'; +import { deparent, root } from '../../util'; + +import Parser from '../../../pylex/parser'; +import LexNode from '../../../pylex/node'; +import LineToken from '../../../pylex/token'; +import { Symbol } from '../../../pylex/token'; + +type ParserTest = { + name: string, + input: string[], + output: LexNode, +}; + +const tests: ParserTest[] = [ + { + name: 'No Input', + input: [ ], + output: root(null), + }, + + { + name: 'Single line without construct', + input: [ 'foo = "Yellow M&Ms make me angry >:(' ], + output: root(null), + }, + + { + 
name: 'Single line with construct', + input: [ 'for x of y:' ], + output: root([ + new LexNode( + 'for x of y', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.FOR, 0, 0, 'x of y') + ) + ]), + }, + + { + name: 'Sequential lines, without construct', + input: [ + 'bar = "Blue M&Ms make me happy <:)"', + 'reba = "A hard working gal"' + ], + output: root(null), + }, + + { + name: 'Sequential lines, with, then without construct', + input: [ + 'if radioshack:', + ' print radioshack.hours', + 'billy = "Scrubbly Bubbles!"' + ], + output: root([ + new LexNode('if radioshack', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'radioshack')) + ]) + }, + + { + name: 'Sequential lines, without, then with construct', + input: [ + 'billy = "Scrubbly Bubbles!"', + 'if radioshack:', + ' print radioshack.hours' + ], + output: root([ + new LexNode('if radioshack', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 1, 0, 'radioshack')) + ]) + }, + + { + name: 'Sequential lines with constructs', + input: [ + 'if yummy:', + ' print("HOoray!")', + 'elif just_ok:', + ' print("Do you have anything else?")', + 'else:', + ' print("You really eat this?")', + ], + output: root([ + new LexNode('if yummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'yummy')), + new LexNode('elif just_ok', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELIF, 2, 0, 'just_ok')), + new LexNode('else', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELSE, 4, 0)), + ]) + }, + + { + name: 'Singly Nested Block', + input: [ + 'if yummy:', + ' if in_my_tummy:', + ' exclaim("Scrumdiddlyumptious!")' + ], + output: root([ + new LexNode('if yummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'yummy'), + [ + new LexNode('if in_my_tummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 1, 1, 'in_my_tummy')) + ] + ) + ]) + }, + + { + name: 'Singly Nested Block, then 
Block', + input: [ + 'if yummy:', + ' if in_my_tummy:', + ' exclaim("Scrumdiddlyumptious!")', + 'else:', + ' exclaim("DAESGUSTEN~)"' + ], + output: root([ + new LexNode('if yummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'yummy'), + [ + new LexNode('if in_my_tummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 1, 1, 'in_my_tummy')) + ] + ), + new LexNode('else', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELSE, 3, 0), + ) + ]) + }, + + { + name: 'Doubly Nested Block', + input: [ + 'if yummy:', + ' if in_my_tummy:', + ' if looks_like_a_mummy:', + ' print("you have a spot on your tummy"', + 'else:', + ' print("Food is food...")' + ], + output: root([ + new LexNode('if yummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'yummy'), + [ + new LexNode('if in_my_tummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 1, 1, 'in_my_tummy'), + [ + new LexNode('if looks_like_a_mummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 2, 2, 'looks_like_a_mummy')) + ] + ) + ] + ), + new LexNode('else', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELSE, 4, 0), + ) + ]) + }, + + { + name: 'Doubly Nested Block, with multiple indent resets', + input: [ + 'if yummy:', + ' if in_my_tummy:', + ' if looks_like_a_mummy:', + ' print("you have a spot on your tummy"', + ' else:', + ' print("eek! 
a zombie!)', + ' elif in_my_mouth:', + ' print("ill be in my tummy soon!"', + 'else:', + ' print("Food is food...")' + ], + output: root([ + new LexNode('if yummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 0, 0, 'yummy'), + [ + new LexNode('if in_my_tummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 1, 1, 'in_my_tummy'), + [ + new LexNode('if looks_like_a_mummy', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.IF, 2, 2, 'looks_like_a_mummy')), + new LexNode('else', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELSE, 4, 2)) + ] + ), + new LexNode('elif in_my_mouth', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELIF, 6, 1, 'in_my_mouth')) + ] + ), + new LexNode('else', + vscode.TreeItemCollapsibleState.None, + new LineToken(Symbol.ELSE, 8, 0) + ) + ]) + } +]; + +suite('Parser Test Suite', () => { + after(() => { + vscode.window.showInformationMessage('All tests passed!'); + }); + + for (var t of tests) { + let currTest = t; // without this, all test calls get the last test + test(currTest.name, () => { + let result: LexNode = deparent(new Parser(currTest.input.join('\n')).parse()); + process.stdout.write(Object.entries(result).toString()); + + assert.deepStrictEqual(result, currTest.output); + }); + } +}); diff --git a/src/test/util.ts b/src/test/util.ts new file mode 100644 index 0000000..8967c81 --- /dev/null +++ b/src/test/util.ts @@ -0,0 +1,57 @@ +import * as vscode from 'vscode'; + +import LexNode from '../pylex/node'; + +/** + * TODO: Eliminate need for me. + * Recursively deparents a LexNode tree. 
Needed + * because I wasn't able to iterate the circular parent-child + * relationship by hand + */ +function deparent(root: null): null; +function deparent(root: LexNode): LexNode; +function deparent(root: any): any { + if (root === null) { + return root; + } else { + if (root.children() !== null) { + return new LexNode( + root.label, + root.collapsibleState, + root.token, + root.children()!.map(deparent), + ); + } else { + return new LexNode( + root.label, + root.collapsibleState, + root.token, + null, + null + ); + } + } +} + +/** + * "Roots" a list of lexNodes to match the parser + * + * Required to properly test the output of the parser, + * since the parent child-relationship can't be modeled + * exhaustively otherwise + */ +function root(nodes: LexNode[] | null): LexNode { + return new LexNode( + "root", + vscode.TreeItemCollapsibleState.None, + null, + nodes, + null + ); +} + + +export { + deparent, + root, +};