From a619146afc9b0fd80e7a5587e1dcb5e36662be68 Mon Sep 17 00:00:00 2001
From: tel0065 <77864718+tel0065@users.noreply.github.com>
Date: Fri, 25 Mar 2022 10:44:37 -0500
Subject: [PATCH] lexer.ts - added types

---
 src/pylex/lexer.ts | 217 ++++++++++++++++++++++++++-------------------
 1 file changed, 124 insertions(+), 93 deletions(-)
diff --git a/src/pylex/lexer.ts b/src/pylex/lexer.ts
index 5fc4824..9b8e8cd 100644
--- a/src/pylex/lexer.ts
+++ b/src/pylex/lexer.ts
@@ -1,216 +1,195 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", {
-    value: true
-});
-const _1 = require(".");
-const token_1 = require("./token");
+import { LineToken } from '.';
+import { Symbol, EOFTOKEN, TabInfo } from './token';
+
+type Rule = {
+    pattern: RegExp,
+    type: Symbol,
+};
+
 /**
  * List of recognition patterns, in order of priority
  * The first item is a recognition pattern, used to recognize the token
  * the second item is the token type
  */
-const rules = [
+const rules: Rule[] = [
     {
         pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/,
-        type: token_1.Symbol.FUNCTION
+        type: Symbol.FUNCTION
     },
     {
         pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
-        type: token_1.Symbol.CLASS
+        type: Symbol.CLASS
     },
     {
         pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
-        type: token_1.Symbol.IF
+        type: Symbol.IF
     },
     {
         pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
-        type: token_1.Symbol.ELIF
+        type: Symbol.ELIF
     },
     {
         pattern: /^\s*else\s*:/,
-        type: token_1.Symbol.ELSE
+        type: Symbol.ELSE
     },
     {
         pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
-        type: token_1.Symbol.FOR
+        type: Symbol.FOR
     },
     {
         pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
-        type: token_1.Symbol.WHILE
+        type: Symbol.WHILE
     },
     {
         pattern: /^\s*try\s*:/,
-        type: token_1.Symbol.TRY
+        type: Symbol.TRY
     },
     {
         pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
-        type: token_1.Symbol.EXCEPT
+        type: Symbol.EXCEPT
     },
     {
         pattern: /^\s*finally\s*:\s*$/,
-        type: token_1.Symbol.FINALLY
+        type: Symbol.FINALLY
     },
     {
         pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
-        type: token_1.Symbol.WITH
+        type: Symbol.WITH
     },
 ];
+
 /**
  * Line-By-Line Lexer
  */
-class Lexer {
+export default class Lexer {
+    private textLines: string[] = []; // array of text lines
+    private pos: number = 0;
+    private _currToken: LineToken = EOFTOKEN;
+
     /**
      * @param `text` The text to lex.
      * @param `tabFmt` A tab information descriptor
      */
-    constructor(text, tabFmt) {
-        this.tabFmt = tabFmt;
-        this.textLines = []; // array of text lines
-        this.pos = 0;
-        this._currToken = token_1.EOFTOKEN;
+    constructor(text ? : string, private tabFmt ? : TabInfo) {
         // default is 4 wide expanded tabs
-        this.tabFmt = Object.assign({
-            size: 4,
-            hard: false
-        }, tabFmt);
+        this.tabFmt = {
+            ...{
+                size: 4,
+                hard: false
+            },
+            ...tabFmt
+        };
+
         if (text) {
             // normalize linefeeds
             text = text.replace('\r\n', '\n');
         }
         this.restart(text);
     }
-    /**
-     * Calculates indentation level for a line. If using soft tabs,
-     * indent level rounds up (so, tabSize+1 spaces is 2 levels,
-     * 2*tabSize+1 is 3, etc.)
-     *
-     * @param `text` The line of text.
-     * @param `tabFmt` A tab information descriptor.
-     * @return The indent of `text` with consideration for `tabFmt`.
-     */
-    static getIndent(text, tabFmt) {
-        let leadingSpace = text.length - text.trimLeft().length;
-        let indent;
-        if (tabFmt.hard) {
-            // used tabs
-            indent = leadingSpace;
-        } else {
-            // use spaces
-            indent = Math.ceil(leadingSpace / tabFmt.size);
-        }
-        return indent;
-    }
-    /**
-     * Calculates leading spaces for a line. 
-     * This method uses arithmetic to calculate the number of leading spaces
-     *  
-     * @param `text` The line of text.
-     * @return The number of leading spaces of `text`.
-     */
-    static getLeadingSpacesByArithmetic(textLine) {
-        const leadingSpaces = textLine.text.length - textLine.text.trimStart().length;
 
-        return leadingSpaces;
-    }
-    /**
-     * Calculates leading spaces for a line. 
-     * This method finds the index position of the first non-whitespace character
-     * Since the index is built using a 0-index, the position of this character
-     * will equal the number of spaces preceding the character.
-     *  
-     * @param `text` The line of text.
-     * @return The number of leading spaces of `text` with respect to the index position of the first non-whitespace character.
-     */
-    static getLeadingSpacesByIndex(textLine) {
-        const indexNum = textLine.firstNonWhitespaceCharacterIndex;
-
-        return indexNum;
-    }
     /**
      * Restart lexer with new text.
      *
      * @param `text` The new text to lex.
      */
-    restart(text) {
+    restart(text ? : string): void {
         this.pos = 0;
-        this._currToken = token_1.EOFTOKEN; // if no input, already on EOFTOKEN
+        this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN
+        
         if (text) {
             this.textLines = text.split('\n');
             this.next(); // advance to the first token
         }
     }
+
     /**
      * @return the current {@link LineToken}.
      */
-    currToken() {
+    currToken(): LineToken {
         return this._currToken;
     }
+
     /**
      * Advance the position in the token stream.
      *
      * @return The new current token, after advancing
      */
-    next() {
-        if (this._currToken === token_1.EOFTOKEN && this.pos > this.textLines.length) {
+    next(): LineToken {
+        if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) {
             throw new Error('Cannot advance past end');
         }
+
         // Until a LineToken is found, or EOF
         while (this.pos < this.textLines.length) {
-            let line = this.textLines[this.pos];
-            let indent = Lexer.getIndent(line, this.tabFmt);
-            let token;
+            let line: string = this.textLines[this.pos];
+            let indent: number = Lexer.getIndent(line, this.tabFmt!);
+            let token: LineToken;
+
             for (var r of rules) {
                 // Does line match pattern?
-                let match = line.match(r.pattern);
+                let match: RegExpMatchArray | null = line.match(r.pattern);
                 if (match) {
                     // Yes...
                     if (match.groups) {
-                        token = new _1.LineToken(r.type, this.pos, indent, match.groups["attr"]);
-                    } else {
-                        token = new _1.LineToken(r.type, this.pos, indent);
+                        token = new LineToken(r.type, this.pos, indent, match.groups["attr"]);
+                    } 
+                    else {
+                        token = new LineToken(r.type, this.pos, indent);
                     }
+
                     this._currToken = token;
                     this.pos++;
+
                     return this.currToken();
                 }
             }
             // No rules matched
+
             // TODO: move to rules
             if (/^\s*(#.*)?$/.test(line)) {
                 // "empty" line
-                token = new _1.LineToken(token_1.Symbol.EMPTY, this.pos, 999999);
-            } else {
+                token = new LineToken(Symbol.EMPTY, this.pos, 999999);
+            } 
+            else {
                 // This is an INDENT token
-                token = new _1.LineToken(token_1.Symbol.INDENT, this.pos, indent);
+                token = new LineToken(Symbol.INDENT, this.pos, indent);
             }
+
             this._currToken = token;
             this.pos++;
+
             return this.currToken();
         }
+
         // Didn't return, must be EOF
-        this._currToken = token_1.EOFTOKEN;
+        this._currToken = EOFTOKEN;
         this.pos++;
+
         return this.currToken();
     }
+
     /**
      * Move backwards in the token stream
      *
      * @param `n` The number of positions to retract.
      * @return The new current token after retracting.
      */
-    retract(n = 1) {
+    retract(n: number = 1): LineToken {
         if (this.pos - 1 - n < 0) {
             // -1 because this.pos is currently on the next token
             throw new RangeError('Cannot retract past start');
         }
+
         if (n <= 0) {
             throw new RangeError('Retract distance must be positive');
         }
+
         if (this.pos - n === 0) {
             // just restart
             this.pos = 0;
             return this.next();
         }
+
         let c = n + 1;
         while (c > 0) {
             this.pos--;
@@ -220,8 +199,60 @@ class Lexer {
             }
             c--;
         }
+
         return this.next();
     }
+
+    /**
+     * Calculates indentation level for a line. If using soft tabs,
+     * indent level rounds up (so, tabSize+1 spaces is 2 levels,
+     * 2*tabSize+1 is 3, etc.)
+     *
+     * @param `text` The line of text.
+     * @param `tabFmt` A tab information descriptor.
+     * @return The indent of `text` with consideration for `tabFmt`.
+     */
+    static getIndent(text: string, tabFmt: TabInfo): number {
+        let leadingSpace: number = text.length - text.trimStart().length;
+        let indent: number;
+
+        if (tabFmt.hard) {
+            // used tabs
+            indent = leadingSpace;
+        } 
+        else {
+            // use spaces
+            indent = Math.ceil(leadingSpace / tabFmt.size!);
+        }
+
+        return indent;
+    }
+
+    /**
+     * Calculates leading spaces for a line. 
+     * This method uses arithmetic to calculate the number of leading spaces
+     *  
+     * @param `line` The line of text.
+     * @return The number of leading spaces of `line`.
+     */
+    static getLeadingSpacesByArithmetic(line: any) {
+        const leadingSpaces: number = line.text.length - line.text.trimStart().length;
+
+        return leadingSpaces;
+    }
+
+    /**
+     * Calculates leading spaces for a line. 
+     * This method finds the index position of the first non-whitespace character
+     * Since the index is built using a 0-index, the position of this character
+     * will equal the number of spaces preceding the character.
+     *  
+     * @param `text` The line of text.
+     * @return The number of leading spaces of `text` with respect to the index position of the first non-whitespace character.
+     */
+    static getLeadingSpacesByIndex(text: any) {
+        const indexNum: number = text.firstNonWhitespaceCharacterIndex;
+
+        return indexNum;
+    }
 }
-exports.default = Lexer;
-//# sourceMappingURL=lexer.js.map