mirror of
				https://github.com/We-Dont-Byte/Mind_Reader.git
				synced 2025-02-04 10:38:42 +00:00 
			
		
		
		
	Added 2 functions
Added functions getLeadingSpacesByArithmetic and getLeadingSpacesByIndex, which find the number of leading spaces on a line using two different methods.
This commit is contained in:
		| @@ -1,71 +1,79 @@ | |||||||
| import { LineToken } from '.'; | "use strict"; | ||||||
| import { Symbol, EOFTOKEN, TabInfo } from './token'; | Object.defineProperty(exports, "__esModule", { value: true }); | ||||||
|  | const _1 = require("."); | ||||||
| type Rule = { | const token_1 = require("./token"); | ||||||
|   pattern: RegExp, |  | ||||||
|   type: Symbol, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * List of recognition patterns, in order of priority |  * List of recognition patterns, in order of priority | ||||||
|  * The first item is a recognition pattern, used to recognize the token |  * The first item is a recognition pattern, used to recognize the token | ||||||
|  * the second item is the token type |  * the second item is the token type | ||||||
|  */ |  */ | ||||||
| const rules: Rule[] = [ | const rules = [ | ||||||
|     { |     { | ||||||
|         pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, |         pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, | ||||||
|     type: Symbol.FUNCTION |         type: token_1.Symbol.FUNCTION | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, |         pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, | ||||||
|     type: Symbol.CLASS |         type: token_1.Symbol.CLASS | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, |         pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, | ||||||
|     type: Symbol.IF |         type: token_1.Symbol.IF | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.ELIF |         type: token_1.Symbol.ELIF | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*else\s*:/, |         pattern: /^\s*else\s*:/, | ||||||
|     type: Symbol.ELSE |         type: token_1.Symbol.ELSE | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.FOR |         type: token_1.Symbol.FOR | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.WHILE |         type: token_1.Symbol.WHILE | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*try\s*:/, |         pattern: /^\s*try\s*:/, | ||||||
|     type: Symbol.TRY |         type: token_1.Symbol.TRY | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, |         pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, | ||||||
|     type: Symbol.EXCEPT |         type: token_1.Symbol.EXCEPT | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*finally\s*:\s*$/, |         pattern: /^\s*finally\s*:\s*$/, | ||||||
|     type: Symbol.FINALLY |         type: token_1.Symbol.FINALLY | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|         pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.WITH |         type: token_1.Symbol.WITH | ||||||
|     }, |     }, | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Line-By-Line Lexer |  * Line-By-Line Lexer | ||||||
|  */ |  */ | ||||||
| export default class Lexer { | class Lexer { | ||||||
|   private textLines: string[] = []; // array of text lines |     /** | ||||||
|   private pos: number = 0; |      * @param `text` The text to lex. | ||||||
|   private _currToken: LineToken = EOFTOKEN; |      * @param `tabFmt` A tab information descriptor | ||||||
|  |      */ | ||||||
|  |     constructor(text, tabFmt) { | ||||||
|  |         this.tabFmt = tabFmt; | ||||||
|  |         this.textLines = []; // array of text lines | ||||||
|  |         this.pos = 0; | ||||||
|  |         this._currToken = token_1.EOFTOKEN; | ||||||
|  |         // default is 4 wide expanded tabs | ||||||
|  |         this.tabFmt = Object.assign({ size: 4, hard: false }, tabFmt); | ||||||
|  |         if (text) { | ||||||
|  |             // normalize linefeeds | ||||||
|  |             text = text.replace('\r\n', '\n'); | ||||||
|  |         } | ||||||
|  |         this.restart(text); | ||||||
|  |     } | ||||||
|     /** |     /** | ||||||
|      * Calculates indentation level for a line. If using soft tabs, |      * Calculates indentation level for a line. If using soft tabs, | ||||||
|      * indent level rounds up (so, tabSize+1 spaces is 2 levels, |      * indent level rounds up (so, tabSize+1 spaces is 2 levels, | ||||||
| @@ -75,132 +83,130 @@ export default class Lexer { | |||||||
|      * @param `tabFmt` A tab information descriptor. |      * @param `tabFmt` A tab information descriptor. | ||||||
|      * @return The indent of `text` with consideration for `tabFmt`. |      * @return The indent of `text` with consideration for `tabFmt`. | ||||||
|      */ |      */ | ||||||
|   static getIndent(text: string, tabFmt: TabInfo): number { |     static getIndent(text, tabFmt) { | ||||||
|     let leadingSpace: number = text.length - text.trimLeft().length; |         let leadingSpace = text.length - text.trimLeft().length; | ||||||
|     let indent: number; |         let indent; | ||||||
|         if (tabFmt.hard) { |         if (tabFmt.hard) { | ||||||
|             // used tabs |             // used tabs | ||||||
|             indent = leadingSpace; |             indent = leadingSpace; | ||||||
|     } else { |  | ||||||
|       // use spaces |  | ||||||
|       indent = Math.ceil(leadingSpace/tabFmt.size!); |  | ||||||
|         } |         } | ||||||
|  |         else { | ||||||
|  |             // use spaces | ||||||
|  |             indent = Math.ceil(leadingSpace / tabFmt.size); | ||||||
|  |         } | ||||||
|         return indent; |         return indent; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|    * @param `text` The text to lex. |      * Calculates leading spaces for a line.  | ||||||
|    * @param `tabFmt` A tab information descriptor |      * This method uses arithmetic to calculate the number of leading spaces | ||||||
|  |      *   | ||||||
|  |      * @param `textLine` The line object; its `text` property is measured. | ||||||
|  |      * @return The number of leading whitespace characters in `textLine.text`. | ||||||
|      */ |      */ | ||||||
|   constructor(text?: string, private tabFmt?: TabInfo) { |     static getLeadingSpacesByArithmetic(textLine) { | ||||||
|     // default is 4 wide expanded tabs |         const leadingSpaces = textLine.text.length - textLine.text.trimStart().length; | ||||||
|     this.tabFmt = { |  | ||||||
|       ...{size: 4, hard: false}, |  | ||||||
|       ...tabFmt |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     if (text) { |         return leadingSpaces; | ||||||
|       // normalize linefeeds |  | ||||||
|       text = text.replace('\r\n', '\n'); |  | ||||||
|     } |  | ||||||
|     this.restart(text); |  | ||||||
|     } |     } | ||||||
|  |     /** | ||||||
|  |      * Calculates leading spaces for a line.  | ||||||
|  |      * This method reads the index position of the first non-whitespace character. | ||||||
|  |      * Because indices are zero-based, the position of this character | ||||||
|  |      * equals the number of whitespace characters preceding it. | ||||||
|  |      *   | ||||||
|  |      * @param `textLine` The line object; its `firstNonWhitespaceCharacterIndex` property is read. | ||||||
|  |      * @return The value of `textLine.firstNonWhitespaceCharacterIndex`, taken as the number of leading whitespace characters. | ||||||
|  |      */ | ||||||
|  |     static getLeadingSpacesByIndex(textLine) { | ||||||
|  |         const indexNum = textLine.firstNonWhitespaceCharacterIndex; | ||||||
|  |  | ||||||
|  |         return indexNum; | ||||||
|  |     } | ||||||
|     /** |     /** | ||||||
|      * Restart lexer with new text. |      * Restart lexer with new text. | ||||||
|      * |      * | ||||||
|      * @param `text` The new text to lex. |      * @param `text` The new text to lex. | ||||||
|      */ |      */ | ||||||
|   restart(text?: string): void { |     restart(text) { | ||||||
|         this.pos = 0; |         this.pos = 0; | ||||||
|     this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN |         this._currToken = token_1.EOFTOKEN; // if no input, already on EOFTOKEN | ||||||
|         if (text) { |         if (text) { | ||||||
|             this.textLines = text.split('\n'); |             this.textLines = text.split('\n'); | ||||||
|             this.next(); // advance to the first token |             this.next(); // advance to the first token | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * @return the current {@link LineToken}. |      * @return the current {@link LineToken}. | ||||||
|      */ |      */ | ||||||
|   currToken(): LineToken { return this._currToken; } |     currToken() { return this._currToken; } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Advance the position in the token stream. |      * Advance the position in the token stream. | ||||||
|      * |      * | ||||||
|      * @return The new current token, after advancing |      * @return The new current token, after advancing | ||||||
|      */ |      */ | ||||||
|   next(): LineToken { |     next() { | ||||||
|     if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) { |         if (this._currToken === token_1.EOFTOKEN && this.pos > this.textLines.length) { | ||||||
|             throw new Error('Cannot advance past end'); |             throw new Error('Cannot advance past end'); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Until a LineToken is found, or EOF |         // Until a LineToken is found, or EOF | ||||||
|         while (this.pos < this.textLines.length) { |         while (this.pos < this.textLines.length) { | ||||||
|       let line: string = this.textLines[this.pos]; |             let line = this.textLines[this.pos]; | ||||||
|       let indent: number = Lexer.getIndent(line, this.tabFmt!); |             let indent = Lexer.getIndent(line, this.tabFmt); | ||||||
|       let token: LineToken; |             let token; | ||||||
|             for (var r of rules) { |             for (var r of rules) { | ||||||
|                 // Does line match pattern? |                 // Does line match pattern? | ||||||
|         let match: RegExpMatchArray | null = line.match(r.pattern); |                 let match = line.match(r.pattern); | ||||||
|                 if (match) { |                 if (match) { | ||||||
|                     // Yes... |                     // Yes... | ||||||
|                     if (match.groups) { |                     if (match.groups) { | ||||||
|             token = new LineToken(r.type, this.pos, indent, match.groups["attr"]); |                         token = new _1.LineToken(r.type, this.pos, indent, match.groups["attr"]); | ||||||
|           } else { |                     } | ||||||
|             token = new LineToken(r.type, this.pos, indent); |                     else { | ||||||
|  |                         token = new _1.LineToken(r.type, this.pos, indent); | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     this._currToken = token; |                     this._currToken = token; | ||||||
|                     this.pos++; |                     this.pos++; | ||||||
|                     return this.currToken(); |                     return this.currToken(); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             // No rules matched |             // No rules matched | ||||||
|  |  | ||||||
|             // TODO: move to rules |             // TODO: move to rules | ||||||
|             if (/^\s*(#.*)?$/.test(line)) { |             if (/^\s*(#.*)?$/.test(line)) { | ||||||
|                 // "empty" line |                 // "empty" line | ||||||
|         token = new LineToken(Symbol.EMPTY, this.pos, 999999); |                 token = new _1.LineToken(token_1.Symbol.EMPTY, this.pos, 999999); | ||||||
|       } else { |             } | ||||||
|         // This is an INDENT token |             else { | ||||||
|         token = new LineToken(Symbol.INDENT, this.pos, indent); |                 // This is an INDENT token | ||||||
|  |                 token = new _1.LineToken(token_1.Symbol.INDENT, this.pos, indent); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             this._currToken = token; |             this._currToken = token; | ||||||
|             this.pos++; |             this.pos++; | ||||||
|             return this.currToken(); |             return this.currToken(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Didn't return, must be EOF |         // Didn't return, must be EOF | ||||||
|     this._currToken = EOFTOKEN; |         this._currToken = token_1.EOFTOKEN; | ||||||
|         this.pos++; |         this.pos++; | ||||||
|         return this.currToken(); |         return this.currToken(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Move backwards in the token stream |      * Move backwards in the token stream | ||||||
|      * |      * | ||||||
|      * @param `n` The number of positions to retract. |      * @param `n` The number of positions to retract. | ||||||
|      * @return The new current token after retracting. |      * @return The new current token after retracting. | ||||||
|      */ |      */ | ||||||
|   retract(n: number = 1): LineToken { |     retract(n = 1) { | ||||||
|         if (this.pos - 1 - n < 0) { |         if (this.pos - 1 - n < 0) { | ||||||
|             // -1 because this.pos is currently on the next token |             // -1 because this.pos is currently on the next token | ||||||
|             throw new RangeError('Cannot retract past start'); |             throw new RangeError('Cannot retract past start'); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (n <= 0) { |         if (n <= 0) { | ||||||
|             throw new RangeError('Retract distance must be positive'); |             throw new RangeError('Retract distance must be positive'); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (this.pos - n === 0) { |         if (this.pos - n === 0) { | ||||||
|             // just restart |             // just restart | ||||||
|             this.pos = 0; |             this.pos = 0; | ||||||
|             return this.next(); |             return this.next(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let c = n + 1; |         let c = n + 1; | ||||||
|         while (c > 0) { |         while (c > 0) { | ||||||
|             this.pos--; |             this.pos--; | ||||||
| @@ -213,3 +219,5 @@ export default class Lexer { | |||||||
|         return this.next(); |         return this.next(); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | exports.default = Lexer; | ||||||
|  | //# sourceMappingURL=lexer.js.map | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user