mirror of
				https://github.com/We-Dont-Byte/Mind_Reader.git
				synced 2025-02-04 10:38:42 +00:00 
			
		
		
		
	added 2 functions
Added the functions getLeadingSpacesByArithmetic and getLeadingSpacesByIndex, which compute the number of leading spaces on a line in two different ways.
This commit is contained in:
		| @@ -1,215 +1,223 @@ | ||||
| import { LineToken } from '.'; | ||||
| import { Symbol, EOFTOKEN, TabInfo } from './token'; | ||||
|  | ||||
/**
 * One entry of the lexer's recognition table.
 */
interface Rule {
  /** Regular expression that is matched against a single source line. */
  pattern: RegExp;
  /** Token type given to the line token when `pattern` matches. */
  type: Symbol;
}
|  | ||||
| "use strict"; | ||||
| Object.defineProperty(exports, "__esModule", { value: true }); | ||||
| const _1 = require("."); | ||||
| const token_1 = require("./token"); | ||||
/**
 * Recognition rules for Python block-opening statements, in priority order.
 *
 * The lexer tests a line against each `pattern` from top to bottom and uses
 * the `type` of the first rule that matches. Where a rule defines the named
 * capture group `attr`, the captured text (function/class name, condition,
 * loop header, exception spec, ...) is attached to the resulting token.
 */
const rules: Rule[] = [
  {
    // `def name(`: whitespace between the name and `(` is legal Python,
    // so it is tolerated here.
    pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\s*\(/,
    type: Symbol.FUNCTION
  },
  {
    // `class Name`: not anchored at the end, so bases/colon may follow.
    pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
    type: Symbol.CLASS
  },
  {
    // `if <cond>:`: not anchored at the end, so a one-line body is accepted.
    pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
    type: Symbol.IF
  },
  {
    // `elif <cond>:`: nothing but whitespace may follow the colon.
    pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
    type: Symbol.ELIF
  },
  {
    // `else:`: no capture group; text after the colon is not inspected.
    pattern: /^\s*else\s*:/,
    type: Symbol.ELSE
  },
  {
    // `for <target> in <iterable>:`: the whole header is captured as `attr`.
    pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
    type: Symbol.FOR
  },
  {
    // `while <cond>:`
    pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
    type: Symbol.WHILE
  },
  {
    // `try:`: no capture group; text after the colon is not inspected.
    pattern: /^\s*try\s*:/,
    type: Symbol.TRY
  },
  {
    // `except:` or `except <spec>:`: `attr` is undefined for a bare except.
    pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
    type: Symbol.EXCEPT
  },
  {
    // `finally:`
    pattern: /^\s*finally\s*:\s*$/,
    type: Symbol.FINALLY
  },
  {
    // `with <items>:`
    pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
    type: Symbol.WITH
  },
];
|  | ||||
/**
 * Line-by-line lexer.
 *
 * The input text is split into lines and every call to {@link Lexer.next}
 * turns exactly one line into one {@link LineToken}: the type of the first
 * matching entry of `rules`, `Symbol.EMPTY` for blank/comment-only lines, or
 * `Symbol.INDENT` for anything else. After the last line, `EOFTOKEN` is
 * produced once; advancing again throws.
 */
export default class Lexer {
  /** Matches a line that is blank or holds nothing but a `#` comment. */
  private static readonly emptyLine: RegExp = /^\s*(#.*)?$/;

  /** The input, one entry per line. */
  private textLines: string[] = [];
  /** Index of the line the next call to `next()` reads (one past the current token). */
  private pos: number = 0;
  /** Token most recently produced by `next()`. */
  private _currToken: LineToken = EOFTOKEN;

  /**
   * Calculates the indentation level of a line. With soft tabs the level
   * rounds up (so tabSize+1 spaces is 2 levels, 2*tabSize+1 is 3, etc.);
   * with hard tabs every leading whitespace character counts as one level.
   *
   * @param text The line of text.
   * @param tabFmt A tab information descriptor.
   * @returns The indent of `text` with consideration for `tabFmt`.
   */
  static getIndent(text: string, tabFmt: TabInfo): number {
    const leadingSpace = text.length - text.trimStart().length;
    if (tabFmt.hard) {
      return leadingSpace;
    }
    // `size` is filled in by the constructor defaults; direct callers must
    // supply it when `hard` is false.
    return Math.ceil(leadingSpace / tabFmt.size!);
  }

  /**
   * Counts the leading whitespace characters of a line by subtracting the
   * length of the left-trimmed text from the length of the full text.
   *
   * @param textLine An object exposing the line's content as `text`.
   * @returns The number of leading whitespace characters of `textLine.text`.
   */
  static getLeadingSpacesByArithmetic(textLine: { text: string }): number {
    return textLine.text.length - textLine.text.trimStart().length;
  }

  /**
   * Reports the leading whitespace of a line from the index of its first
   * non-whitespace character. Because indices are zero-based, that index
   * equals the number of characters preceding it.
   *
   * @param textLine An object exposing `firstNonWhitespaceCharacterIndex`.
   * @returns `textLine.firstNonWhitespaceCharacterIndex`, unchanged.
   */
  static getLeadingSpacesByIndex(textLine: { firstNonWhitespaceCharacterIndex: number }): number {
    return textLine.firstNonWhitespaceCharacterIndex;
  }

  /**
   * @param text The text to lex.
   * @param tabFmt A tab information descriptor; missing fields default to
   *               4-wide soft tabs.
   */
  constructor(text?: string, private tabFmt?: TabInfo) {
    this.tabFmt = { size: 4, hard: false, ...tabFmt };
    this.restart(text);
  }

  /**
   * Restart the lexer with new text and advance to its first token.
   * Both `\n` and `\r\n` line endings are accepted. Without text (or with
   * an empty string) the lexer is emptied and rests on `EOFTOKEN`.
   *
   * @param text The new text to lex.
   */
  restart(text?: string): void {
    this.pos = 0;
    this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN
    // Splitting on an optional CR handles every CRLF, not just the first,
    // and drops lines left over from a previous input when text is absent.
    this.textLines = text ? text.split(/\r?\n/) : [];
    if (text) {
      this.next(); // advance to the first token
    }
  }

  /**
   * @returns The current {@link LineToken}.
   */
  currToken(): LineToken {
    return this._currToken;
  }

  /**
   * Advance the position in the token stream by one line.
   *
   * @returns The new current token, after advancing.
   * @throws Error if the lexer has already moved past `EOFTOKEN`.
   */
  next(): LineToken {
    if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) {
      throw new Error('Cannot advance past end');
    }

    if (this.pos >= this.textLines.length) {
      // Ran out of lines: emit EOF. `pos` still advances so that a further
      // call is detected by the guard above.
      this._currToken = EOFTOKEN;
      this.pos++;
      return this._currToken;
    }

    const line = this.textLines[this.pos];
    // tabFmt is always populated by the constructor.
    const indent = Lexer.getIndent(line, this.tabFmt!);

    let token: LineToken | undefined;
    for (const rule of rules) {
      const match = line.match(rule.pattern);
      if (match) {
        // First matching rule wins; pass the captured attribute when the
        // pattern defines named groups.
        token = match.groups
          ? new LineToken(rule.type, this.pos, indent, match.groups['attr'])
          : new LineToken(rule.type, this.pos, indent);
        break;
      }
    }

    if (token === undefined) {
      // No rules matched
      // TODO: move to rules
      token = Lexer.emptyLine.test(line)
        // "empty" line: reported with the fixed indent value 999999
        // NOTE(review): presumably a sentinel for the parser; confirm there.
        ? new LineToken(Symbol.EMPTY, this.pos, 999999)
        // any other line is an INDENT token
        : new LineToken(Symbol.INDENT, this.pos, indent);
    }

    this._currToken = token;
    this.pos++;
    return token;
  }

  /**
   * Move backwards in the token stream. Blank and comment-only lines are
   * skipped while stepping back, so they do not count towards `n`.
   *
   * @param n The number of positions to retract.
   * @returns The new current token after retracting.
   * @throws RangeError if `n` is not positive, or if retracting would move
   *         before the first line. The lexer position is left untouched
   *         when this is thrown.
   */
  retract(n: number = 1): LineToken {
    if (this.pos - 1 - n < 0) {
      // -1 because this.pos is currently on the next token
      throw new RangeError('Cannot retract past start');
    }

    if (n <= 0) {
      throw new RangeError('Retract distance must be positive');
    }

    // Work on a copy so a failed retract does not corrupt the position.
    let target = this.pos;
    // n + 1 steps: one to get back onto the current token's line, then n more.
    for (let remaining = n + 1; remaining > 0; remaining--) {
      target--;
      while (target >= 0 && Lexer.emptyLine.test(this.textLines[target])) {
        // Skip empty lines
        target--;
      }
      if (target < 0) {
        // Only skipped lines remain before this point.
        throw new RangeError('Cannot retract past start');
      }
    }

    this.pos = target;
    return this.next();
  }
}
| exports.default = Lexer; | ||||
| //# sourceMappingURL=lexer.js.map | ||||
|   | ||||
		Reference in New Issue
	
	Block a user