mirror of
				https://github.com/We-Dont-Byte/Mind_Reader.git
				synced 2025-02-04 10:38:42 +00:00 
			
		
		
		
	Added 2 functions
Added the functions getLeadingSpacesByArithmetic and getLeadingSpacesByIndex, which find the number of leading spaces in a line using two different methods.
This commit is contained in:
		| @@ -1,215 +1,223 @@ | |||||||
| import { LineToken } from '.'; | "use strict"; | ||||||
| import { Symbol, EOFTOKEN, TabInfo } from './token'; | Object.defineProperty(exports, "__esModule", { value: true }); | ||||||
|  | const _1 = require("."); | ||||||
| type Rule = { | const token_1 = require("./token"); | ||||||
|   pattern: RegExp, |  | ||||||
|   type: Symbol, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * List of recognition patterns, in order of priority |  * List of recognition patterns, in order of priority | ||||||
|  * The first item is a recognition pattern, used to recognize the token |  * The first item is a recognition pattern, used to recognize the token | ||||||
|  * the second item is the token type |  * the second item is the token type | ||||||
|  */ |  */ | ||||||
| const rules: Rule[] = [ | const rules = [ | ||||||
|   { |     { | ||||||
|     pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, |         pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, | ||||||
|     type: Symbol.FUNCTION |         type: token_1.Symbol.FUNCTION | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, |         pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, | ||||||
|     type: Symbol.CLASS |         type: token_1.Symbol.CLASS | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, |         pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, | ||||||
|     type: Symbol.IF |         type: token_1.Symbol.IF | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.ELIF |         type: token_1.Symbol.ELIF | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*else\s*:/, |         pattern: /^\s*else\s*:/, | ||||||
|     type: Symbol.ELSE |         type: token_1.Symbol.ELSE | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.FOR |         type: token_1.Symbol.FOR | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.WHILE |         type: token_1.Symbol.WHILE | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*try\s*:/, |         pattern: /^\s*try\s*:/, | ||||||
|     type: Symbol.TRY |         type: token_1.Symbol.TRY | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, |         pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, | ||||||
|     type: Symbol.EXCEPT |         type: token_1.Symbol.EXCEPT | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*finally\s*:\s*$/, |         pattern: /^\s*finally\s*:\s*$/, | ||||||
|     type: Symbol.FINALLY |         type: token_1.Symbol.FINALLY | ||||||
|   }, |     }, | ||||||
|   { |     { | ||||||
|     pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, |         pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, | ||||||
|     type: Symbol.WITH |         type: token_1.Symbol.WITH | ||||||
|   }, |     }, | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Line-By-Line Lexer |  * Line-By-Line Lexer | ||||||
|  */ |  */ | ||||||
| export default class Lexer { | class Lexer { | ||||||
|   private textLines: string[] = []; // array of text lines |     /** | ||||||
|   private pos: number = 0; |      * @param `text` The text to lex. | ||||||
|   private _currToken: LineToken = EOFTOKEN; |      * @param `tabFmt` A tab information descriptor | ||||||
|  |      */ | ||||||
|   /** |     constructor(text, tabFmt) { | ||||||
|    * Calculates indentation level for a line. If using soft tabs, |         this.tabFmt = tabFmt; | ||||||
|    * indent level rounds up (so, tabSize+1 spaces is 2 levels, |         this.textLines = []; // array of text lines | ||||||
|    * 2*tabSize+1 is 3, etc.) |         this.pos = 0; | ||||||
|    * |         this._currToken = token_1.EOFTOKEN; | ||||||
|    * @param `text` The line of text. |         // default is 4 wide expanded tabs | ||||||
|    * @param `tabFmt` A tab information descriptor. |         this.tabFmt = Object.assign({ size: 4, hard: false }, tabFmt); | ||||||
|    * @return The indent of `text` with consideration for `tabFmt`. |         if (text) { | ||||||
|    */ |             // normalize linefeeds | ||||||
|   static getIndent(text: string, tabFmt: TabInfo): number { |             text = text.replace('\r\n', '\n'); | ||||||
|     let leadingSpace: number = text.length - text.trimLeft().length; |  | ||||||
|     let indent: number; |  | ||||||
|     if (tabFmt.hard) { |  | ||||||
|       // used tabs |  | ||||||
|       indent = leadingSpace; |  | ||||||
|     } else { |  | ||||||
|       // use spaces |  | ||||||
|       indent = Math.ceil(leadingSpace/tabFmt.size!); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return indent; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * @param `text` The text to lex. |  | ||||||
|    * @param `tabFmt` A tab information descriptor |  | ||||||
|    */ |  | ||||||
|   constructor(text?: string, private tabFmt?: TabInfo) { |  | ||||||
|     // default is 4 wide expanded tabs |  | ||||||
|     this.tabFmt = { |  | ||||||
|       ...{size: 4, hard: false}, |  | ||||||
|       ...tabFmt |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     if (text) { |  | ||||||
|       // normalize linefeeds |  | ||||||
|       text = text.replace('\r\n', '\n'); |  | ||||||
|     } |  | ||||||
|     this.restart(text); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Restart lexer with new text. |  | ||||||
|    * |  | ||||||
|    * @param `text` The new text to lex. |  | ||||||
|    */ |  | ||||||
|   restart(text?: string): void { |  | ||||||
|     this.pos = 0; |  | ||||||
|     this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN |  | ||||||
|     if (text) { |  | ||||||
|       this.textLines = text.split('\n'); |  | ||||||
|       this.next(); // advance to the first token |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * @return the current {@link LineToken}. |  | ||||||
|    */ |  | ||||||
|   currToken(): LineToken { return this._currToken; } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Advance the position in the token stream. |  | ||||||
|    * |  | ||||||
|    * @return The new current token, after advancing |  | ||||||
|    */ |  | ||||||
|   next(): LineToken { |  | ||||||
|     if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) { |  | ||||||
|       throw new Error('Cannot advance past end'); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Until a LineToken is found, or EOF |  | ||||||
|     while (this.pos < this.textLines.length) { |  | ||||||
|       let line: string = this.textLines[this.pos]; |  | ||||||
|       let indent: number = Lexer.getIndent(line, this.tabFmt!); |  | ||||||
|       let token: LineToken; |  | ||||||
|       for (var r of rules) { |  | ||||||
|         // Does line match pattern? |  | ||||||
|         let match: RegExpMatchArray | null = line.match(r.pattern); |  | ||||||
|         if (match) { |  | ||||||
|           // Yes... |  | ||||||
|           if (match.groups) { |  | ||||||
|             token = new LineToken(r.type, this.pos, indent, match.groups["attr"]); |  | ||||||
|           } else { |  | ||||||
|             token = new LineToken(r.type, this.pos, indent); |  | ||||||
|           } |  | ||||||
|  |  | ||||||
|           this._currToken = token; |  | ||||||
|           this.pos++; |  | ||||||
|           return this.currToken(); |  | ||||||
|         } |         } | ||||||
|       } |         this.restart(text); | ||||||
|       // No rules matched |  | ||||||
|  |  | ||||||
|       // TODO: move to rules |  | ||||||
|       if (/^\s*(#.*)?$/.test(line)) { |  | ||||||
|         // "empty" line |  | ||||||
|         token = new LineToken(Symbol.EMPTY, this.pos, 999999); |  | ||||||
|       } else { |  | ||||||
|         // This is an INDENT token |  | ||||||
|         token = new LineToken(Symbol.INDENT, this.pos, indent); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       this._currToken = token; |  | ||||||
|       this.pos++; |  | ||||||
|       return this.currToken(); |  | ||||||
|     } |     } | ||||||
|  |     /** | ||||||
|     // Didn't return, must be EOF |      * Calculates indentation level for a line. If using soft tabs, | ||||||
|     this._currToken = EOFTOKEN; |      * indent level rounds up (so, tabSize+1 spaces is 2 levels, | ||||||
|     this.pos++; |      * 2*tabSize+1 is 3, etc.) | ||||||
|     return this.currToken(); |      * | ||||||
|   } |      * @param `text` The line of text. | ||||||
|  |      * @param `tabFmt` A tab information descriptor. | ||||||
|   /** |      * @return The indent of `text` with consideration for `tabFmt`. | ||||||
|    * Move backwards in the token stream |      */ | ||||||
|    * |     static getIndent(text, tabFmt) { | ||||||
|    * @param `n` The number of positions to retract. |         let leadingSpace = text.length - text.trimLeft().length; | ||||||
|    * @return The new current token after retracting. |         let indent; | ||||||
|    */ |         if (tabFmt.hard) { | ||||||
|   retract(n: number = 1): LineToken { |             // used tabs | ||||||
|     if (this.pos - 1 - n < 0) { |             indent = leadingSpace; | ||||||
|       // -1 because this.pos is currently on the next token |         } | ||||||
|       throw new RangeError('Cannot retract past start'); |         else { | ||||||
|  |             // use spaces | ||||||
|  |             indent = Math.ceil(leadingSpace / tabFmt.size); | ||||||
|  |         } | ||||||
|  |         return indent; | ||||||
|     } |     } | ||||||
|  |     /** | ||||||
|  |      * Calculates the leading whitespace of a line. | ||||||
|  |      * This method subtracts the length of the left-trimmed text from the length of the full text. | ||||||
|  |      * | ||||||
|  |      * @param `textLine` The line to measure; its `text` property holds the line's content. | ||||||
|  |      * @return The number of leading whitespace characters in `textLine.text`. | ||||||
|  |      */ | ||||||
|  |     static getLeadingSpacesByArithmetic(textLine) { | ||||||
|  |         const leadingSpaces = textLine.text.length - textLine.text.trimStart().length; | ||||||
|  |  | ||||||
|     if (n <= 0) { |         return leadingSpaces; | ||||||
|       throw new RangeError('Retract distance must be positive'); |  | ||||||
|     } |     } | ||||||
|  |     /** | ||||||
|  |      * Calculates the leading whitespace of a line. | ||||||
|  |      * This method reads the index of the first non-whitespace character. | ||||||
|  |      * Because the index is 0-based, the position of this character | ||||||
|  |      * equals the number of whitespace characters preceding it. | ||||||
|  |      * | ||||||
|  |      * @param `textLine` The line to measure; it must expose `firstNonWhitespaceCharacterIndex`. | ||||||
|  |      * @return The value of `textLine.firstNonWhitespaceCharacterIndex`, taken as the number of leading whitespace characters. | ||||||
|  |      */ | ||||||
|  |     static getLeadingSpacesByIndex(textLine) { | ||||||
|  |         const indexNum = textLine.firstNonWhitespaceCharacterIndex; | ||||||
|  |  | ||||||
|     if (this.pos - n === 0) { |         return indexNum; | ||||||
|       // just restart |  | ||||||
|       this.pos = 0; |  | ||||||
|       return this.next(); |  | ||||||
|     } |     } | ||||||
|  |     /** | ||||||
|     let c = n + 1; |      * Restart lexer with new text. | ||||||
|     while (c > 0) { |      * | ||||||
|       this.pos--; |      * @param `text` The new text to lex. | ||||||
|       while (/^\s*(#.*)?$/.test(this.textLines[this.pos])) { |      */ | ||||||
|         // Skip empty lines |     restart(text) { | ||||||
|         this.pos--; |         this.pos = 0; | ||||||
|       } |         this._currToken = token_1.EOFTOKEN; // if no input, already on EOFTOKEN | ||||||
|       c--; |         if (text) { | ||||||
|  |             this.textLines = text.split('\n'); | ||||||
|  |             this.next(); // advance to the first token | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     /** | ||||||
|  |      * @return the current {@link LineToken}. | ||||||
|  |      */ | ||||||
|  |     currToken() { return this._currToken; } | ||||||
|  |     /** | ||||||
|  |      * Advance the position in the token stream. | ||||||
|  |      * | ||||||
|  |      * @return The new current token, after advancing | ||||||
|  |      */ | ||||||
|  |     next() { | ||||||
|  |         if (this._currToken === token_1.EOFTOKEN && this.pos > this.textLines.length) { | ||||||
|  |             throw new Error('Cannot advance past end'); | ||||||
|  |         } | ||||||
|  |         // Until a LineToken is found, or EOF | ||||||
|  |         while (this.pos < this.textLines.length) { | ||||||
|  |             let line = this.textLines[this.pos]; | ||||||
|  |             let indent = Lexer.getIndent(line, this.tabFmt); | ||||||
|  |             let token; | ||||||
|  |             for (var r of rules) { | ||||||
|  |                 // Does line match pattern? | ||||||
|  |                 let match = line.match(r.pattern); | ||||||
|  |                 if (match) { | ||||||
|  |                     // Yes... | ||||||
|  |                     if (match.groups) { | ||||||
|  |                         token = new _1.LineToken(r.type, this.pos, indent, match.groups["attr"]); | ||||||
|  |                     } | ||||||
|  |                     else { | ||||||
|  |                         token = new _1.LineToken(r.type, this.pos, indent); | ||||||
|  |                     } | ||||||
|  |                     this._currToken = token; | ||||||
|  |                     this.pos++; | ||||||
|  |                     return this.currToken(); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             // No rules matched | ||||||
|  |             // TODO: move to rules | ||||||
|  |             if (/^\s*(#.*)?$/.test(line)) { | ||||||
|  |                 // "empty" line | ||||||
|  |                 token = new _1.LineToken(token_1.Symbol.EMPTY, this.pos, 999999); | ||||||
|  |             } | ||||||
|  |             else { | ||||||
|  |                 // This is an INDENT token | ||||||
|  |                 token = new _1.LineToken(token_1.Symbol.INDENT, this.pos, indent); | ||||||
|  |             } | ||||||
|  |             this._currToken = token; | ||||||
|  |             this.pos++; | ||||||
|  |             return this.currToken(); | ||||||
|  |         } | ||||||
|  |         // Didn't return, must be EOF | ||||||
|  |         this._currToken = token_1.EOFTOKEN; | ||||||
|  |         this.pos++; | ||||||
|  |         return this.currToken(); | ||||||
|  |     } | ||||||
|  |     /** | ||||||
|  |      * Move backwards in the token stream | ||||||
|  |      * | ||||||
|  |      * @param `n` The number of positions to retract. | ||||||
|  |      * @return The new current token after retracting. | ||||||
|  |      */ | ||||||
|  |     retract(n = 1) { | ||||||
|  |         if (this.pos - 1 - n < 0) { | ||||||
|  |             // -1 because this.pos is currently on the next token | ||||||
|  |             throw new RangeError('Cannot retract past start'); | ||||||
|  |         } | ||||||
|  |         if (n <= 0) { | ||||||
|  |             throw new RangeError('Retract distance must be positive'); | ||||||
|  |         } | ||||||
|  |         if (this.pos - n === 0) { | ||||||
|  |             // just restart | ||||||
|  |             this.pos = 0; | ||||||
|  |             return this.next(); | ||||||
|  |         } | ||||||
|  |         let c = n + 1; | ||||||
|  |         while (c > 0) { | ||||||
|  |             this.pos--; | ||||||
|  |             while (/^\s*(#.*)?$/.test(this.textLines[this.pos])) { | ||||||
|  |                 // Skip empty lines | ||||||
|  |                 this.pos--; | ||||||
|  |             } | ||||||
|  |             c--; | ||||||
|  |         } | ||||||
|  |         return this.next(); | ||||||
|     } |     } | ||||||
|     return this.next(); |  | ||||||
|   } |  | ||||||
| } | } | ||||||
|  | exports.default = Lexer; | ||||||
|  | //# sourceMappingURL=lexer.js.map | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user