mirror of
				https://github.com/We-Dont-Byte/Mind_Reader.git
				synced 2025-02-04 10:38:42 +00:00 
			
		
		
		
	added 2 functions
Added the functions getLeadingSpacesByArithmetic and getLeadingSpacesByIndex, which find the number of leading spaces on a line using two different methods.
This commit is contained in:
		@@ -1,215 +1,223 @@
 | 
			
		||||
import { LineToken } from '.';
 | 
			
		||||
import { Symbol, EOFTOKEN, TabInfo } from './token';
 | 
			
		||||
 | 
			
		||||
/**
 * A single recognition rule used by the lexer.
 */
type Rule = {
  /** Regular expression tested against a line of text. */
  pattern: RegExp;
  /** Token type produced when `pattern` matches. */
  type: Symbol;
};
 | 
			
		||||
 | 
			
		||||
"use strict";
 | 
			
		||||
Object.defineProperty(exports, "__esModule", { value: true });
 | 
			
		||||
const _1 = require(".");
 | 
			
		||||
const token_1 = require("./token");
 | 
			
		||||
/**
 | 
			
		||||
 * List of recognition patterns, in order of priority
 | 
			
		||||
 * The first item is a recognition pattern, used to recognize the token
 | 
			
		||||
 * the second item is the token type
 | 
			
		||||
 */
 | 
			
		||||
const rules: Rule[] = [
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/,
 | 
			
		||||
    type: Symbol.FUNCTION
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
 | 
			
		||||
    type: Symbol.CLASS
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
 | 
			
		||||
    type: Symbol.IF
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
    type: Symbol.ELIF
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*else\s*:/,
 | 
			
		||||
    type: Symbol.ELSE
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
    type: Symbol.FOR
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
    type: Symbol.WHILE
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*try\s*:/,
 | 
			
		||||
    type: Symbol.TRY
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
 | 
			
		||||
    type: Symbol.EXCEPT
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*finally\s*:\s*$/,
 | 
			
		||||
    type: Symbol.FINALLY
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
    pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
    type: Symbol.WITH
 | 
			
		||||
  },
 | 
			
		||||
const rules = [
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/,
 | 
			
		||||
        type: token_1.Symbol.FUNCTION
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
 | 
			
		||||
        type: token_1.Symbol.CLASS
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
 | 
			
		||||
        type: token_1.Symbol.IF
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
        type: token_1.Symbol.ELIF
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*else\s*:/,
 | 
			
		||||
        type: token_1.Symbol.ELSE
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
        type: token_1.Symbol.FOR
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
        type: token_1.Symbol.WHILE
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*try\s*:/,
 | 
			
		||||
        type: token_1.Symbol.TRY
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
 | 
			
		||||
        type: token_1.Symbol.EXCEPT
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*finally\s*:\s*$/,
 | 
			
		||||
        type: token_1.Symbol.FINALLY
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
 | 
			
		||||
        type: token_1.Symbol.WITH
 | 
			
		||||
    },
 | 
			
		||||
];
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Line-By-Line Lexer
 | 
			
		||||
 */
 | 
			
		||||
export default class Lexer {
 | 
			
		||||
  private textLines: string[] = []; // array of text lines
 | 
			
		||||
  private pos: number = 0;
 | 
			
		||||
  private _currToken: LineToken = EOFTOKEN;
 | 
			
		||||
 | 
			
		||||
  /**
   * Calculates indentation level for a line. If using soft tabs,
   * indent level rounds up (so, tabSize+1 spaces is 2 levels,
   * 2*tabSize+1 is 3, etc.)
   *
   * @param `text` The line of text.
   * @param `tabFmt` A tab information descriptor.
   * @return The indent of `text` with consideration for `tabFmt`.
   */
  static getIndent(text: string, tabFmt: TabInfo): number {
    // Number of leading whitespace characters on the line.
    // `trimStart` is the standard method; `trimLeft` is only a deprecated alias.
    const leadingSpace = text.length - text.trimStart().length;

    if (tabFmt.hard) {
      // used tabs: every leading whitespace character counts as one level
      return leadingSpace;
    }

    // use spaces: a partially filled level rounds up to the next one
    return Math.ceil(leadingSpace / tabFmt.size!);
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Create a lexer and, when `text` is given, position it on the first token.
   *
   * @param `text` The text to lex. Windows line endings (CRLF) are
   *               converted to LF before lexing.
   * @param `tabFmt` A tab information descriptor. Fields that are not
   *                 supplied fall back to `{size: 4, hard: false}`.
   */
  constructor(text?: string, private tabFmt?: TabInfo) {
    // default is 4 wide expanded tabs; caller-supplied fields override them
    this.tabFmt = {
      ...{size: 4, hard: false},
      ...tabFmt
    };

    if (text) {
      // normalize linefeeds. A string pattern only replaces the FIRST
      // occurrence, so a global regex is needed to convert every CRLF.
      text = text.replace(/\r\n/g, '\n');
    }
    this.restart(text);
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Rewind the lexer and optionally load new text.
   *
   * The position is reset to 0 and the current token to EOFTOKEN. When
   * `text` is omitted or empty, the previously stored lines are left
   * untouched and the lexer stays on EOFTOKEN.
   *
   * @param `text` The new text to lex.
   */
  restart(text?: string): void {
    // with no input, the lexer is already sitting on EOFTOKEN
    this._currToken = EOFTOKEN;
    this.pos = 0;

    if (!text) {
      return;
    }

    this.textLines = text.split('\n');
    // move onto the first token of the new text
    this.next();
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Accessor for the token the lexer is currently positioned on.
   *
   * @return the current {@link LineToken}.
   */
  currToken(): LineToken {
    return this._currToken;
  }
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Advance the position in the token stream.
 | 
			
		||||
   *
 | 
			
		||||
   * @return The new current token, after advancing
 | 
			
		||||
   */
 | 
			
		||||
  next(): LineToken {
 | 
			
		||||
    if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) {
 | 
			
		||||
      throw new Error('Cannot advance past end');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Until a LineToken is found, or EOF
 | 
			
		||||
    while (this.pos < this.textLines.length) {
 | 
			
		||||
      let line: string = this.textLines[this.pos];
 | 
			
		||||
      let indent: number = Lexer.getIndent(line, this.tabFmt!);
 | 
			
		||||
      let token: LineToken;
 | 
			
		||||
      for (var r of rules) {
 | 
			
		||||
        // Does line match pattern?
 | 
			
		||||
        let match: RegExpMatchArray | null = line.match(r.pattern);
 | 
			
		||||
        if (match) {
 | 
			
		||||
          // Yes...
 | 
			
		||||
          if (match.groups) {
 | 
			
		||||
            token = new LineToken(r.type, this.pos, indent, match.groups["attr"]);
 | 
			
		||||
          } else {
 | 
			
		||||
            token = new LineToken(r.type, this.pos, indent);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          this._currToken = token;
 | 
			
		||||
          this.pos++;
 | 
			
		||||
          return this.currToken();
 | 
			
		||||
class Lexer {
 | 
			
		||||
    /**
 | 
			
		||||
     * @param `text` The text to lex.
 | 
			
		||||
     * @param `tabFmt` A tab information descriptor
 | 
			
		||||
     */
 | 
			
		||||
    constructor(text, tabFmt) {
 | 
			
		||||
        this.tabFmt = tabFmt;
 | 
			
		||||
        this.textLines = []; // array of text lines
 | 
			
		||||
        this.pos = 0;
 | 
			
		||||
        this._currToken = token_1.EOFTOKEN;
 | 
			
		||||
        // default is 4 wide expanded tabs
 | 
			
		||||
        this.tabFmt = Object.assign({ size: 4, hard: false }, tabFmt);
 | 
			
		||||
        if (text) {
 | 
			
		||||
            // normalize linefeeds
 | 
			
		||||
            text = text.replace('\r\n', '\n');
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      // No rules matched
 | 
			
		||||
 | 
			
		||||
      // TODO: move to rules
 | 
			
		||||
      if (/^\s*(#.*)?$/.test(line)) {
 | 
			
		||||
        // "empty" line
 | 
			
		||||
        token = new LineToken(Symbol.EMPTY, this.pos, 999999);
 | 
			
		||||
      } else {
 | 
			
		||||
        // This is an INDENT token
 | 
			
		||||
        token = new LineToken(Symbol.INDENT, this.pos, indent);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this._currToken = token;
 | 
			
		||||
      this.pos++;
 | 
			
		||||
      return this.currToken();
 | 
			
		||||
        this.restart(text);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Didn't return, must be EOF
 | 
			
		||||
    this._currToken = EOFTOKEN;
 | 
			
		||||
    this.pos++;
 | 
			
		||||
    return this.currToken();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Move backwards in the token stream
 | 
			
		||||
   *
 | 
			
		||||
   * @param `n` The number of positions to retract.
 | 
			
		||||
   * @return The new current token after retracting.
 | 
			
		||||
   */
 | 
			
		||||
  retract(n: number = 1): LineToken {
 | 
			
		||||
    if (this.pos - 1 - n < 0) {
 | 
			
		||||
      // -1 because this.pos is currently on the next token
 | 
			
		||||
      throw new RangeError('Cannot retract past start');
 | 
			
		||||
    /**
 | 
			
		||||
     * Calculates indentation level for a line. If using soft tabs,
 | 
			
		||||
     * indent level rounds up (so, tabSize+1 spaces is 2 levels,
 | 
			
		||||
     * 2*tabSize+1 is 3, etc.)
 | 
			
		||||
     *
 | 
			
		||||
     * @param `text` The line of text.
 | 
			
		||||
     * @param `tabFmt` A tab information descriptor.
 | 
			
		||||
     * @return The indent of `text` with consideration for `tabFmt`.
 | 
			
		||||
     */
 | 
			
		||||
    static getIndent(text, tabFmt) {
 | 
			
		||||
        let leadingSpace = text.length - text.trimLeft().length;
 | 
			
		||||
        let indent;
 | 
			
		||||
        if (tabFmt.hard) {
 | 
			
		||||
            // used tabs
 | 
			
		||||
            indent = leadingSpace;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            // use spaces
 | 
			
		||||
            indent = Math.ceil(leadingSpace / tabFmt.size);
 | 
			
		||||
        }
 | 
			
		||||
        return indent;
 | 
			
		||||
    }
 | 
			
		||||
    /**
 | 
			
		||||
     * Calculates leading spaces for a line. 
 | 
			
		||||
     * This method uses arithmetic to calculate the number of leading spaces
 | 
			
		||||
     *  
 | 
			
		||||
     * @param `text` The line of text.
 | 
			
		||||
     * @return The number of leading spaces of `text`.
 | 
			
		||||
     */
 | 
			
		||||
    static getLeadingSpacesByArithmetic(textLine) {
 | 
			
		||||
        const leadingSpaces = textLine.text.length - textLine.text.trimStart().length;
 | 
			
		||||
 | 
			
		||||
    if (n <= 0) {
 | 
			
		||||
      throw new RangeError('Retract distance must be positive');
 | 
			
		||||
        return leadingSpaces;
 | 
			
		||||
    }
 | 
			
		||||
    /**
 | 
			
		||||
     * Calculates leading spaces for a line. 
 | 
			
		||||
     * This method finds the index position of the first non-whitespace character
 | 
			
		||||
     * Since the index is built using a 0-index, the position of this character
 | 
			
		||||
     * will equal the number of spaces preceding the character.
 | 
			
		||||
     *  
 | 
			
		||||
     * @param `text` The line of text.
 | 
			
		||||
     * @return The number of leading spaces of `text` with respect to the index position of the first non-whitespace character.
 | 
			
		||||
     */
 | 
			
		||||
    static getLeadingSpacesByIndex(textLine) {
 | 
			
		||||
        const indexNum = textLine.firstNonWhitespaceCharacterIndex;
 | 
			
		||||
 | 
			
		||||
    if (this.pos - n === 0) {
 | 
			
		||||
      // just restart
 | 
			
		||||
      this.pos = 0;
 | 
			
		||||
      return this.next();
 | 
			
		||||
        return indexNum;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let c = n + 1;
 | 
			
		||||
    while (c > 0) {
 | 
			
		||||
      this.pos--;
 | 
			
		||||
      while (/^\s*(#.*)?$/.test(this.textLines[this.pos])) {
 | 
			
		||||
        // Skip empty lines
 | 
			
		||||
        this.pos--;
 | 
			
		||||
      }
 | 
			
		||||
      c--;
 | 
			
		||||
    /**
 | 
			
		||||
     * Restart lexer with new text.
 | 
			
		||||
     *
 | 
			
		||||
     * @param `text` The new text to lex.
 | 
			
		||||
     */
 | 
			
		||||
    restart(text) {
 | 
			
		||||
        this.pos = 0;
 | 
			
		||||
        this._currToken = token_1.EOFTOKEN; // if no input, already on EOFTOKEN
 | 
			
		||||
        if (text) {
 | 
			
		||||
            this.textLines = text.split('\n');
 | 
			
		||||
            this.next(); // advance to the first token
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    /**
 | 
			
		||||
     * @return the current {@link LineToken}.
 | 
			
		||||
     */
 | 
			
		||||
    currToken() { return this._currToken; }
 | 
			
		||||
    /**
 | 
			
		||||
     * Advance the position in the token stream.
 | 
			
		||||
     *
 | 
			
		||||
     * @return The new current token, after advancing
 | 
			
		||||
     */
 | 
			
		||||
    next() {
 | 
			
		||||
        if (this._currToken === token_1.EOFTOKEN && this.pos > this.textLines.length) {
 | 
			
		||||
            throw new Error('Cannot advance past end');
 | 
			
		||||
        }
 | 
			
		||||
        // Until a LineToken is found, or EOF
 | 
			
		||||
        while (this.pos < this.textLines.length) {
 | 
			
		||||
            let line = this.textLines[this.pos];
 | 
			
		||||
            let indent = Lexer.getIndent(line, this.tabFmt);
 | 
			
		||||
            let token;
 | 
			
		||||
            for (var r of rules) {
 | 
			
		||||
                // Does line match pattern?
 | 
			
		||||
                let match = line.match(r.pattern);
 | 
			
		||||
                if (match) {
 | 
			
		||||
                    // Yes...
 | 
			
		||||
                    if (match.groups) {
 | 
			
		||||
                        token = new _1.LineToken(r.type, this.pos, indent, match.groups["attr"]);
 | 
			
		||||
                    }
 | 
			
		||||
                    else {
 | 
			
		||||
                        token = new _1.LineToken(r.type, this.pos, indent);
 | 
			
		||||
                    }
 | 
			
		||||
                    this._currToken = token;
 | 
			
		||||
                    this.pos++;
 | 
			
		||||
                    return this.currToken();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            // No rules matched
 | 
			
		||||
            // TODO: move to rules
 | 
			
		||||
            if (/^\s*(#.*)?$/.test(line)) {
 | 
			
		||||
                // "empty" line
 | 
			
		||||
                token = new _1.LineToken(token_1.Symbol.EMPTY, this.pos, 999999);
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                // This is an INDENT token
 | 
			
		||||
                token = new _1.LineToken(token_1.Symbol.INDENT, this.pos, indent);
 | 
			
		||||
            }
 | 
			
		||||
            this._currToken = token;
 | 
			
		||||
            this.pos++;
 | 
			
		||||
            return this.currToken();
 | 
			
		||||
        }
 | 
			
		||||
        // Didn't return, must be EOF
 | 
			
		||||
        this._currToken = token_1.EOFTOKEN;
 | 
			
		||||
        this.pos++;
 | 
			
		||||
        return this.currToken();
 | 
			
		||||
    }
 | 
			
		||||
    /**
 | 
			
		||||
     * Move backwards in the token stream
 | 
			
		||||
     *
 | 
			
		||||
     * @param `n` The number of positions to retract.
 | 
			
		||||
     * @return The new current token after retracting.
 | 
			
		||||
     */
 | 
			
		||||
    retract(n = 1) {
 | 
			
		||||
        if (this.pos - 1 - n < 0) {
 | 
			
		||||
            // -1 because this.pos is currently on the next token
 | 
			
		||||
            throw new RangeError('Cannot retract past start');
 | 
			
		||||
        }
 | 
			
		||||
        if (n <= 0) {
 | 
			
		||||
            throw new RangeError('Retract distance must be positive');
 | 
			
		||||
        }
 | 
			
		||||
        if (this.pos - n === 0) {
 | 
			
		||||
            // just restart
 | 
			
		||||
            this.pos = 0;
 | 
			
		||||
            return this.next();
 | 
			
		||||
        }
 | 
			
		||||
        let c = n + 1;
 | 
			
		||||
        while (c > 0) {
 | 
			
		||||
            this.pos--;
 | 
			
		||||
            while (/^\s*(#.*)?$/.test(this.textLines[this.pos])) {
 | 
			
		||||
                // Skip empty lines
 | 
			
		||||
                this.pos--;
 | 
			
		||||
            }
 | 
			
		||||
            c--;
 | 
			
		||||
        }
 | 
			
		||||
        return this.next();
 | 
			
		||||
    }
 | 
			
		||||
    return this.next();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
exports.default = Lexer;
 | 
			
		||||
//# sourceMappingURL=lexer.js.map
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user