lexter.ts - added types

This commit is contained in:
tel0065 2022-03-25 10:44:37 -05:00 committed by GitHub
parent c087f1666a
commit a619146afc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,216 +1,195 @@
"use strict"; import { LineToken } from '.';
Object.defineProperty(exports, "__esModule", { import { Symbol, EOFTOKEN, TabInfo } from './token';
value: true
}); type Rule = {
const _1 = require("."); pattern: RegExp,
const token_1 = require("./token"); type: Symbol,
};
/** /**
* List of recognition patterns, in order of priority * List of recognition patterns, in order of priority
* The first item is a recognition pattern, used to recognize the token * The first item is a recognition pattern, used to recognize the token
* the second item is the token type * the second item is the token type
*/ */
const rules = [ const rules: Rule[] = [
{ {
pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/, pattern: /^\s*def\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)\(/,
type: token_1.Symbol.FUNCTION type: Symbol.FUNCTION
}, },
{ {
pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/, pattern: /^\s*class\s+(?<attr>[a-zA-Z_][a-zA-Z0-9_]*)/,
type: token_1.Symbol.CLASS type: Symbol.CLASS
}, },
{ {
pattern: /^\s*if\s+(?<attr>[^:]+):\s*/, pattern: /^\s*if\s+(?<attr>[^:]+):\s*/,
type: token_1.Symbol.IF type: Symbol.IF
}, },
{ {
pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/, pattern: /^\s*elif\s+(?<attr>[^:]+):\s*$/,
type: token_1.Symbol.ELIF type: Symbol.ELIF
}, },
{ {
pattern: /^\s*else\s*:/, pattern: /^\s*else\s*:/,
type: token_1.Symbol.ELSE type: Symbol.ELSE
}, },
{ {
pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/, pattern: /^\s*for\s+(?<attr>[^:]+):\s*$/,
type: token_1.Symbol.FOR type: Symbol.FOR
}, },
{ {
pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/, pattern: /^\s*while\s+(?<attr>[^:]+):\s*$/,
type: token_1.Symbol.WHILE type: Symbol.WHILE
}, },
{ {
pattern: /^\s*try\s*:/, pattern: /^\s*try\s*:/,
type: token_1.Symbol.TRY type: Symbol.TRY
}, },
{ {
pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/, pattern: /^\s*except(\s*(?<attr>[^:]+))?:\s*$/,
type: token_1.Symbol.EXCEPT type: Symbol.EXCEPT
}, },
{ {
pattern: /^\s*finally\s*:\s*$/, pattern: /^\s*finally\s*:\s*$/,
type: token_1.Symbol.FINALLY type: Symbol.FINALLY
}, },
{ {
pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/, pattern: /^\s*with\s+(?<attr>[^:]+):\s*$/,
type: token_1.Symbol.WITH type: Symbol.WITH
}, },
]; ];
/** /**
* Line-By-Line Lexer * Line-By-Line Lexer
*/ */
class Lexer { export default class Lexer {
private textLines: string[] = []; // array of text lines
private pos: number = 0;
private _currToken: LineToken = EOFTOKEN;
/** /**
* @param `text` The text to lex. * @param `text` The text to lex.
* @param `tabFmt` A tab information descriptor * @param `tabFmt` A tab information descriptor
*/ */
constructor(text, tabFmt) { constructor(text ? : string, private tabFmt ? : TabInfo) {
this.tabFmt = tabFmt;
this.textLines = []; // array of text lines
this.pos = 0;
this._currToken = token_1.EOFTOKEN;
// default is 4 wide expanded tabs // default is 4 wide expanded tabs
this.tabFmt = Object.assign({ this.tabFmt = {
...{
size: 4, size: 4,
hard: false hard: false
}, tabFmt); },
...tabFmt
};
if (text) { if (text) {
// normalize linefeeds // normalize linefeeds
text = text.replace('\r\n', '\n'); text = text.replace('\r\n', '\n');
} }
this.restart(text); this.restart(text);
} }
/**
* Calculates indentation level for a line. If using soft tabs,
* indent level rounds up (so, tabSize+1 spaces is 2 levels,
* 2*tabSize+1 is 3, etc.)
*
* @param `text` The line of text.
* @param `tabFmt` A tab information descriptor.
* @return The indent of `text` with consideration for `tabFmt`.
*/
static getIndent(text, tabFmt) {
let leadingSpace = text.length - text.trimLeft().length;
let indent;
if (tabFmt.hard) {
// used tabs
indent = leadingSpace;
} else {
// use spaces
indent = Math.ceil(leadingSpace / tabFmt.size);
}
return indent;
}
/**
* Calculates leading spaces for a line.
* This method uses arithmetic to calculate the number of leading spaces
*
* @param `text` The line of text.
* @return The number of leading spaces of `text`.
*/
static getLeadingSpacesByArithmetic(textLine) {
const leadingSpaces = textLine.text.length - textLine.text.trimStart().length;
return leadingSpaces;
}
/**
* Calculates leading spaces for a line.
* This method finds the index position of the first non-whitespace character
* Since the index is built using a 0-index, the position of this character
* will equal the number of spaces preceding the character.
*
* @param `text` The line of text.
* @return The number of leading spaces of `text` with respect to the index position of the first non-whitespace character.
*/
static getLeadingSpacesByIndex(textLine) {
const indexNum = textLine.firstNonWhitespaceCharacterIndex;
return indexNum;
}
/** /**
* Restart lexer with new text. * Restart lexer with new text.
* *
* @param `text` The new text to lex. * @param `text` The new text to lex.
*/ */
restart(text) { restart(text ? : string): void {
this.pos = 0; this.pos = 0;
this._currToken = token_1.EOFTOKEN; // if no input, already on EOFTOKEN this._currToken = EOFTOKEN; // if no input, already on EOFTOKEN
if (text) { if (text) {
this.textLines = text.split('\n'); this.textLines = text.split('\n');
this.next(); // advance to the first token this.next(); // advance to the first token
} }
} }
/** /**
* @return the current {@link LineToken}. * @return the current {@link LineToken}.
*/ */
currToken() { currToken(): LineToken {
return this._currToken; return this._currToken;
} }
/** /**
* Advance the position in the token stream. * Advance the position in the token stream.
* *
* @return The new current token, after advancing * @return The new current token, after advancing
*/ */
next() { next(): LineToken {
if (this._currToken === token_1.EOFTOKEN && this.pos > this.textLines.length) { if (this._currToken === EOFTOKEN && this.pos > this.textLines.length) {
throw new Error('Cannot advance past end'); throw new Error('Cannot advance past end');
} }
// Until a LineToken is found, or EOF // Until a LineToken is found, or EOF
while (this.pos < this.textLines.length) { while (this.pos < this.textLines.length) {
let line = this.textLines[this.pos]; let line: string = this.textLines[this.pos];
let indent = Lexer.getIndent(line, this.tabFmt); let indent: number = Lexer.getIndent(line, this.tabFmt!);
let token; let token: LineToken;
for (var r of rules) { for (var r of rules) {
// Does line match pattern? // Does line match pattern?
let match = line.match(r.pattern); let match: RegExpMatchArray | null = line.match(r.pattern);
if (match) { if (match) {
// Yes... // Yes...
if (match.groups) { if (match.groups) {
token = new _1.LineToken(r.type, this.pos, indent, match.groups["attr"]); token = new LineToken(r.type, this.pos, indent, match.groups["attr"]);
} else {
token = new _1.LineToken(r.type, this.pos, indent);
} }
else {
token = new LineToken(r.type, this.pos, indent);
}
this._currToken = token; this._currToken = token;
this.pos++; this.pos++;
return this.currToken(); return this.currToken();
} }
} }
// No rules matched // No rules matched
// TODO: move to rules // TODO: move to rules
if (/^\s*(#.*)?$/.test(line)) { if (/^\s*(#.*)?$/.test(line)) {
// "empty" line // "empty" line
token = new _1.LineToken(token_1.Symbol.EMPTY, this.pos, 999999); token = new LineToken(Symbol.EMPTY, this.pos, 999999);
} else {
// This is an INDENT token
token = new _1.LineToken(token_1.Symbol.INDENT, this.pos, indent);
} }
else {
// This is an INDENT token
token = new LineToken(Symbol.INDENT, this.pos, indent);
}
this._currToken = token; this._currToken = token;
this.pos++; this.pos++;
return this.currToken(); return this.currToken();
} }
// Didn't return, must be EOF // Didn't return, must be EOF
this._currToken = token_1.EOFTOKEN; this._currToken = EOFTOKEN;
this.pos++; this.pos++;
return this.currToken(); return this.currToken();
} }
/** /**
* Move backwards in the token stream * Move backwards in the token stream
* *
* @param `n` The number of positions to retract. * @param `n` The number of positions to retract.
* @return The new current token after retracting. * @return The new current token after retracting.
*/ */
retract(n = 1) { retract(n: number = 1): LineToken {
if (this.pos - 1 - n < 0) { if (this.pos - 1 - n < 0) {
// -1 because this.pos is currently on the next token // -1 because this.pos is currently on the next token
throw new RangeError('Cannot retract past start'); throw new RangeError('Cannot retract past start');
} }
if (n <= 0) { if (n <= 0) {
throw new RangeError('Retract distance must be positive'); throw new RangeError('Retract distance must be positive');
} }
if (this.pos - n === 0) { if (this.pos - n === 0) {
// just restart // just restart
this.pos = 0; this.pos = 0;
return this.next(); return this.next();
} }
let c = n + 1; let c = n + 1;
while (c > 0) { while (c > 0) {
this.pos--; this.pos--;
@ -220,8 +199,60 @@ class Lexer {
} }
c--; c--;
} }
return this.next(); return this.next();
} }
/**
* Calculates indentation level for a line. If using soft tabs,
* indent level rounds up (so, tabSize+1 spaces is 2 levels,
* 2*tabSize+1 is 3, etc.)
*
* @param `text` The line of text.
* @param `tabFmt` A tab information descriptor.
* @return The indent of `text` with consideration for `tabFmt`.
*/
static getIndent(text: string, tabFmt: TabInfo): number {
let leadingSpace: number = text.length - text.trimStart().length;
let indent: number;
if (tabFmt.hard) {
// used tabs
indent = leadingSpace;
}
else {
// use spaces
indent = Math.ceil(leadingSpace / tabFmt.size!);
}
return indent;
}
/**
* Calculates leading spaces for a line.
* This method uses arithmetic to calculate the number of leading spaces
*
* @param `line` The line of text.
* @return The number of leading spaces of `text`.
*/
static getLeadingSpacesByArithmetic(line: any) {
const leadingSpaces: number = line.text.length - line.text.trimStart().length;
return leadingSpaces;
}
/**
* Calculates leading spaces for a line.
* This method finds the index position of the first non-whitespace character
* Since the index is built using a 0-index, the position of this character
* will equal the number of spaces preceding the character.
*
* @param `text` The line of text.
* @return The number of leading spaces of `text` with respect to the index position of the first non-whitespace character.
*/
static getLeadingSpacesByIndex(text: any) {
const indexNum: number = text.firstNonWhitespaceCharacterIndex;
return indexNum;
}
} }
exports.default = Lexer;
//# sourceMappingURL=lexer.js.map