474 lines
13 KiB
TypeScript
474 lines
13 KiB
TypeScript
import {Injectable} from '@angular/core';
|
|
import {SetWrapper} from '../../src/facade/collection';
|
|
import {NumberWrapper, StringJoiner, StringWrapper, isPresent} from '../../src/facade/lang';
|
|
import {BaseException} from '../../src/facade/exceptions';
|
|
|
|
export enum TokenType {
|
|
Character,
|
|
Identifier,
|
|
Keyword,
|
|
String,
|
|
Operator,
|
|
Number
|
|
}
|
|
|
|
@Injectable()
|
|
export class Lexer {
|
|
tokenize(text: string): any[] {
|
|
var scanner = new _Scanner(text);
|
|
var tokens = [];
|
|
var token = scanner.scanToken();
|
|
while (token != null) {
|
|
tokens.push(token);
|
|
token = scanner.scanToken();
|
|
}
|
|
return tokens;
|
|
}
|
|
}
|
|
|
|
export class Token {
|
|
constructor(public index: number, public type: TokenType, public numValue: number,
|
|
public strValue: string) {}
|
|
|
|
isCharacter(code: number): boolean {
|
|
return (this.type == TokenType.Character && this.numValue == code);
|
|
}
|
|
|
|
isNumber(): boolean { return (this.type == TokenType.Number); }
|
|
|
|
isString(): boolean { return (this.type == TokenType.String); }
|
|
|
|
isOperator(operater: string): boolean {
|
|
return (this.type == TokenType.Operator && this.strValue == operater);
|
|
}
|
|
|
|
isIdentifier(): boolean { return (this.type == TokenType.Identifier); }
|
|
|
|
isKeyword(): boolean { return (this.type == TokenType.Keyword); }
|
|
|
|
isKeywordDeprecatedVar(): boolean {
|
|
return (this.type == TokenType.Keyword && this.strValue == "var");
|
|
}
|
|
|
|
isKeywordLet(): boolean { return (this.type == TokenType.Keyword && this.strValue == "let"); }
|
|
|
|
isKeywordNull(): boolean { return (this.type == TokenType.Keyword && this.strValue == "null"); }
|
|
|
|
isKeywordUndefined(): boolean {
|
|
return (this.type == TokenType.Keyword && this.strValue == "undefined");
|
|
}
|
|
|
|
isKeywordTrue(): boolean { return (this.type == TokenType.Keyword && this.strValue == "true"); }
|
|
|
|
isKeywordFalse(): boolean { return (this.type == TokenType.Keyword && this.strValue == "false"); }
|
|
|
|
toNumber(): number {
|
|
// -1 instead of NULL ok?
|
|
return (this.type == TokenType.Number) ? this.numValue : -1;
|
|
}
|
|
|
|
toString(): string {
|
|
switch (this.type) {
|
|
case TokenType.Character:
|
|
case TokenType.Identifier:
|
|
case TokenType.Keyword:
|
|
case TokenType.Operator:
|
|
case TokenType.String:
|
|
return this.strValue;
|
|
case TokenType.Number:
|
|
return this.numValue.toString();
|
|
default:
|
|
return null;
|
|
}
|
|
}
|
|
}
|
|
|
|
function newCharacterToken(index: number, code: number): Token {
|
|
return new Token(index, TokenType.Character, code, StringWrapper.fromCharCode(code));
|
|
}
|
|
|
|
function newIdentifierToken(index: number, text: string): Token {
|
|
return new Token(index, TokenType.Identifier, 0, text);
|
|
}
|
|
|
|
function newKeywordToken(index: number, text: string): Token {
|
|
return new Token(index, TokenType.Keyword, 0, text);
|
|
}
|
|
|
|
function newOperatorToken(index: number, text: string): Token {
|
|
return new Token(index, TokenType.Operator, 0, text);
|
|
}
|
|
|
|
function newStringToken(index: number, text: string): Token {
|
|
return new Token(index, TokenType.String, 0, text);
|
|
}
|
|
|
|
function newNumberToken(index: number, n: number): Token {
|
|
return new Token(index, TokenType.Number, n, "");
|
|
}
|
|
|
|
|
|
export var EOF: Token = new Token(-1, TokenType.Character, 0, "");
|
|
|
|
export const $EOF = /*@ts2dart_const*/ 0;
|
|
export const $TAB = /*@ts2dart_const*/ 9;
|
|
export const $LF = /*@ts2dart_const*/ 10;
|
|
export const $VTAB = /*@ts2dart_const*/ 11;
|
|
export const $FF = /*@ts2dart_const*/ 12;
|
|
export const $CR = /*@ts2dart_const*/ 13;
|
|
export const $SPACE = /*@ts2dart_const*/ 32;
|
|
export const $BANG = /*@ts2dart_const*/ 33;
|
|
export const $DQ = /*@ts2dart_const*/ 34;
|
|
export const $HASH = /*@ts2dart_const*/ 35;
|
|
export const $$ = /*@ts2dart_const*/ 36;
|
|
export const $PERCENT = /*@ts2dart_const*/ 37;
|
|
export const $AMPERSAND = /*@ts2dart_const*/ 38;
|
|
export const $SQ = /*@ts2dart_const*/ 39;
|
|
export const $LPAREN = /*@ts2dart_const*/ 40;
|
|
export const $RPAREN = /*@ts2dart_const*/ 41;
|
|
export const $STAR = /*@ts2dart_const*/ 42;
|
|
export const $PLUS = /*@ts2dart_const*/ 43;
|
|
export const $COMMA = /*@ts2dart_const*/ 44;
|
|
export const $MINUS = /*@ts2dart_const*/ 45;
|
|
export const $PERIOD = /*@ts2dart_const*/ 46;
|
|
export const $SLASH = /*@ts2dart_const*/ 47;
|
|
export const $COLON = /*@ts2dart_const*/ 58;
|
|
export const $SEMICOLON = /*@ts2dart_const*/ 59;
|
|
export const $LT = /*@ts2dart_const*/ 60;
|
|
export const $EQ = /*@ts2dart_const*/ 61;
|
|
export const $GT = /*@ts2dart_const*/ 62;
|
|
export const $QUESTION = /*@ts2dart_const*/ 63;
|
|
|
|
const $0 = /*@ts2dart_const*/ 48;
|
|
const $9 = /*@ts2dart_const*/ 57;
|
|
|
|
const $A = /*@ts2dart_const*/ 65, $E = /*@ts2dart_const*/ 69, $Z = /*@ts2dart_const*/ 90;
|
|
|
|
export const $LBRACKET = /*@ts2dart_const*/ 91;
|
|
export const $BACKSLASH = /*@ts2dart_const*/ 92;
|
|
export const $RBRACKET = /*@ts2dart_const*/ 93;
|
|
const $CARET = /*@ts2dart_const*/ 94;
|
|
const $_ = /*@ts2dart_const*/ 95;
|
|
export const $BT = /*@ts2dart_const*/ 96;
|
|
const $a = /*@ts2dart_const*/ 97, $e = /*@ts2dart_const*/ 101, $f = /*@ts2dart_const*/ 102;
|
|
const $n = /*@ts2dart_const*/ 110, $r = /*@ts2dart_const*/ 114, $t = /*@ts2dart_const*/ 116,
|
|
$u = /*@ts2dart_const*/ 117, $v = /*@ts2dart_const*/ 118, $z = /*@ts2dart_const*/ 122;
|
|
|
|
export const $LBRACE = /*@ts2dart_const*/ 123;
|
|
export const $BAR = /*@ts2dart_const*/ 124;
|
|
export const $RBRACE = /*@ts2dart_const*/ 125;
|
|
const $NBSP = /*@ts2dart_const*/ 160;
|
|
|
|
export class ScannerError extends BaseException {
|
|
constructor(public message) { super(); }
|
|
|
|
toString(): string { return this.message; }
|
|
}
|
|
|
|
class _Scanner {
|
|
length: number;
|
|
peek: number = 0;
|
|
index: number = -1;
|
|
|
|
constructor(public input: string) {
|
|
this.length = input.length;
|
|
this.advance();
|
|
}
|
|
|
|
advance() {
|
|
this.peek =
|
|
++this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
|
|
}
|
|
|
|
scanToken(): Token {
|
|
var input = this.input, length = this.length, peek = this.peek, index = this.index;
|
|
|
|
// Skip whitespace.
|
|
while (peek <= $SPACE) {
|
|
if (++index >= length) {
|
|
peek = $EOF;
|
|
break;
|
|
} else {
|
|
peek = StringWrapper.charCodeAt(input, index);
|
|
}
|
|
}
|
|
|
|
this.peek = peek;
|
|
this.index = index;
|
|
|
|
if (index >= length) {
|
|
return null;
|
|
}
|
|
|
|
// Handle identifiers and numbers.
|
|
if (isIdentifierStart(peek)) return this.scanIdentifier();
|
|
if (isDigit(peek)) return this.scanNumber(index);
|
|
|
|
var start: number = index;
|
|
switch (peek) {
|
|
case $PERIOD:
|
|
this.advance();
|
|
return isDigit(this.peek) ? this.scanNumber(start) : newCharacterToken(start, $PERIOD);
|
|
case $LPAREN:
|
|
case $RPAREN:
|
|
case $LBRACE:
|
|
case $RBRACE:
|
|
case $LBRACKET:
|
|
case $RBRACKET:
|
|
case $COMMA:
|
|
case $COLON:
|
|
case $SEMICOLON:
|
|
return this.scanCharacter(start, peek);
|
|
case $SQ:
|
|
case $DQ:
|
|
return this.scanString();
|
|
case $HASH:
|
|
case $PLUS:
|
|
case $MINUS:
|
|
case $STAR:
|
|
case $SLASH:
|
|
case $PERCENT:
|
|
case $CARET:
|
|
return this.scanOperator(start, StringWrapper.fromCharCode(peek));
|
|
case $QUESTION:
|
|
return this.scanComplexOperator(start, '?', $PERIOD, '.');
|
|
case $LT:
|
|
case $GT:
|
|
return this.scanComplexOperator(start, StringWrapper.fromCharCode(peek), $EQ, '=');
|
|
case $BANG:
|
|
case $EQ:
|
|
return this.scanComplexOperator(start, StringWrapper.fromCharCode(peek), $EQ, '=', $EQ,
|
|
'=');
|
|
case $AMPERSAND:
|
|
return this.scanComplexOperator(start, '&', $AMPERSAND, '&');
|
|
case $BAR:
|
|
return this.scanComplexOperator(start, '|', $BAR, '|');
|
|
case $NBSP:
|
|
while (isWhitespace(this.peek)) this.advance();
|
|
return this.scanToken();
|
|
}
|
|
|
|
this.error(`Unexpected character [${StringWrapper.fromCharCode(peek)}]`, 0);
|
|
return null;
|
|
}
|
|
|
|
scanCharacter(start: number, code: number): Token {
|
|
this.advance();
|
|
return newCharacterToken(start, code);
|
|
}
|
|
|
|
|
|
scanOperator(start: number, str: string): Token {
|
|
this.advance();
|
|
return newOperatorToken(start, str);
|
|
}
|
|
|
|
/**
|
|
* Tokenize a 2/3 char long operator
|
|
*
|
|
* @param start start index in the expression
|
|
* @param one first symbol (always part of the operator)
|
|
* @param twoCode code point for the second symbol
|
|
* @param two second symbol (part of the operator when the second code point matches)
|
|
* @param threeCode code point for the third symbol
|
|
* @param three third symbol (part of the operator when provided and matches source expression)
|
|
* @returns {Token}
|
|
*/
|
|
scanComplexOperator(start: number, one: string, twoCode: number, two: string, threeCode?: number,
|
|
three?: string): Token {
|
|
this.advance();
|
|
var str: string = one;
|
|
if (this.peek == twoCode) {
|
|
this.advance();
|
|
str += two;
|
|
}
|
|
if (isPresent(threeCode) && this.peek == threeCode) {
|
|
this.advance();
|
|
str += three;
|
|
}
|
|
return newOperatorToken(start, str);
|
|
}
|
|
|
|
scanIdentifier(): Token {
|
|
var start: number = this.index;
|
|
this.advance();
|
|
while (isIdentifierPart(this.peek)) this.advance();
|
|
var str: string = this.input.substring(start, this.index);
|
|
if (SetWrapper.has(KEYWORDS, str)) {
|
|
return newKeywordToken(start, str);
|
|
} else {
|
|
return newIdentifierToken(start, str);
|
|
}
|
|
}
|
|
|
|
scanNumber(start: number): Token {
|
|
var simple: boolean = (this.index === start);
|
|
this.advance(); // Skip initial digit.
|
|
while (true) {
|
|
if (isDigit(this.peek)) {
|
|
// Do nothing.
|
|
} else if (this.peek == $PERIOD) {
|
|
simple = false;
|
|
} else if (isExponentStart(this.peek)) {
|
|
this.advance();
|
|
if (isExponentSign(this.peek)) this.advance();
|
|
if (!isDigit(this.peek)) this.error('Invalid exponent', -1);
|
|
simple = false;
|
|
} else {
|
|
break;
|
|
}
|
|
this.advance();
|
|
}
|
|
var str: string = this.input.substring(start, this.index);
|
|
// TODO
|
|
var value: number =
|
|
simple ? NumberWrapper.parseIntAutoRadix(str) : NumberWrapper.parseFloat(str);
|
|
return newNumberToken(start, value);
|
|
}
|
|
|
|
scanString(): Token {
|
|
var start: number = this.index;
|
|
var quote: number = this.peek;
|
|
this.advance(); // Skip initial quote.
|
|
|
|
var buffer: StringJoiner;
|
|
var marker: number = this.index;
|
|
var input: string = this.input;
|
|
|
|
while (this.peek != quote) {
|
|
if (this.peek == $BACKSLASH) {
|
|
if (buffer == null) buffer = new StringJoiner();
|
|
buffer.add(input.substring(marker, this.index));
|
|
this.advance();
|
|
var unescapedCode: number;
|
|
if (this.peek == $u) {
|
|
// 4 character hex code for unicode character.
|
|
var hex: string = input.substring(this.index + 1, this.index + 5);
|
|
try {
|
|
unescapedCode = NumberWrapper.parseInt(hex, 16);
|
|
} catch (e) {
|
|
this.error(`Invalid unicode escape [\\u${hex}]`, 0);
|
|
}
|
|
for (var i: number = 0; i < 5; i++) {
|
|
this.advance();
|
|
}
|
|
} else {
|
|
unescapedCode = unescape(this.peek);
|
|
this.advance();
|
|
}
|
|
buffer.add(StringWrapper.fromCharCode(unescapedCode));
|
|
marker = this.index;
|
|
} else if (this.peek == $EOF) {
|
|
this.error('Unterminated quote', 0);
|
|
} else {
|
|
this.advance();
|
|
}
|
|
}
|
|
|
|
var last: string = input.substring(marker, this.index);
|
|
this.advance(); // Skip terminating quote.
|
|
|
|
// Compute the unescaped string value.
|
|
var unescaped: string = last;
|
|
if (buffer != null) {
|
|
buffer.add(last);
|
|
unescaped = buffer.toString();
|
|
}
|
|
return newStringToken(start, unescaped);
|
|
}
|
|
|
|
error(message: string, offset: number) {
|
|
var position: number = this.index + offset;
|
|
throw new ScannerError(
|
|
`Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
|
|
}
|
|
}
|
|
|
|
function isWhitespace(code: number): boolean {
|
|
return (code >= $TAB && code <= $SPACE) || (code == $NBSP);
|
|
}
|
|
|
|
function isIdentifierStart(code: number): boolean {
|
|
return ($a <= code && code <= $z) || ($A <= code && code <= $Z) || (code == $_) || (code == $$);
|
|
}
|
|
|
|
export function isIdentifier(input: string): boolean {
|
|
if (input.length == 0) return false;
|
|
var scanner = new _Scanner(input);
|
|
if (!isIdentifierStart(scanner.peek)) return false;
|
|
scanner.advance();
|
|
while (scanner.peek !== $EOF) {
|
|
if (!isIdentifierPart(scanner.peek)) return false;
|
|
scanner.advance();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
function isIdentifierPart(code: number): boolean {
|
|
return ($a <= code && code <= $z) || ($A <= code && code <= $Z) || ($0 <= code && code <= $9) ||
|
|
(code == $_) || (code == $$);
|
|
}
|
|
|
|
function isDigit(code: number): boolean {
|
|
return $0 <= code && code <= $9;
|
|
}
|
|
|
|
function isExponentStart(code: number): boolean {
|
|
return code == $e || code == $E;
|
|
}
|
|
|
|
function isExponentSign(code: number): boolean {
|
|
return code == $MINUS || code == $PLUS;
|
|
}
|
|
|
|
export function isQuote(code: number): boolean {
|
|
return code === $SQ || code === $DQ || code === $BT;
|
|
}
|
|
|
|
function unescape(code: number): number {
|
|
switch (code) {
|
|
case $n:
|
|
return $LF;
|
|
case $f:
|
|
return $FF;
|
|
case $r:
|
|
return $CR;
|
|
case $t:
|
|
return $TAB;
|
|
case $v:
|
|
return $VTAB;
|
|
default:
|
|
return code;
|
|
}
|
|
}
|
|
|
|
var OPERATORS = SetWrapper.createFromList([
|
|
'+',
|
|
'-',
|
|
'*',
|
|
'/',
|
|
'%',
|
|
'^',
|
|
'=',
|
|
'==',
|
|
'!=',
|
|
'===',
|
|
'!==',
|
|
'<',
|
|
'>',
|
|
'<=',
|
|
'>=',
|
|
'&&',
|
|
'||',
|
|
'&',
|
|
'|',
|
|
'!',
|
|
'?',
|
|
'#',
|
|
'?.'
|
|
]);
|
|
|
|
|
|
var KEYWORDS =
|
|
SetWrapper.createFromList(['var', 'let', 'null', 'undefined', 'true', 'false', 'if', 'else']);
|