/** * @license * Copyright Google Inc. All Rights Reserved. * * Use of this source code is governed by an MIT-style license that can be * found in the LICENSE file at https://angular.io/license */ import * as chars from '../chars'; import {NumberWrapper, isPresent} from '../facade/lang'; import {CompilerInjectable} from '../injectable'; export enum TokenType { Character, Identifier, Keyword, String, Operator, Number, Error } const KEYWORDS = ['var', 'let', 'null', 'undefined', 'true', 'false', 'if', 'else', 'this']; @CompilerInjectable() export class Lexer { tokenize(text: string): Token[] { const scanner = new _Scanner(text); const tokens: Token[] = []; let token = scanner.scanToken(); while (token != null) { tokens.push(token); token = scanner.scanToken(); } return tokens; } } export class Token { constructor( public index: number, public type: TokenType, public numValue: number, public strValue: string) {} isCharacter(code: number): boolean { return this.type == TokenType.Character && this.numValue == code; } isNumber(): boolean { return this.type == TokenType.Number; } isString(): boolean { return this.type == TokenType.String; } isOperator(operater: string): boolean { return this.type == TokenType.Operator && this.strValue == operater; } isIdentifier(): boolean { return this.type == TokenType.Identifier; } isKeyword(): boolean { return this.type == TokenType.Keyword; } isKeywordLet(): boolean { return this.type == TokenType.Keyword && this.strValue == 'let'; } isKeywordNull(): boolean { return this.type == TokenType.Keyword && this.strValue == 'null'; } isKeywordUndefined(): boolean { return this.type == TokenType.Keyword && this.strValue == 'undefined'; } isKeywordTrue(): boolean { return this.type == TokenType.Keyword && this.strValue == 'true'; } isKeywordFalse(): boolean { return this.type == TokenType.Keyword && this.strValue == 'false'; } isKeywordThis(): boolean { return this.type == TokenType.Keyword && this.strValue == 'this'; } isError(): boolean { return this.type == TokenType.Error; } toNumber(): number { return this.type == TokenType.Number ? this.numValue : -1; } toString(): string { switch (this.type) { case TokenType.Character: case TokenType.Identifier: case TokenType.Keyword: case TokenType.Operator: case TokenType.String: case TokenType.Error: return this.strValue; case TokenType.Number: return this.numValue.toString(); default: return null; } } } function newCharacterToken(index: number, code: number): Token { return new Token(index, TokenType.Character, code, String.fromCharCode(code)); } function newIdentifierToken(index: number, text: string): Token { return new Token(index, TokenType.Identifier, 0, text); } function newKeywordToken(index: number, text: string): Token { return new Token(index, TokenType.Keyword, 0, text); } function newOperatorToken(index: number, text: string): Token { return new Token(index, TokenType.Operator, 0, text); } function newStringToken(index: number, text: string): Token { return new Token(index, TokenType.String, 0, text); } function newNumberToken(index: number, n: number): Token { return new Token(index, TokenType.Number, n, ''); } function newErrorToken(index: number, message: string): Token { return new Token(index, TokenType.Error, 0, message); } export const EOF: Token = new Token(-1, TokenType.Character, 0, ''); class _Scanner { length: number; peek: number = 0; index: number = -1; constructor(public input: string) { this.length = input.length; this.advance(); } advance() { this.peek = ++this.index >= this.length ? chars.$EOF : this.input.charCodeAt(this.index); } scanToken(): Token { const input = this.input, length = this.length; let peek = this.peek, index = this.index; // Skip whitespace. while (peek <= chars.$SPACE) { if (++index >= length) { peek = chars.$EOF; break; } else { peek = input.charCodeAt(index); } } this.peek = peek; this.index = index; if (index >= length) { return null; } // Handle identifiers and numbers. if (isIdentifierStart(peek)) return this.scanIdentifier(); if (chars.isDigit(peek)) return this.scanNumber(index); const start: number = index; switch (peek) { case chars.$PERIOD: this.advance(); return chars.isDigit(this.peek) ? this.scanNumber(start) : newCharacterToken(start, chars.$PERIOD); case chars.$LPAREN: case chars.$RPAREN: case chars.$LBRACE: case chars.$RBRACE: case chars.$LBRACKET: case chars.$RBRACKET: case chars.$COMMA: case chars.$COLON: case chars.$SEMICOLON: return this.scanCharacter(start, peek); case chars.$SQ: case chars.$DQ: return this.scanString(); case chars.$HASH: case chars.$PLUS: case chars.$MINUS: case chars.$STAR: case chars.$SLASH: case chars.$PERCENT: case chars.$CARET: return this.scanOperator(start, String.fromCharCode(peek)); case chars.$QUESTION: return this.scanComplexOperator(start, '?', chars.$PERIOD, '.'); case chars.$LT: case chars.$GT: return this.scanComplexOperator(start, String.fromCharCode(peek), chars.$EQ, '='); case chars.$BANG: case chars.$EQ: return this.scanComplexOperator( start, String.fromCharCode(peek), chars.$EQ, '=', chars.$EQ, '='); case chars.$AMPERSAND: return this.scanComplexOperator(start, '&', chars.$AMPERSAND, '&'); case chars.$BAR: return this.scanComplexOperator(start, '|', chars.$BAR, '|'); case chars.$NBSP: while (chars.isWhitespace(this.peek)) this.advance(); return this.scanToken(); } this.advance(); return this.error(`Unexpected character [${String.fromCharCode(peek)}]`, 0); } scanCharacter(start: number, code: number): Token { this.advance(); return newCharacterToken(start, code); } scanOperator(start: number, str: string): Token { this.advance(); return newOperatorToken(start, str); } /** * Tokenize a 2/3 char long operator * * @param start start index in the expression * @param one first symbol (always part of the operator) * @param twoCode code point for the second symbol * @param two second symbol (part of the operator when the second code point matches) * @param threeCode code point for the third symbol * @param three third symbol (part of the operator when provided and matches source expression) * @returns {Token} */ scanComplexOperator( start: number, one: string, twoCode: number, two: string, threeCode?: number, three?: string): Token { this.advance(); let str: string = one; if (this.peek == twoCode) { this.advance(); str += two; } if (isPresent(threeCode) && this.peek == threeCode) { this.advance(); str += three; } return newOperatorToken(start, str); } scanIdentifier(): Token { const start: number = this.index; this.advance(); while (isIdentifierPart(this.peek)) this.advance(); const str: string = this.input.substring(start, this.index); return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, str) : newIdentifierToken(start, str); } scanNumber(start: number): Token { let simple: boolean = (this.index === start); this.advance(); // Skip initial digit. while (true) { if (chars.isDigit(this.peek)) { // Do nothing. } else if (this.peek == chars.$PERIOD) { simple = false; } else if (isExponentStart(this.peek)) { this.advance(); if (isExponentSign(this.peek)) this.advance(); if (!chars.isDigit(this.peek)) return this.error('Invalid exponent', -1); simple = false; } else { break; } this.advance(); } const str: string = this.input.substring(start, this.index); const value: number = simple ? NumberWrapper.parseIntAutoRadix(str) : parseFloat(str); return newNumberToken(start, value); } scanString(): Token { const start: number = this.index; const quote: number = this.peek; this.advance(); // Skip initial quote. let buffer: string = ''; let marker: number = this.index; const input: string = this.input; while (this.peek != quote) { if (this.peek == chars.$BACKSLASH) { buffer += input.substring(marker, this.index); this.advance(); let unescapedCode: number; if (this.peek == chars.$u) { // 4 character hex code for unicode character. const hex: string = input.substring(this.index + 1, this.index + 5); if (/^[0-9a-f]+$/i.test(hex)) { unescapedCode = parseInt(hex, 16); } else { return this.error(`Invalid unicode escape [\\u${hex}]`, 0); } for (let i: number = 0; i < 5; i++) { this.advance(); } } else { unescapedCode = unescape(this.peek); this.advance(); } buffer += String.fromCharCode(unescapedCode); marker = this.index; } else if (this.peek == chars.$EOF) { return this.error('Unterminated quote', 0); } else { this.advance(); } } const last: string = input.substring(marker, this.index); this.advance(); // Skip terminating quote. return newStringToken(start, buffer + last); } error(message: string, offset: number): Token { const position: number = this.index + offset; return newErrorToken( position, `Lexer Error: ${message} at column ${position} in expression [${this.input}]`); } } function isIdentifierStart(code: number): boolean { return (chars.$a <= code && code <= chars.$z) || (chars.$A <= code && code <= chars.$Z) || (code == chars.$_) || (code == chars.$$); } export function isIdentifier(input: string): boolean { if (input.length == 0) return false; const scanner = new _Scanner(input); if (!isIdentifierStart(scanner.peek)) return false; scanner.advance(); while (scanner.peek !== chars.$EOF) { if (!isIdentifierPart(scanner.peek)) return false; scanner.advance(); } return true; } function isIdentifierPart(code: number): boolean { return chars.isAsciiLetter(code) || chars.isDigit(code) || (code == chars.$_) || (code == chars.$$); } function isExponentStart(code: number): boolean { return code == chars.$e || code == chars.$E; } function isExponentSign(code: number): boolean { return code == chars.$MINUS || code == chars.$PLUS; } export function isQuote(code: number): boolean { return code === chars.$SQ || code === chars.$DQ || code === chars.$BT; } function unescape(code: number): number { switch (code) { case chars.$n: return chars.$LF; case chars.$f: return chars.$FF; case chars.$r: return chars.$CR; case chars.$t: return chars.$TAB; case chars.$v: return chars.$VTAB; default: return code; } }