From eeb560ac8895c147790d55f522f3bc3e48941dea Mon Sep 17 00:00:00 2001 From: Pete Bacon Darwin Date: Fri, 8 Feb 2019 22:10:19 +0000 Subject: [PATCH] feat(compiler): support tokenizing a sub-section of an input string (#28055) The lexer that does the tokenizing can now process only a part of the source string, by passing a `range` property in the `options` argument. The locations of the nodes that are tokenized will now take into account the position of the span in the context of the original source string. This `range` option is, in turn, exposed from the template parser as well. Being able to process parts of files helps to enable SourceMap support when compiling inline component templates. PR Close #28055 --- packages/compiler/src/ml_parser/lexer.ts | 60 +++++++++++++++---- .../compiler/src/render3/view/template.ts | 6 ++ .../compiler/test/ml_parser/lexer_spec.ts | 22 +++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index 5f8b479232..30bfdea9aa 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -50,6 +50,13 @@ export class TokenizeResult { constructor(public tokens: Token[], public errors: TokenError[]) {} } +export interface LexerRange { + startPos: number; + startLine: number; + startCol: number; + endPos: number; +} + /** * Options that modify how the text is tokenized. */ @@ -58,6 +65,11 @@ export interface TokenizeOptions { tokenizeExpansionForms?: boolean; /** How to tokenize interpolation markers. */ interpolationConfig?: InterpolationConfig; + /** + * The start and end point of the text to parse within the `source` string. + * The entire `source` string is parsed if this is not provided. 
+ * */ + range?: LexerRange; } export function tokenize( @@ -84,14 +96,14 @@ class _ControlFlowError { // See http://www.w3.org/TR/html51/syntax.html#writing class _Tokenizer { private _input: string; - private _length: number; + private _end: number; private _tokenizeIcu: boolean; private _interpolationConfig: InterpolationConfig; private _peek: number = -1; private _nextPeek: number = -1; - private _index: number = -1; - private _line: number = 0; - private _column: number = -1; + private _index: number; + private _line: number; + private _column: number; private _currentTokenStart: ParseLocation|null = null; private _currentTokenType: TokenType|null = null; private _expansionCaseStack: TokenType[] = []; @@ -112,8 +124,26 @@ class _Tokenizer { this._tokenizeIcu = options.tokenizeExpansionForms || false; this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG; this._input = _file.content; - this._length = _file.content.length; - this._advance(); + if (options.range) { + this._end = options.range.endPos; + this._index = options.range.startPos; + this._line = options.range.startLine; + this._column = options.range.startCol; + } else { + this._end = this._input.length; + this._index = 0; + this._line = 0; + this._column = 0; + } + try { + this._initPeek(); + } catch (e) { + if (e instanceof _ControlFlowError) { + this.errors.push(e.error); + } else { + throw e; + } + } } private _processCarriageReturns(content: string): string { @@ -232,8 +262,8 @@ class _Tokenizer { return new _ControlFlowError(error); } - private _advance() { - if (this._index >= this._length) { + private _advance(processingEscapeSequence?: boolean) { + if (this._index >= this._end) { throw this._createError(_unexpectedCharacterErrorMsg(chars.$EOF), this._getSpan()); } if (this._peek === chars.$LF) { @@ -243,9 +273,17 @@ class _Tokenizer { this._column++; } this._index++; - this._peek = this._index >= this._length ? 
chars.$EOF : this._input.charCodeAt(this._index); + this._initPeek(processingEscapeSequence); + } + + /** + * Initialize the _peek and _nextPeek properties based on the current _index. + * @param processingEscapeSequence whether we are in the middle of processing an escape sequence. + */ + private _initPeek(processingEscapeSequence?: boolean) { + this._peek = this._index >= this._end ? chars.$EOF : this._input.charCodeAt(this._index); this._nextPeek = - this._index + 1 >= this._length ? chars.$EOF : this._input.charCodeAt(this._index + 1); + this._index + 1 >= this._end ? chars.$EOF : this._input.charCodeAt(this._index + 1); } private _attemptCharCode(charCode: number): boolean { @@ -274,7 +312,7 @@ class _Tokenizer { private _attemptStr(chars: string): boolean { const len = chars.length; - if (this._index + len > this._length) { + if (this._index + len > this._end) { return false; } const initialPosition = this._savePosition(); diff --git a/packages/compiler/src/render3/view/template.ts b/packages/compiler/src/render3/view/template.ts index a86532ee4b..6ffc1d3a75 100644 --- a/packages/compiler/src/render3/view/template.ts +++ b/packages/compiler/src/render3/view/template.ts @@ -18,6 +18,7 @@ import * as html from '../../ml_parser/ast'; import {HtmlParser} from '../../ml_parser/html_parser'; import {WhitespaceVisitor} from '../../ml_parser/html_whitespaces'; import {DEFAULT_INTERPOLATION_CONFIG, InterpolationConfig} from '../../ml_parser/interpolation_config'; +import {LexerRange} from '../../ml_parser/lexer'; import {isNgContainer as checkIsNgContainer, splitNsName} from '../../ml_parser/tags'; import {mapLiteral} from '../../output/map_util'; import * as o from '../../output/output_ast'; @@ -1574,6 +1575,11 @@ export interface ParseTemplateOptions { * How to parse interpolation markers. */ interpolationConfig?: InterpolationConfig; + /** + * The start and end point of the text to parse within the `source` string. 
+ * The entire `source` string is parsed if this is not provided. + * */ + range?: LexerRange; } /** diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index 861b1508d1..1f9b5068ba 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -55,6 +55,28 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u }); }); + describe('content ranges', () => { + it('should only process the text within the range', () => { + expect(tokenizeAndHumanizeSourceSpans( + 'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3', + {range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}})) + .toEqual([ + [lex.TokenType.TEXT, 'line 1\nline 2\nline 3'], + [lex.TokenType.EOF, ''], + ]); + }); + + it('should take into account preceding (non-processed) lines and columns', () => { + expect(tokenizeAndHumanizeLineColumn( + 'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3', + {range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}})) + .toEqual([ + [lex.TokenType.TEXT, '2:7'], + [lex.TokenType.EOF, '4:6'], + ]); + }); + }); + describe('comments', () => { it('should parse comments', () => { expect(tokenizeAndHumanizeParts('')).toEqual([