From eeb560ac8895c147790d55f522f3bc3e48941dea Mon Sep 17 00:00:00 2001
From: Pete Bacon Darwin <pete@bacondarwin.com>
Date: Fri, 8 Feb 2019 22:10:19 +0000
Subject: [PATCH] feat(compiler): support tokenizing a sub-section of an input
 string (#28055)

The lexer that does the tokenizing can now process only a part of the source
string, by passing a `range` property in the `options` argument. The
locations of the nodes that are tokenized will now take into account the
position of the span in the context of the original source string.

This `range` option is, in turn, exposed from the template parser as well.

Being able to process parts of files helps to enable SourceMap support
when compiling inline component templates.

PR Close #28055
---
 packages/compiler/src/ml_parser/lexer.ts      | 60 +++++++++++++++----
 .../compiler/src/render3/view/template.ts     |  6 ++
 .../compiler/test/ml_parser/lexer_spec.ts     | 22 +++++++
 3 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts
index 5f8b479232..30bfdea9aa 100644
--- a/packages/compiler/src/ml_parser/lexer.ts
+++ b/packages/compiler/src/ml_parser/lexer.ts
@@ -50,6 +50,13 @@ export class TokenizeResult {
   constructor(public tokens: Token[], public errors: TokenError[]) {}
 }
 
+export interface LexerRange {
+  startPos: number;
+  startLine: number;
+  startCol: number;
+  endPos: number;
+}
+
 /**
  * Options that modify how the text is tokenized.
  */
@@ -58,6 +65,11 @@ export interface TokenizeOptions {
   tokenizeExpansionForms?: boolean;
   /** How to tokenize interpolation markers. */
   interpolationConfig?: InterpolationConfig;
+  /**
+   * The start and end point of the text to parse within the `source` string.
+   * The entire `source` string is parsed if this is not provided.
+   * */
+  range?: LexerRange;
 }
 
 export function tokenize(
@@ -84,14 +96,14 @@ class _ControlFlowError {
 // See http://www.w3.org/TR/html51/syntax.html#writing
 class _Tokenizer {
   private _input: string;
-  private _length: number;
+  private _end: number;
   private _tokenizeIcu: boolean;
   private _interpolationConfig: InterpolationConfig;
   private _peek: number = -1;
   private _nextPeek: number = -1;
-  private _index: number = -1;
-  private _line: number = 0;
-  private _column: number = -1;
+  private _index: number;
+  private _line: number;
+  private _column: number;
   private _currentTokenStart: ParseLocation|null = null;
   private _currentTokenType: TokenType|null = null;
   private _expansionCaseStack: TokenType[] = [];
@@ -112,8 +124,26 @@ class _Tokenizer {
     this._tokenizeIcu = options.tokenizeExpansionForms || false;
     this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
     this._input = _file.content;
-    this._length = _file.content.length;
-    this._advance();
+    if (options.range) {
+      this._end = options.range.endPos;
+      this._index = options.range.startPos;
+      this._line = options.range.startLine;
+      this._column = options.range.startCol;
+    } else {
+      this._end = this._input.length;
+      this._index = 0;
+      this._line = 0;
+      this._column = 0;
+    }
+    try {
+      this._initPeek();
+    } catch (e) {
+      if (e instanceof _ControlFlowError) {
+        this.errors.push(e.error);
+      } else {
+        throw e;
+      }
+    }
   }
 
   private _processCarriageReturns(content: string): string {
@@ -232,8 +262,8 @@ class _Tokenizer {
     return new _ControlFlowError(error);
   }
 
-  private _advance() {
-    if (this._index >= this._length) {
+  private _advance(processingEscapeSequence?: boolean) {
+    if (this._index >= this._end) {
       throw this._createError(_unexpectedCharacterErrorMsg(chars.$EOF), this._getSpan());
     }
     if (this._peek === chars.$LF) {
@@ -243,9 +273,17 @@ class _Tokenizer {
       this._column++;
     }
     this._index++;
-    this._peek = this._index >= this._length ? chars.$EOF : this._input.charCodeAt(this._index);
+    this._initPeek(processingEscapeSequence);
+  }
+
+  /**
+   * Initialize the _peek and _nextPeek properties based on the current _index.
+   * @param processingEscapeSequence whether we are in the middle of processing an escape sequence.
+   */
+  private _initPeek(processingEscapeSequence?: boolean) {
+    this._peek = this._index >= this._end ? chars.$EOF : this._input.charCodeAt(this._index);
     this._nextPeek =
-        this._index + 1 >= this._length ? chars.$EOF : this._input.charCodeAt(this._index + 1);
+        this._index + 1 >= this._end ? chars.$EOF : this._input.charCodeAt(this._index + 1);
   }
 
   private _attemptCharCode(charCode: number): boolean {
@@ -274,7 +312,7 @@ class _Tokenizer {
 
   private _attemptStr(chars: string): boolean {
     const len = chars.length;
-    if (this._index + len > this._length) {
+    if (this._index + len > this._end) {
       return false;
     }
     const initialPosition = this._savePosition();
diff --git a/packages/compiler/src/render3/view/template.ts b/packages/compiler/src/render3/view/template.ts
index a86532ee4b..6ffc1d3a75 100644
--- a/packages/compiler/src/render3/view/template.ts
+++ b/packages/compiler/src/render3/view/template.ts
@@ -18,6 +18,7 @@ import * as html from '../../ml_parser/ast';
 import {HtmlParser} from '../../ml_parser/html_parser';
 import {WhitespaceVisitor} from '../../ml_parser/html_whitespaces';
 import {DEFAULT_INTERPOLATION_CONFIG, InterpolationConfig} from '../../ml_parser/interpolation_config';
+import {LexerRange} from '../../ml_parser/lexer';
 import {isNgContainer as checkIsNgContainer, splitNsName} from '../../ml_parser/tags';
 import {mapLiteral} from '../../output/map_util';
 import * as o from '../../output/output_ast';
@@ -1574,6 +1575,11 @@ export interface ParseTemplateOptions {
    * How to parse interpolation markers.
    */
   interpolationConfig?: InterpolationConfig;
+  /**
+   * The start and end point of the text to parse within the `source` string.
+   * The entire `source` string is parsed if this is not provided.
+   * */
+  range?: LexerRange;
 }
 
 /**
diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts
index 861b1508d1..1f9b5068ba 100644
--- a/packages/compiler/test/ml_parser/lexer_spec.ts
+++ b/packages/compiler/test/ml_parser/lexer_spec.ts
@@ -55,6 +55,28 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
       });
     });
 
+    describe('content ranges', () => {
+      it('should only process the text within the range', () => {
+        expect(tokenizeAndHumanizeSourceSpans(
+                   'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3',
+                   {range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}}))
+            .toEqual([
+              [lex.TokenType.TEXT, 'line 1\nline 2\nline 3'],
+              [lex.TokenType.EOF, ''],
+            ]);
+      });
+
+      it('should take into account preceding (non-processed) lines and columns', () => {
+        expect(tokenizeAndHumanizeLineColumn(
+                   'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3',
+                   {range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}}))
+            .toEqual([
+              [lex.TokenType.TEXT, '2:7'],
+              [lex.TokenType.EOF, '4:6'],
+            ]);
+      });
+    });
+
     describe('comments', () => {
       it('should parse comments', () => {
         expect(tokenizeAndHumanizeParts('<!--t\ne\rs\r\nt-->')).toEqual([