refactor(compiler): capture `fullStart` locations when tokenizing (#39486)

This commit ensures that when leading whitespace is skipped by
the tokenizer, the original start location (before skipping) is captured
in the `fullStart` property of the token's source-span.

PR Close #39486
Authored by Pete Bacon Darwin on 2020-10-28 21:37:24 +00:00; committed by Misko Hevery
parent 8d90c1ad97
commit 43d8e9aad2
2 changed files with 27 additions and 13 deletions
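
To illustrate what the change records: when leading trivia characters (for example '\n', ' ', '\t') are skipped, `start` moves forward to the first significant character while `fullStart` keeps the original, untrimmed location. A minimal standalone sketch of that bookkeeping in plain TypeScript (the `spanOffsets` helper is hypothetical, not the lexer's actual code):

    // Minimal sketch of the start/fullStart bookkeeping described above.
    // `spanOffsets` is a hypothetical helper, not part of the Angular compiler.
    const LEADING_TRIVIA = new Set(['\n', ' ', '\t']);

    function spanOffsets(text: string, fullStart: number, end: number):
        {fullStart: number, start: number, end: number} {
      let start = fullStart;
      // Advance `start` past leading trivia, but never past `end`;
      // `fullStart` keeps the original, untrimmed offset.
      while (start < end && LEADING_TRIVIA.has(text[start])) {
        start++;
      }
      return {fullStart, start, end};
    }

    // In '<t>\n \t a</t>' the TEXT token covers offsets 3..8 ('\n \t a'):
    console.log(spanOffsets('<t>\n \t a</t>', 3, 8));  // {fullStart: 3, start: 7, end: 8}

In the real cursor these offsets become the `start` and `fullStart` `ParseLocation`s of the token's `ParseSourceSpan`, as the diff below shows.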

packages/compiler/src/ml_parser/lexer.ts

@@ -917,19 +917,20 @@ class PlainCharacterCursor implements CharacterCursor {
   getSpan(start?: this, leadingTriviaCodePoints?: number[]): ParseSourceSpan {
     start = start || this;
-    let cloned = false;
+    let fullStart = start;
     if (leadingTriviaCodePoints) {
       while (this.diff(start) > 0 && leadingTriviaCodePoints.indexOf(start.peek()) !== -1) {
-        if (!cloned) {
+        if (fullStart === start) {
           start = start.clone() as this;
-          cloned = true;
         }
         start.advance();
       }
     }
-    return new ParseSourceSpan(
-        new ParseLocation(start.file, start.state.offset, start.state.line, start.state.column),
-        new ParseLocation(this.file, this.state.offset, this.state.line, this.state.column));
+    const startLocation = this.locationFromCursor(start);
+    const endLocation = this.locationFromCursor(this);
+    const fullStartLocation =
+        fullStart !== start ? this.locationFromCursor(fullStart) : startLocation;
+    return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
   }
 
   getChars(start: this): string {
@@ -959,6 +960,11 @@ class PlainCharacterCursor implements CharacterCursor {
   protected updatePeek(state: CursorState): void {
     state.peek = state.offset >= this.end ? chars.$EOF : this.charAt(state.offset);
   }
+
+  private locationFromCursor(cursor: this): ParseLocation {
+    return new ParseLocation(
+        cursor.file, cursor.state.offset, cursor.state.line, cursor.state.column);
+  }
 }
 
 class EscapedCharacterCursor extends PlainCharacterCursor {
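
The new three-argument call to `ParseSourceSpan` above assumes the span type accepts a `fullStart` location alongside `start` and `end`. A simplified sketch of that shape, using hypothetical `Location`/`Span` classes rather than the real `ParseLocation`/`ParseSourceSpan`, and assuming `fullStart` defaults to `start` so spans with no skipped trivia are unaffected:

    // Simplified sketch of the span shape the diff constructs (assumption:
    // `fullStart` defaults to `start` when no leading trivia was skipped).
    class Location {
      constructor(readonly offset: number, readonly line: number, readonly col: number) {}
      toString(): string { return `${this.line}:${this.col}`; }
    }

    class Span {
      constructor(
          readonly start: Location, readonly end: Location,
          readonly fullStart: Location = start) {}
    }

    // The TEXT token from '<t>\n \t a</t>': trimmed start 1:3, end 1:4, fullStart 0:3.
    const span = new Span(new Location(7, 1, 3), new Location(8, 1, 4), new Location(3, 0, 3));
    console.log(`${span.fullStart} .. ${span.start} .. ${span.end}`);  // 0:3 .. 1:3 .. 1:4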

packages/compiler/test/ml_parser/lexer_spec.ts

@@ -54,14 +54,14 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_util';
     });
 
     it('should skip over leading trivia for source-span start', () => {
-      expect(tokenizeAndHumanizeLineColumn(
-                 '<t>\n \t a</t>', {leadingTriviaChars: ['\n', ' ', '\t']}))
+      expect(
+          tokenizeAndHumanizeFullStart('<t>\n \t a</t>', {leadingTriviaChars: ['\n', ' ', '\t']}))
           .toEqual([
-            [lex.TokenType.TAG_OPEN_START, '0:0'],
-            [lex.TokenType.TAG_OPEN_END, '0:2'],
-            [lex.TokenType.TEXT, '1:3'],
-            [lex.TokenType.TAG_CLOSE, '1:4'],
-            [lex.TokenType.EOF, '1:8'],
+            [lex.TokenType.TAG_OPEN_START, '0:0', '0:0'],
+            [lex.TokenType.TAG_OPEN_END, '0:2', '0:2'],
+            [lex.TokenType.TEXT, '1:3', '0:3'],
+            [lex.TokenType.TAG_CLOSE, '1:4', '1:4'],
+            [lex.TokenType.EOF, '1:8', '1:8'],
           ]);
     });
   });
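
Each expectation above is now a `[type, start, fullStart]` triple. Only the TEXT token's two locations differ, because the skipped '\n \t ' trivia sits between `<t>` and the `a`. A standalone sketch (not part of the spec) that derives those line/column values from the test input:

    // Sketch: derive 'line:col' from an offset in the test input to show why
    // the TEXT token has start '1:3' but fullStart '0:3'.
    function lineCol(text: string, offset: number): string {
      let line = 0;
      let col = 0;
      for (let i = 0; i < offset; i++) {
        if (text[i] === '\n') {
          line++;
          col = 0;
        } else {
          col++;
        }
      }
      return `${line}:${col}`;
    }

    const input = '<t>\n \t a</t>';
    console.log(lineCol(input, 3));  // '0:3' -> fullStart of TEXT (just after '<t>')
    console.log(lineCol(input, 7));  // '1:3' -> start of TEXT (the 'a', after the trivia)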
@@ -1560,6 +1560,14 @@ function tokenizeAndHumanizeLineColumn(input: string, options?: lex.TokenizeOptions): any[] {
       .tokens.map(token => [<any>token.type, humanizeLineColumn(token.sourceSpan.start)]);
 }
 
+function tokenizeAndHumanizeFullStart(input: string, options?: lex.TokenizeOptions): any[] {
+  return tokenizeWithoutErrors(input, options)
+      .tokens.map(
+          token =>
+              [<any>token.type, humanizeLineColumn(token.sourceSpan.start),
+               humanizeLineColumn(token.sourceSpan.fullStart)]);
+}
+
 function tokenizeAndHumanizeErrors(input: string, options?: lex.TokenizeOptions): any[] {
   return lex.tokenize(input, 'someUrl', getHtmlTagDefinition, options)
       .errors.map(e => [<any>e.tokenType, e.msg, humanizeLineColumn(e.span.start)]);
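
One likely use of `fullStart` downstream (an assumption, not stated in this commit): consumers that need a token's original text, including the leading trivia skipped for `start`, can slice from `fullStart` instead. A sketch with hypothetical `SpanLike`/`tokenText` names, not Angular APIs:

    // Hypothetical helper (not an Angular API): recover a token's text either
    // from its trimmed start or from its untrimmed fullStart.
    interface Loc { offset: number; }
    interface SpanLike { start: Loc; end: Loc; fullStart: Loc; }

    function tokenText(source: string, span: SpanLike, includeLeadingTrivia = false): string {
      const from = includeLeadingTrivia ? span.fullStart.offset : span.start.offset;
      return source.slice(from, span.end.offset);
    }

    const source = '<t>\n \t a</t>';
    const textSpan: SpanLike = {fullStart: {offset: 3}, start: {offset: 7}, end: {offset: 8}};
    console.log(JSON.stringify(tokenText(source, textSpan)));        // "a"
    console.log(JSON.stringify(tokenText(source, textSpan, true)));  // "\n \t a"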