feat(HtmlLexer): allow "<" in text tokens

fixes #5550
Victor Berchet 2015-12-06 13:11:00 -08:00
parent 3a438615c3
commit aecf68117a
2 changed files with 85 additions and 36 deletions
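In short (an illustration, not part of the commit; it assumes the tokenizeHtml(source, url) entry point that the spec file below exercises through its helpers): a "<" that does not open a valid tag no longer aborts tokenization with an "Unexpected character" error. The tokenizer backtracks, emits the "<" as a TEXT token, and merges back-to-back TEXT tokens, so text such as an interpolation containing "<" is preserved.

// Hypothetical usage sketch; the import path and the tokenizeHtml() name are
// assumed from the angular2 compiler of this era, not shown in the diff below.
import {tokenizeHtml, HtmlTokenType} from 'angular2/src/compiler/html_lexer';

const result = tokenizeHtml('{{ a < b ? c : d }}', 'demo.html');

// Previously this input produced a TAG_OPEN_START "Unexpected character" error;
// with this change it tokenizes cleanly into a single TEXT token plus EOF.
console.log(result.errors.length);  // 0
const text = result.tokens.filter(t => t.type === HtmlTokenType.TEXT);
console.log(text.length, text[0].parts[0]);  // 1 '{{ a < b ? c : d }}'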

View File

@ -6,6 +6,7 @@ import {
   CONST_EXPR,
   serializeEnum
 } from 'angular2/src/facade/lang';
+import {ListWrapper} from 'angular2/src/facade/collection';
 import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
 import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
@ -161,7 +162,7 @@ class _HtmlTokenizer {
     }
     this._beginToken(HtmlTokenType.EOF);
     this._endToken([]);
-    return new HtmlTokenizeResult(this.tokens, this.errors);
+    return new HtmlTokenizeResult(mergeTextTokens(this.tokens), this.errors);
   }
   private _getLocation(): ParseLocation {
@ -374,21 +375,37 @@ class _HtmlTokenizer {
   }
   private _consumeTagOpen(start: ParseLocation) {
-    this._attemptUntilFn(isNotWhitespace);
-    var nameStart = this.index;
-    this._consumeTagOpenStart(start);
-    var lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
-    this._attemptUntilFn(isNotWhitespace);
-    while (this.peek !== $SLASH && this.peek !== $GT) {
-      this._consumeAttributeName();
+    let savedPos = this._savePosition();
+    let lowercaseTagName;
+    try {
       this._attemptUntilFn(isNotWhitespace);
-      if (this._attemptChar($EQ)) {
+      var nameStart = this.index;
+      this._consumeTagOpenStart(start);
+      lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
+      this._attemptUntilFn(isNotWhitespace);
+      while (this.peek !== $SLASH && this.peek !== $GT) {
+        this._consumeAttributeName();
+        this._attemptUntilFn(isNotWhitespace);
+        if (this._attemptChar($EQ)) {
+          this._attemptUntilFn(isNotWhitespace);
+          this._consumeAttributeValue();
+        }
         this._attemptUntilFn(isNotWhitespace);
-        this._consumeAttributeValue();
       }
-      this._attemptUntilFn(isNotWhitespace);
+      this._consumeTagOpenEnd();
+    } catch (e) {
+      if (e instanceof ControlFlowError) {
+        // When the start tag is invalid, assume we want a "<"
+        this._restorePosition(savedPos);
+        // Back to back text tokens are merged at the end
+        this._beginToken(HtmlTokenType.TEXT, start);
+        this._endToken(['<']);
+        return;
+      }
+      throw e;
     }
-    this._consumeTagOpenEnd();
     var contentTokenType = getHtmlTagDefinition(lowercaseTagName).contentType;
     if (contentTokenType === HtmlTagContentType.RAW_TEXT) {
       this._consumeRawTextWithTagClose(lowercaseTagName, false);
@ -470,13 +487,20 @@ class _HtmlTokenizer {
     this._endToken([this._processCarriageReturns(parts.join(''))]);
   }
-  private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
+  private _savePosition(): number[] {
+    return [this.peek, this.index, this.column, this.line, this.tokens.length];
+  }
   private _restorePosition(position: number[]): void {
     this.peek = position[0];
     this.index = position[1];
     this.column = position[2];
     this.line = position[3];
+    let nbTokens = position[4];
+    if (nbTokens < this.tokens.length) {
+      // remove any extra tokens
+      this.tokens = ListWrapper.slice(this.tokens, 0, nbTokens);
+    }
   }
 }
@ -516,3 +540,21 @@ function isAsciiLetter(code: number): boolean {
 function isAsciiHexDigit(code: number): boolean {
   return code >= $a && code <= $f || code >= $0 && code <= $9;
 }
+function mergeTextTokens(srcTokens: HtmlToken[]): HtmlToken[] {
+  let dstTokens = [];
+  let lastDstToken: HtmlToken;
+  for (let i = 0; i < srcTokens.length; i++) {
+    let token = srcTokens[i];
+    if (isPresent(lastDstToken) && lastDstToken.type == HtmlTokenType.TEXT &&
+        token.type == HtmlTokenType.TEXT) {
+      lastDstToken.parts[0] += token.parts[0];
+      lastDstToken.sourceSpan.end = token.sourceSpan.end;
+    } else {
+      lastDstToken = token;
+      dstTokens.push(lastDstToken);
+    }
+  }
+  return dstTokens;
+}
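The comments in the new catch block above ("assume we want a '<'", "back to back text tokens are merged at the end") describe a two-step fallback: a failed tag parse is rolled back, including any tokens it already pushed (which is why _savePosition() now also records this.tokens.length), a one-character "<" TEXT token is emitted instead, and mergeTextTokens() later folds adjacent TEXT tokens into one. The sketch below is a standalone, simplified restatement of that merge step; it is not the commit's code (no HtmlToken or sourceSpan handling).

enum TokenType { TEXT, TAG_OPEN_START, EOF }

interface Token {
  type: TokenType;
  parts: string[];
}

function mergeAdjacentTextTokens(srcTokens: Token[]): Token[] {
  const dstTokens: Token[] = [];
  let lastDstToken: Token | undefined;
  for (const token of srcTokens) {
    if (lastDstToken !== undefined && lastDstToken.type === TokenType.TEXT &&
        token.type === TokenType.TEXT) {
      // Extend the previous TEXT token instead of keeping two adjacent ones.
      lastDstToken.parts[0] += token.parts[0];
    } else {
      lastDstToken = token;
      dstTokens.push(lastDstToken);
    }
  }
  return dstTokens;
}

// "a" + "<" (the fallback token) + "b" collapse into a single "a<b" TEXT token.
const merged = mergeAdjacentTextTokens([
  {type: TokenType.TEXT, parts: ['a']},
  {type: TokenType.TEXT, parts: ['<']},
  {type: TokenType.TEXT, parts: ['b']},
  {type: TokenType.EOF, parts: []},
]);
console.log(merged.length);       // 2
console.log(merged[0].parts[0]);  // 'a<b'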

View File

@ -192,15 +192,6 @@ export function main() {
             ]);
       });
-      it('should report missing name after <', () => {
-        expect(tokenizeAndHumanizeErrors('<'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:1']]);
-      });
-      it('should report missing >', () => {
-        expect(tokenizeAndHumanizeErrors('<name'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:5']]);
-      });
     });
     describe('attributes', () => {
@ -335,20 +326,6 @@ export function main() {
             ]);
       });
-      it('should report missing value after =', () => {
-        expect(tokenizeAndHumanizeErrors('<name a='))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8']]);
-      });
-      it('should report missing end quote for \'', () => {
-        expect(tokenizeAndHumanizeErrors('<name a=\''))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
-      it('should report missing end quote for "', () => {
-        expect(tokenizeAndHumanizeErrors('<name a="'))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
     });
     describe('closing tags', () => {
@ -448,6 +425,36 @@ export function main() {
         expect(tokenizeAndHumanizeSourceSpans('a'))
             .toEqual([[HtmlTokenType.TEXT, 'a'], [HtmlTokenType.EOF, '']]);
       });
+      it('should allow "<" in text nodes', () => {
+        expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}'))
+            .toEqual([[HtmlTokenType.TEXT, '{{ a < b ? c : d }}'], [HtmlTokenType.EOF]]);
+        expect(tokenizeAndHumanizeSourceSpans('<p>a<b</p>'))
+            .toEqual([
+              [HtmlTokenType.TAG_OPEN_START, '<p'],
+              [HtmlTokenType.TAG_OPEN_END, '>'],
+              [HtmlTokenType.TEXT, 'a<b'],
+              [HtmlTokenType.TAG_CLOSE, '</p>'],
+              [HtmlTokenType.EOF, ''],
+            ]);
+      });
+      // TODO(vicb): make the lexer aware of Angular expressions
+      // see https://github.com/angular/angular/issues/5679
+      it('should parse valid start tag in interpolation', () => {
+        expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}'))
+            .toEqual([
+              [HtmlTokenType.TEXT, '{{ a '],
+              [HtmlTokenType.TAG_OPEN_START, null, 'b'],
+              [HtmlTokenType.ATTR_NAME, null, '&&'],
+              [HtmlTokenType.ATTR_NAME, null, 'c'],
+              [HtmlTokenType.TAG_OPEN_END],
+              [HtmlTokenType.TEXT, ' d }}'],
+              [HtmlTokenType.EOF]
+            ]);
+      });
     });
     describe('raw text', () => {