diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index c6e7242b9f..e7bbb08aeb 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -17,6 +17,7 @@ export enum TokenType { TAG_OPEN_END, TAG_OPEN_END_VOID, TAG_CLOSE, + INCOMPLETE_TAG_OPEN, TEXT, ESCAPABLE_RAW_TEXT, RAW_TEXT, @@ -511,8 +512,6 @@ class _Tokenizer { let tagName: string; let prefix: string; let openTagToken: Token|undefined; - let tokensBeforeTagOpen = this.tokens.length; - const innerStart = this._cursor.clone(); try { if (!chars.isAsciiLetter(this._cursor.peek())) { throw this._createError( @@ -523,7 +522,8 @@ class _Tokenizer { prefix = openTagToken.parts[0]; tagName = openTagToken.parts[1]; this._attemptCharCodeUntilFn(isNotWhitespace); - while (this._cursor.peek() !== chars.$SLASH && this._cursor.peek() !== chars.$GT) { + while (this._cursor.peek() !== chars.$SLASH && this._cursor.peek() !== chars.$GT && + this._cursor.peek() !== chars.$LT) { this._consumeAttributeName(); this._attemptCharCodeUntilFn(isNotWhitespace); if (this._attemptCharCode(chars.$EQ)) { @@ -535,14 +535,15 @@ class _Tokenizer { this._consumeTagOpenEnd(); } catch (e) { if (e instanceof _ControlFlowError) { - // When the start tag is invalid (including invalid "attributes"), assume we want a "<" - this._cursor = innerStart; if (openTagToken) { - this.tokens.length = tokensBeforeTagOpen; + // We errored before we could close the opening tag, so it is incomplete. + openTagToken.type = TokenType.INCOMPLETE_TAG_OPEN; + } else { + // When the start tag is invalid, assume we want a "<" as text. + // Back to back text tokens are merged at the end. + this._beginToken(TokenType.TEXT, start); + this._endToken(['<']); } - // Back to back text tokens are merged at the end - this._beginToken(TokenType.TEXT, start); - this._endToken(['<']); return; } @@ -772,8 +773,8 @@ function isNotWhitespace(code: number): boolean { } function isNameEnd(code: number): boolean { - return chars.isWhitespace(code) || code === chars.$GT || code === chars.$SLASH || - code === chars.$SQ || code === chars.$DQ || code === chars.$EQ; + return chars.isWhitespace(code) || code === chars.$GT || code === chars.$LT || + code === chars.$SLASH || code === chars.$SQ || code === chars.$DQ || code === chars.$EQ; } function isPrefixEnd(code: number): boolean { diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts index 5c5a3f9805..1194a8b7ba 100644 --- a/packages/compiler/src/ml_parser/parser.ts +++ b/packages/compiler/src/ml_parser/parser.ts @@ -56,7 +56,8 @@ class _TreeBuilder { build(): void { while (this._peek.type !== lex.TokenType.EOF) { - if (this._peek.type === lex.TokenType.TAG_OPEN_START) { + if (this._peek.type === lex.TokenType.TAG_OPEN_START || + this._peek.type === lex.TokenType.INCOMPLETE_TAG_OPEN) { this._consumeStartTag(this._advance()); } else if (this._peek.type === lex.TokenType.TAG_CLOSE) { this._consumeEndTag(this._advance()); @@ -233,8 +234,7 @@ class _TreeBuilder { } private _consumeStartTag(startTagToken: lex.Token) { - const prefix = startTagToken.parts[0]; - const name = startTagToken.parts[1]; + const [prefix, name] = startTagToken.parts; const attrs: html.Attribute[] = []; while (this._peek.type === lex.TokenType.ATTR_NAME) { attrs.push(this._consumeAttr(this._advance())); @@ -266,6 +266,12 @@ class _TreeBuilder { // Elements that are self-closed have their `endSourceSpan` set to the full span, as the // element start tag also represents the end tag. this._popElement(fullName, span); + } else if (startTagToken.type === lex.TokenType.INCOMPLETE_TAG_OPEN) { + // We already know the opening tag is not complete, so it is unlikely it has a corresponding + // close tag. Let's optimistically parse it as a full element and emit an error. + this._popElement(fullName, null); + this.errors.push( + TreeError.create(fullName, span, `Opening tag "${fullName}" not terminated.`)); } } @@ -295,7 +301,13 @@ class _TreeBuilder { } } - private _popElement(fullName: string, endSourceSpan: ParseSourceSpan): boolean { + /** + * Closes the nearest element with the tag name `fullName` in the parse tree. + * `endSourceSpan` is the span of the closing tag, or null if the element does + * not have a closing tag (for example, this happens when an incomplete + * opening tag is recovered). + */ + private _popElement(fullName: string, endSourceSpan: ParseSourceSpan|null): boolean { for (let stackIndex = this._elementStack.length - 1; stackIndex >= 0; stackIndex--) { const el = this._elementStack[stackIndex]; if (el.name == fullName) { @@ -303,7 +315,7 @@ class _TreeBuilder { // removed from the element stack at this point are closed implicitly, so they won't get // an end source span (as there is no explicit closing element). el.endSourceSpan = endSourceSpan; - el.sourceSpan.end = endSourceSpan.end || el.sourceSpan.end; + el.sourceSpan.end = endSourceSpan !== null ? endSourceSpan.end : el.sourceSpan.end; this._elementStack.splice(stackIndex, this._elementStack.length - stackIndex); return true; diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts index 6dd3f9e97e..87dee2f8aa 100644 --- a/packages/compiler/test/ml_parser/html_parser_spec.ts +++ b/packages/compiler/test/ml_parser/html_parser_spec.ts @@ -11,7 +11,7 @@ import {HtmlParser, ParseTreeResult, TreeError} from '../../src/ml_parser/html_p import {TokenType} from '../../src/ml_parser/lexer'; import {ParseError} from '../../src/parse_util'; -import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn} from './ast_spec_utils'; +import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes} from './ast_spec_utils'; { describe('HtmlParser', () => { @@ -622,7 +622,7 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn} from './ast_spe `{a, select, b {foo} % { bar {% bar}}`, 'TestComp', {tokenizeExpansionForms: true}); expect(humanizeErrors(p.errors)).toEqual([ [ - 6, + TokenType.RAW_TEXT, 'Unexpected character "EOF" (Do you have an unescaped "{" in your template? Use "{{ \'{\' }}") to escape it.)', '0:36' ], @@ -840,14 +840,66 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn} from './ast_spe ]]); }); - it('should report subsequent open tags without proper close tag', () => { - const errors = parser.parse('', 'TestComp').errors; - expect(errors.length).toEqual(1); - expect(humanizeErrors(errors)).toEqual([[ - 'div', - 'Unexpected closing tag "div". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags', - '0:4' - ]]); + describe('incomplete element tag', () => { + it('should parse and report incomplete tags after the tag name', () => { + const {errors, rootNodes} = parser.parse('
', 'TestComp'); + + expect(humanizeNodes(rootNodes, true)).toEqual([ + [html.Element, 'div', 0, '
', '', ''], + [html.Element, 'div', 1, '
{ + const {errors, rootNodes} = + parser.parse('
', 'TestComp'); + + expect(humanizeNodes(rootNodes, true)).toEqual([ + [html.Element, 'div', 0, '
', '', ''], + ]); + + expect(humanizeErrors(errors)).toEqual([ + ['div', 'Opening tag "div" not terminated.', '0:0'], + ]); + }); + + it('should parse and report incomplete tags after quote', () => { + const {errors, rootNodes} = parser.parse('
', 'TestComp'); + + expect(humanizeNodes(rootNodes, true)).toEqual([ + [html.Element, 'div', 0, '
', '', ''], + ]); + + expect(humanizeErrors(errors)).toEqual([ + ['div', 'Opening tag "div" not terminated.', '0:0'], + ]); + }); + + it('should report subsequent open tags without proper close tag', () => { + const errors = parser.parse('', 'TestComp').errors; + expect(errors.length).toEqual(2); + expect(humanizeErrors(errors)).toEqual([ + ['div', 'Opening tag "div" not terminated.', '0:0'], + // TODO(ayazhafiz): the following error is unnecessary and can be pruned if we keep + // track of the incomplete tag names. + [ + 'div', + 'Unexpected closing tag "div". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags', + '0:4' + ] + ]); + }); }); it('should report closing tag for void elements', () => { diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index f2a650a558..1b5e3e3bc4 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -232,6 +232,45 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.EOF, ''], ]); }); + + describe('tags', () => { + it('after tag name', () => { + expect(tokenizeAndHumanizeSourceSpans('')).toEqual([ + [lex.TokenType.INCOMPLETE_TAG_OPEN, ''], + [lex.TokenType.INCOMPLETE_TAG_OPEN, ''], + [lex.TokenType.EOF, ''], + ]); + }); + + it('in attribute', () => { + expect(tokenizeAndHumanizeSourceSpans('
')).toEqual([ + [lex.TokenType.INCOMPLETE_TAG_OPEN, ''], + [lex.TokenType.TAG_CLOSE, ''], + [lex.TokenType.EOF, ''], + ]); + }); + + it('after quote', () => { + expect(tokenizeAndHumanizeSourceSpans('
')).toEqual([ + [lex.TokenType.INCOMPLETE_TAG_OPEN, ''], + [lex.TokenType.TAG_CLOSE, ''], + [lex.TokenType.EOF, ''], + ]); + }); + }); }); describe('attributes', () => { @@ -554,7 +593,8 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u expect(tokenizeAndHumanizeSourceSpans('

a')).toEqual([ [lex.TokenType.TAG_OPEN_START, ''], - [lex.TokenType.TEXT, 'a'], [lex.TokenType.EOF, ''], ]); @@ -579,25 +619,41 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should parse start tags quotes in place of an attribute name as text', () => { expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TEXT, ''], + [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'], + [lex.TokenType.TEXT, '">'], [lex.TokenType.EOF], ]); expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TEXT, ''], + [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'], + [lex.TokenType.TEXT, '\'>'], [lex.TokenType.EOF], ]); }); - it('should parse start tags quotes in place of an attribute name (after a valid attribute) as text', + it('should parse start tags quotes in place of an attribute name (after a valid attribute)', () => { expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TEXT, ''], + [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'], + [lex.TokenType.ATTR_NAME, '', 'a'], + [lex.TokenType.ATTR_QUOTE, '"'], + [lex.TokenType.ATTR_VALUE, 'b'], + [lex.TokenType.ATTR_QUOTE, '"'], + // TODO(ayazhafiz): the " symbol should be a synthetic attribute, + // allowing us to complete the opening tag correctly. + [lex.TokenType.TEXT, '">'], [lex.TokenType.EOF], ]); expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TEXT, ''], + [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'], + [lex.TokenType.ATTR_NAME, '', 'a'], + [lex.TokenType.ATTR_QUOTE, '\''], + [lex.TokenType.ATTR_VALUE, 'b'], + [lex.TokenType.ATTR_QUOTE, '\''], + // TODO(ayazhafiz): the ' symbol should be a synthetic attribute, + // allowing us to complete the opening tag correctly. + [lex.TokenType.TEXT, '\'>'], [lex.TokenType.EOF], ]); });