diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index d7306a2389..d62a54f576 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -29,8 +29,7 @@ export enum TokenType { CDATA_END, ATTR_NAME, ATTR_QUOTE, - ATTR_VALUE_TEXT, - ATTR_VALUE_INTERPOLATION, + ATTR_VALUE, DOC_TYPE, EXPANSION_FORM_START, EXPANSION_CASE_VALUE, @@ -229,8 +228,7 @@ class _Tokenizer { this._consumeTagOpen(start); } } else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) { - this._consumeWithInterpolation( - TokenType.TEXT, TokenType.INTERPOLATION, () => this._isTextEnd()); + this._consumeText(); } } catch (e) { this.handleError(e); @@ -597,25 +595,29 @@ class _Tokenizer { private _consumeAttributeValue() { let value: string; if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) { + this._beginToken(TokenType.ATTR_QUOTE); const quoteChar = this._cursor.peek(); - this._consumeQuote(quoteChar); - this._consumeWithInterpolation( - TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION, - () => this._cursor.peek() === quoteChar); - this._consumeQuote(quoteChar); + this._cursor.advance(); + this._endToken([String.fromCodePoint(quoteChar)]); + this._beginToken(TokenType.ATTR_VALUE); + const parts: string[] = []; + while (this._cursor.peek() !== quoteChar) { + parts.push(this._readChar(true)); + } + value = parts.join(''); + this._endToken([this._processCarriageReturns(value)]); + this._beginToken(TokenType.ATTR_QUOTE); + this._cursor.advance(); + this._endToken([String.fromCodePoint(quoteChar)]); } else { - const endPredicate = () => isNameEnd(this._cursor.peek()); - this._consumeWithInterpolation( - TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION, endPredicate); + this._beginToken(TokenType.ATTR_VALUE); + const valueStart = this._cursor.clone(); + this._requireCharCodeUntilFn(isNameEnd, 1); + value = this._cursor.getChars(valueStart); + this._endToken([this._processCarriageReturns(value)]); } } - private _consumeQuote(quoteChar: number) { - this._beginToken(TokenType.ATTR_QUOTE); - this._requireCharCode(quoteChar); - this._endToken([String.fromCodePoint(quoteChar)]); - } - private _consumeTagOpenEnd() { const tokenType = this._attemptCharCode(chars.$SLASH) ? TokenType.TAG_OPEN_END_VOID : TokenType.TAG_OPEN_END; @@ -694,31 +696,21 @@ class _Tokenizer { this._expansionCaseStack.pop(); } - /** - * Consume a string that may contain interpolation expressions. - * The first token consumed will be of `tokenType` and then there will be alternating - * `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true. - * - * @param textTokenType the kind of tokens to interleave around interpolation tokens. - * @param interpolationTokenType the kind of tokens that contain interpolation. - * @param endPredicate a function that should return true when we should stop consuming. - */ - private _consumeWithInterpolation( - textTokenType: TokenType, interpolationTokenType: TokenType, endPredicate: () => boolean) { - this._beginToken(textTokenType); + private _consumeText() { + this._beginToken(TokenType.TEXT); const parts: string[] = []; - while (!endPredicate()) { + do { const current = this._cursor.clone(); if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) { this._endToken([this._processCarriageReturns(parts.join(''))], current); - this._consumeInterpolation(interpolationTokenType, current); + this._consumeInterpolation(current); parts.length = 0; - this._beginToken(textTokenType); + this._beginToken(TokenType.TEXT); } else { parts.push(this._readChar(true)); } - } + } while (!this._isTextEnd()); // It is possible that an interpolation was started but not ended inside this text token. // Make sure that we reset the state of the lexer correctly. @@ -727,15 +719,14 @@ class _Tokenizer { this._endToken([this._processCarriageReturns(parts.join(''))]); } - private _consumeInterpolation( - interpolationTokenType: TokenType, interpolationStart: CharacterCursor) { + private _consumeInterpolation(interpolationStart: CharacterCursor) { const parts: string[] = []; - this._beginToken(interpolationTokenType, interpolationStart); + this._beginToken(TokenType.INTERPOLATION, interpolationStart); parts.push(this._interpolationConfig.start); // Find the end of the interpolation, ignoring content inside quotes. const expressionStart = this._cursor.clone(); - let inQuote: number|null = null; + let inQuote: string|null = null; let inComment = false; while (this._cursor.peek() !== chars.$EOF) { const current = this._cursor.clone(); @@ -761,15 +752,14 @@ class _Tokenizer { } } - const char = this._cursor.peek(); - this._cursor.advance(); - if (char === chars.$BACKSLASH) { + const char = this._readChar(true); + if (char === '\\') { // Skip the next character because it was escaped. - this._cursor.advance(); + this._readChar(true); } else if (char === inQuote) { // Exiting the current quoted string inQuote = null; - } else if (!inComment && chars.isQuote(char)) { + } else if (!inComment && /['"`]/.test(char)) { // Entering a new quoted string inQuote = char; } diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts index 9ac0b944e4..fd01357d43 100644 --- a/packages/compiler/src/ml_parser/parser.ts +++ b/packages/compiler/src/ml_parser/parser.ts @@ -6,7 +6,7 @@ * found in the LICENSE file at https://angular.io/license */ -import {ParseError, ParseLocation, ParseSourceSpan} from '../parse_util'; +import {ParseError, ParseSourceSpan} from '../parse_util'; import * as html from './ast'; import {NAMED_ENTITIES} from './entities'; @@ -362,49 +362,27 @@ class _TreeBuilder { private _consumeAttr(attrName: lex.Token): html.Attribute { const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]); - let attrEnd = attrName.sourceSpan.end; - - // Consume any quote + let end = attrName.sourceSpan.end; + let value = ''; + let valueSpan: ParseSourceSpan = undefined!; if (this._peek.type === lex.TokenType.ATTR_QUOTE) { this._advance(); } - - // Consume the value - let value = ''; - let valueStartSpan: ParseSourceSpan|undefined = undefined; - let valueEnd: ParseLocation|undefined = undefined; - if (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT) { - valueStartSpan = this._peek.sourceSpan; - valueEnd = this._peek.sourceSpan.end; - // For now we are recombining text and interpolation tokens - while (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT || - this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) { - let valueToken = this._advance(); - if (valueToken.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) { - // For backward compatibility we decode HTML entities that appear in interpolation - // expressions. This is arguably a bug, but it could be a considerable breaking change to - // fix it. It should be addressed in a larger project to refactor the entire parser/lexer - // chain after View Engine has been removed. - value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity); - } else { - value += valueToken.parts.join(''); - } - valueEnd = attrEnd = valueToken.sourceSpan.end; - } + if (this._peek.type === lex.TokenType.ATTR_VALUE) { + const valueToken = this._advance(); + value = valueToken.parts[0]; + end = valueToken.sourceSpan.end; + valueSpan = valueToken.sourceSpan; } - - // Consume any quote if (this._peek.type === lex.TokenType.ATTR_QUOTE) { const quoteToken = this._advance(); - attrEnd = quoteToken.sourceSpan.end; + end = quoteToken.sourceSpan.end; } - - const valueSpan = valueStartSpan && valueEnd && - new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart); + const keySpan = new ParseSourceSpan(attrName.sourceSpan.start, attrName.sourceSpan.end); return new html.Attribute( fullName, value, - new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart), - attrName.sourceSpan, valueSpan); + new ParseSourceSpan(attrName.sourceSpan.start, end, attrName.sourceSpan.fullStart), keySpan, + valueSpan); } private _getParentElement(): html.Element|null { diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts index 4fbfa75692..279bca60d3 100644 --- a/packages/compiler/test/ml_parser/html_parser_spec.ts +++ b/packages/compiler/test/ml_parser/html_parser_spec.ts @@ -250,19 +250,6 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes} ]); }); - it('should decode HTML entities in interpolated attributes', () => { - // Note that the detail of decoding corner-cases is tested in the - // "should decode HTML entities in interpolations" spec. - expect(humanizeDomSourceSpans(parser.parse('
', 'TestComp'))) - .toEqual([ - [ - html.Element, 'div', 0, '
', '
', - '
' - ], - [html.Attribute, 'foo', '{{&}}', 'foo="{{&}}"'] - ]); - }); - it('should normalize line endings within attribute values', () => { const result = parser.parse('
', 'TestComp'); diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index 835d59970d..54005b28ba 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -257,7 +257,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.INCOMPLETE_TAG_OPEN, '')).toEqual([ [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], - [lex.TokenType.ATTR_VALUE_TEXT, 'b'], - [lex.TokenType.TAG_OPEN_END], - [lex.TokenType.EOF], - ]); - }); - - it('should parse attributes with unquoted interpolation value', () => { - expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TAG_OPEN_START, '', 'a'], - [lex.TokenType.ATTR_NAME, '', 'a'], - [lex.TokenType.ATTR_VALUE_TEXT, ''], - [lex.TokenType.ATTR_VALUE_INTERPOLATION, '{{', 'link.text', '}}'], - [lex.TokenType.ATTR_VALUE_TEXT, ''], - [lex.TokenType.TAG_OPEN_END], - [lex.TokenType.EOF], - ]); - }); - - it('should parse attributes with empty quoted value', () => { - expect(tokenizeAndHumanizeParts('')).toEqual([ - [lex.TokenType.TAG_OPEN_START, '', 't'], - [lex.TokenType.ATTR_NAME, '', 'a'], - [lex.TokenType.ATTR_QUOTE, '"'], - [lex.TokenType.ATTR_VALUE_TEXT, ''], - [lex.TokenType.ATTR_QUOTE, '"'], + [lex.TokenType.ATTR_VALUE, 'b'], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], ]); @@ -396,7 +366,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u expect(tokenizeAndHumanizeParts('')).toEqual([ [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], - [lex.TokenType.ATTR_VALUE_TEXT, 'b'], + [lex.TokenType.ATTR_VALUE, 'b'], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], ]); @@ -407,7 +377,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], [lex.TokenType.ATTR_QUOTE, '"'], - [lex.TokenType.ATTR_VALUE_TEXT, 'AA'], + [lex.TokenType.ATTR_VALUE, 'AA'], [lex.TokenType.ATTR_QUOTE, '"'], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], @@ -419,11 +389,11 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], [lex.TokenType.ATTR_QUOTE, '"'], - [lex.TokenType.ATTR_VALUE_TEXT, '&'], + [lex.TokenType.ATTR_VALUE, '&'], [lex.TokenType.ATTR_QUOTE, '"'], [lex.TokenType.ATTR_NAME, '', 'b'], [lex.TokenType.ATTR_QUOTE, '"'], - [lex.TokenType.ATTR_VALUE_TEXT, 'c&&d'], + [lex.TokenType.ATTR_VALUE, 'c&&d'], [lex.TokenType.ATTR_QUOTE, '"'], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], @@ -435,7 +405,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], [lex.TokenType.ATTR_QUOTE, '"'], - [lex.TokenType.ATTR_VALUE_TEXT, 'b && c &'], + [lex.TokenType.ATTR_VALUE, 'b && c &'], [lex.TokenType.ATTR_QUOTE, '"'], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], @@ -447,7 +417,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.ATTR_NAME, '', 'a'], [lex.TokenType.ATTR_QUOTE, '\''], - [lex.TokenType.ATTR_VALUE_TEXT, 't\ne\ns\nt'], + [lex.TokenType.ATTR_VALUE, 't\ne\ns\nt'], [lex.TokenType.ATTR_QUOTE, '\''], [lex.TokenType.TAG_OPEN_END], [lex.TokenType.EOF], @@ -458,7 +428,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u expect(tokenizeAndHumanizeSourceSpans('')).toEqual([ [lex.TokenType.TAG_OPEN_START, ''], [lex.TokenType.EOF, ''], ]); @@ -466,13 +436,13 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should report missing closing single quote', () => { expect(tokenizeAndHumanizeErrors('')).toEqual([ - [lex.TokenType.ATTR_VALUE_TEXT, 'Unexpected character "EOF"', '0:8'], + [lex.TokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8'], ]); }); it('should report missing closing double quote', () => { expect(tokenizeAndHumanizeErrors('