diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts
index f0fb361232..d62a54f576 100644
--- a/packages/compiler/src/ml_parser/lexer.ts
+++ b/packages/compiler/src/ml_parser/lexer.ts
@@ -22,6 +22,7 @@ export enum TokenType {
TEXT,
ESCAPABLE_RAW_TEXT,
RAW_TEXT,
+ INTERPOLATION,
COMMENT_START,
COMMENT_END,
CDATA_START,
@@ -285,7 +286,7 @@ class _Tokenizer {
}
const token = new Token(
this._currentTokenType, parts,
- this._cursor.getSpan(this._currentTokenStart, this._leadingTriviaCodePoints));
+ (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints));
this.tokens.push(token);
this._currentTokenStart = null;
this._currentTokenType = null;
@@ -696,19 +697,16 @@ class _Tokenizer {
}
private _consumeText() {
- const start = this._cursor.clone();
- this._beginToken(TokenType.TEXT, start);
+ this._beginToken(TokenType.TEXT);
const parts: string[] = [];
do {
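+ // Capture the cursor before attempting the interpolation start marker, so the
+ // text token can end (and the interpolation token begin) exactly at the `{{`.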
+ const current = this._cursor.clone();
if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
- parts.push(this._interpolationConfig.start);
- this._inInterpolation = true;
- } else if (
- this._interpolationConfig && this._inInterpolation &&
- this._attemptStr(this._interpolationConfig.end)) {
- parts.push(this._interpolationConfig.end);
- this._inInterpolation = false;
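+ // End the current text token just before the `{{`, emit the whole
+ // interpolation as a single token, then begin a fresh text token after it.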
+ this._endToken([this._processCarriageReturns(parts.join(''))], current);
+ this._consumeInterpolation(current);
+ parts.length = 0;
+ this._beginToken(TokenType.TEXT);
} else {
parts.push(this._readChar(true));
}
@@ -721,6 +719,61 @@ class _Tokenizer {
this._endToken([this._processCarriageReturns(parts.join(''))]);
}
+ private _consumeInterpolation(interpolationStart: CharacterCursor) {
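+ // The resulting token parts are [start marker, expression, end marker (if found)].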
+ const parts: string[] = [];
+ this._beginToken(TokenType.INTERPOLATION, interpolationStart);
+ parts.push(this._interpolationConfig.start);
+
+ // Find the end of the interpolation, ignoring content inside quotes.
+ const expressionStart = this._cursor.clone();
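+ // `inQuote` holds the active quote character (or null when outside a string);
+ // `inComment` is set once a `//` comment begins, after which quotes are ignored.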
+ let inQuote: string|null = null;
+ let inComment = false;
+ while (this._cursor.peek() !== chars.$EOF) {
+ const current = this._cursor.clone();
+
+ if (this._isTagStart()) {
+ // We are starting what looks like an HTML element in the middle of this interpolation.
+ // Reset the cursor to before the `<` character and end the interpolation token.
+ // (This is actually wrong, but kept for backward compatibility.)
+ this._cursor = current;
+ parts.push(this._getProcessedChars(expressionStart, current));
+ return this._endToken(parts);
+ }
+
+ if (inQuote === null) {
+ if (this._attemptStr(this._interpolationConfig.end)) {
+ // We are not in a string, and we hit the end interpolation marker
+ parts.push(this._getProcessedChars(expressionStart, current));
+ parts.push(this._interpolationConfig.end);
+ return this._endToken(parts);
+ } else if (this._attemptStr('//')) {
+ // Once we are in a comment we ignore any quotes
+ inComment = true;
+ }
+ }
+
+ const char = this._readChar(true);
+ if (char === '\\') {
+ // Skip the next character because it was escaped.
+ this._readChar(true);
+ } else if (char === inQuote) {
+ // Exiting the current quoted string
+ inQuote = null;
+ } else if (!inComment && /['"`]/.test(char)) {
+ // Entering a new quoted string
+ inQuote = char;
+ }
+ }
+
+ // We hit EOF without finding a closing interpolation marker
+ parts.push(this._getProcessedChars(expressionStart, this._cursor));
+ return this._endToken(parts);
+ }
+
+ private _getProcessedChars(start: CharacterCursor, end: CharacterCursor): string {
+ return this._processCarriageReturns(end.getChars(start));
+ }
+
private _isTextEnd(): boolean {
if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
return true;
diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts
index 24465f8e97..fd01357d43 100644
--- a/packages/compiler/src/ml_parser/parser.ts
+++ b/packages/compiler/src/ml_parser/parser.ts
@@ -9,6 +9,7 @@
import {ParseError, ParseSourceSpan} from '../parse_util';
import * as html from './ast';
+import {NAMED_ENTITIES} from './entities';
import * as lex from './lexer';
import {getNsPrefix, mergeNsAndName, splitNsName, TagDefinition} from './tags';
@@ -215,6 +216,7 @@ class _TreeBuilder {
}
private _consumeText(token: lex.Token) {
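+ // Capture the span of this first text token; subsequent interpolation and
+ // text tokens may be merged into a single text node below.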
+ const startSpan = token.sourceSpan;
let text = token.parts[0];
if (text.length > 0 && text[0] == '\n') {
const parent = this._getParentElement();
@@ -224,8 +226,29 @@ class _TreeBuilder {
}
}
+ // For now recombine text and interpolation tokens
+ if (this._peek.type === lex.TokenType.INTERPOLATION) {
+ while (this._peek.type === lex.TokenType.INTERPOLATION ||
+ this._peek.type === lex.TokenType.TEXT) {
+ token = this._advance();
+ if (token.type === lex.TokenType.INTERPOLATION) {
+ // For backward compatibility we decode HTML entities that appear in interpolation
+ // expressions. This is arguably a bug, but it could be a considerable breaking change to
+ // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
+ // chain after View Engine has been removed.
+ text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
+ } else {
+ text += token.parts.join('');
+ }
+ }
+ }
+
if (text.length > 0) {
- this._addToParent(new html.Text(text, token.sourceSpan));
+ const endSpan = token.sourceSpan;
+ this._addToParent(new html.Text(
+ text,
+ new ParseSourceSpan(
+ startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details)));
}
}
@@ -395,3 +418,21 @@ class _TreeBuilder {
function lastOnStack(stack: any[], element: any): boolean {
return stack.length > 0 && stack[stack.length - 1] === element;
}
+
+/**
+ * Decode the `entity` string, which we believe is the contents of an HTML entity.
+ *
+ * If the string is not actually a valid/known entity then just return the original `match` string.
+ */
+function decodeEntity(match: string, entity: string): string {
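+ // Named entity, e.g. `&amp;` -> `&`.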
+ if (NAMED_ENTITIES[entity] !== undefined) {
+ return NAMED_ENTITIES[entity] || match;
+ }
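+ // Hex character reference, e.g. `&#x25BE;` -> `▾`.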
+ if (/^#x[a-f0-9]+$/i.test(entity)) {
+ return String.fromCodePoint(parseInt(entity.slice(2), 16));
+ }
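+ // Decimal character reference, e.g. `&#9662;` -> `▾`.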
+ if (/^#\d+$/.test(entity)) {
+ return String.fromCodePoint(parseInt(entity.slice(1), 10));
+ }
+ return match;
+}
diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts
index b971d9187a..279bca60d3 100644
--- a/packages/compiler/test/ml_parser/html_parser_spec.ts
+++ b/packages/compiler/test/ml_parser/html_parser_spec.ts
@@ -675,6 +675,32 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes}
expect(node.endSourceSpan!.end.offset).toEqual(12);
});
+ // This checks backward compatibility with a previous version of the lexer, which would
+ // treat interpolation expressions as regular HTML escapable text.
+ it('should decode HTML entities in interpolations', () => {
+ expect(humanizeDomSourceSpans(parser.parse(
+ '{{&amp;}}' +
+ '{{&#x25BE;}}' +
+ '{{&#9662;}}' +
+ '{{&amp (no semi-colon)}}' +
+ '{{&#BE; (invalid decimal)}}',
+ 'TestComp')))
+ .toEqual([[
+ html.Text,
+ '{{&}}' +
+ '{{\u25BE}}' +
+ '{{\u25BE}}' +
+ '{{&amp (no semi-colon)}}' +
+ '{{&#BE; (invalid decimal)}}',
+ 0,
+ '{{&amp;}}' +
+ '{{&#x25BE;}}' +
+ '{{&#9662;}}' +
+ '{{&amp (no semi-colon)}}' +
+ '{{&#BE; (invalid decimal)}}',
+ ]]);
+ });
+
it('should not set the end source span for void elements', () => {
expect(humanizeDomSourceSpans(parser.parse('<div><br></div>', 'TestComp'))).toEqual([
[html.Element, 'div', 0, '<div><br></div>', '<div>', '</div>'],
diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts
index 5c795ed959..54005b28ba 100644
--- a/packages/compiler/test/ml_parser/lexer_spec.ts
+++ b/packages/compiler/test/ml_parser/lexer_spec.ts
@@ -549,25 +549,66 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
});
it('should parse interpolation', () => {
- expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}')).toEqual([
- [lex.TokenType.TEXT, '{{ a }}b{{ c // comment }}'],
- [lex.TokenType.EOF],
+ expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}d{{ e "}}" f }}g{{ h // " i }}'))
+ .toEqual([
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' a ', '}}'],
+ [lex.TokenType.TEXT, 'b'],
+ [lex.TokenType.INTERPOLATION, '{{', ' c // comment ', '}}'],
+ [lex.TokenType.TEXT, 'd'],
+ [lex.TokenType.INTERPOLATION, '{{', ' e "}}" f ', '}}'],
+ [lex.TokenType.TEXT, 'g'],
+ [lex.TokenType.INTERPOLATION, '{{', ' h // " i ', '}}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.EOF],
+ ]);
+
+ expect(tokenizeAndHumanizeSourceSpans('{{ a }}b{{ c // comment }}')).toEqual([
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{ a }}'],
+ [lex.TokenType.TEXT, 'b'],
+ [lex.TokenType.INTERPOLATION, '{{ c // comment }}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.EOF, ''],
]);
});
it('should parse interpolation with custom markers', () => {
expect(tokenizeAndHumanizeParts('{% a %}', {interpolationConfig: {start: '{%', end: '%}'}}))
.toEqual([
- [lex.TokenType.TEXT, '{% a %}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{%', ' a ', '%}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EOF],
]);
});
- it('should handle CR & LF', () => {
+ it('should handle CR & LF in text', () => {
expect(tokenizeAndHumanizeParts('t\ne\rs\r\nt')).toEqual([
[lex.TokenType.TEXT, 't\ne\ns\nt'],
[lex.TokenType.EOF],
]);
+
+ expect(tokenizeAndHumanizeSourceSpans('t\ne\rs\r\nt')).toEqual([
+ [lex.TokenType.TEXT, 't\ne\rs\r\nt'],
+ [lex.TokenType.EOF, ''],
+ ]);
+ });
+
+ it('should handle CR & LF in interpolation', () => {
+ expect(tokenizeAndHumanizeParts('{{t\ne\rs\r\nt}}')).toEqual([
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', 't\ne\ns\nt', '}}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.EOF],
+ ]);
+
+ expect(tokenizeAndHumanizeSourceSpans('{{t\ne\rs\r\nt}}')).toEqual([
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{t\ne\rs\r\nt}}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.EOF, ''],
+ ]);
});
it('should parse entities', () => {
@@ -575,6 +616,11 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TEXT, 'a&b'],
[lex.TokenType.EOF],
]);
+
+ expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
+ [lex.TokenType.TEXT, 'a&amp;b'],
+ [lex.TokenType.EOF, ''],
+ ]);
});
it('should parse text starting with "&"', () => {
@@ -593,7 +639,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should allow "<" in text nodes', () => {
expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}')).toEqual([
- [lex.TokenType.TEXT, '{{ a < b ? c : d }}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' a < b ? c : d ', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EOF],
]);
@@ -614,7 +662,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should break out of interpolation in text token on valid start tag', () => {
expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}')).toEqual([
- [lex.TokenType.TEXT, '{{ a '],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' a '],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.TAG_OPEN_START, '', 'b'],
[lex.TokenType.ATTR_NAME, '', '&&'],
[lex.TokenType.ATTR_NAME, '', 'c'],
@@ -626,7 +676,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should break out of interpolation in text token on valid comment', () => {
expect(tokenizeAndHumanizeParts('{{ a }<!---->}')).toEqual([
- [lex.TokenType.TEXT, '{{ a }'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' a }'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.COMMENT_START],
[lex.TokenType.RAW_TEXT, ''],
[lex.TokenType.COMMENT_END],
@@ -637,7 +689,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should break out of interpolation in text token on valid CDATA', () => {
expect(tokenizeAndHumanizeParts('{{ a }<![CDATA[]]>}')).toEqual([
- [lex.TokenType.TEXT, '{{ a }'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' a }'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.CDATA_START],
[lex.TokenType.RAW_TEXT, ''],
[lex.TokenType.CDATA_END],
@@ -653,13 +707,14 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
.toEqual([
[lex.TokenType.TAG_OPEN_START, '', 'code'],
[lex.TokenType.TAG_OPEN_END],
- [lex.TokenType.TEXT, '{{\'<={\'}}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', '\'<={\'', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.TAG_CLOSE, '', 'code'],
[lex.TokenType.EOF],
]);
});
-
it('should parse start tags quotes in place of an attribute name as text', () => {
expect(tokenizeAndHumanizeParts('<t ">')).toEqual([
[lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'],
@@ -703,18 +758,32 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should be able to escape {', () => {
expect(tokenizeAndHumanizeParts('{{ "{" }}')).toEqual([
- [lex.TokenType.TEXT, '{{ "{" }}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' "{" ', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EOF],
]);
});
it('should be able to escape {{', () => {
expect(tokenizeAndHumanizeParts('{{ "{{" }}')).toEqual([
- [lex.TokenType.TEXT, '{{ "{{" }}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' "{{" ', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EOF],
]);
});
+ it('should capture everything up to the end of file in the interpolation expression part if there are mismatched quotes',
+ () => {
+ expect(tokenizeAndHumanizeParts('{{ "{{a}}\' }}')).toEqual([
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.INTERPOLATION, '{{', ' "{{a}}\' }}'],
+ [lex.TokenType.TEXT, ''],
+ [lex.TokenType.EOF],
+ ]);
+ });
+
it('should treat expansion form as text when they are not parsed', () => {
expect(tokenizeAndHumanizeParts(
'<span>{a, b, =4 {c}}</span>', {tokenizeExpansionForms: false}))
@@ -976,7 +1045,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.RAW_TEXT, 'three'],
[lex.TokenType.EXPANSION_CASE_VALUE, '=4'],
[lex.TokenType.EXPANSION_CASE_EXP_START],
- [lex.TokenType.TEXT, 'four {{a}}'],
+ [lex.TokenType.TEXT, 'four '],
+ [lex.TokenType.INTERPOLATION, '{{', 'a', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_FORM_END],
[lex.TokenType.EOF],
@@ -1033,7 +1104,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
[lex.TokenType.EXPANSION_CASE_EXP_START],
- [lex.TokenType.TEXT, 'One {{message}}'],
+ [lex.TokenType.TEXT, 'One '],
+ [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_FORM_END],
[lex.TokenType.TEXT, '\n'],
@@ -1063,7 +1136,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
[lex.TokenType.EXPANSION_CASE_EXP_START],
- [lex.TokenType.TEXT, 'One {{message}}'],
+ [lex.TokenType.TEXT, 'One '],
+ [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_FORM_END],
[lex.TokenType.TEXT, '\n'],
@@ -1144,7 +1219,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
[lex.TokenType.EXPANSION_CASE_EXP_START],
- [lex.TokenType.TEXT, 'One {{message}}'],
+ [lex.TokenType.TEXT, 'One '],
+ [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_FORM_END],
[lex.TokenType.TEXT, '\n'],
@@ -1174,7 +1251,9 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
[lex.TokenType.EXPANSION_CASE_EXP_START],
- [lex.TokenType.TEXT, 'One {{message}}'],
+ [lex.TokenType.TEXT, 'One '],
+ [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
+ [lex.TokenType.TEXT, ''],
[lex.TokenType.EXPANSION_CASE_EXP_END],
[lex.TokenType.EXPANSION_FORM_END],
[lex.TokenType.TEXT, '\n'],
@@ -1301,8 +1380,11 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TEXT, '\n \n \n'],
[lex.TokenType.EOF],
]);
- expect(tokenizeAndHumanizeParts('\\r \\r \\r', {escapedString: true})).toEqual([
- [lex.TokenType.TEXT, '\n \n \n'], // post processing converts `\r` to `\n`
+ expect(tokenizeAndHumanizeParts('\\r{{\\r}}\\r', {escapedString: true})).toEqual([
+ // post processing converts `\r` to `\n`
+ [lex.TokenType.TEXT, '\n'],
+ [lex.TokenType.INTERPOLATION, '{{', '\n', '}}'],
+ [lex.TokenType.TEXT, '\n'],
[lex.TokenType.EOF],
]);
expect(tokenizeAndHumanizeParts('\\v \\v \\v', {escapedString: true})).toEqual([