Revert "refactor(compiler): support interpolation tokens when lexing markup (#42062)" (#43033)

This reverts commit c8a46bfdcd.

PR Close #43033
atscott 2021-08-03 14:49:17 -07:00
parent 443ece4587
commit dda75ca1d0
4 changed files with 31 additions and 233 deletions
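The observable effect of this revert, exercised by the lexer and parser specs below, is that interpolations are no longer emitted as standalone INTERPOLATION tokens: the markers and the expression text are folded back into the surrounding TEXT token. A minimal sketch of the two humanized token streams, written as plain TypeScript data in the [type, ...parts] shape that tokenizeAndHumanizeParts asserts against (illustrative values, not compiler API):

// Humanized token streams for the template '{{ a }}b{{ c // comment }}'.

// With #42062 in place, each interpolation was a standalone token:
const beforeRevert: string[][] = [
  ['TEXT', ''],
  ['INTERPOLATION', '{{', ' a ', '}}'],
  ['TEXT', 'b'],
  ['INTERPOLATION', '{{', ' c // comment ', '}}'],
  ['TEXT', ''],
  ['EOF'],
];

// After the revert, the whole run lexes as a single TEXT token:
const afterRevert: string[][] = [
  ['TEXT', '{{ a }}b{{ c // comment }}'],
  ['EOF'],
];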

View File

@@ -22,7 +22,6 @@ export enum TokenType {
   TEXT,
   ESCAPABLE_RAW_TEXT,
   RAW_TEXT,
-  INTERPOLATION,
   COMMENT_START,
   COMMENT_END,
   CDATA_START,
@@ -286,7 +285,7 @@ class _Tokenizer {
     }
     const token = new Token(
         this._currentTokenType, parts,
-        (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints));
+        this._cursor.getSpan(this._currentTokenStart, this._leadingTriviaCodePoints));
     this.tokens.push(token);
     this._currentTokenStart = null;
     this._currentTokenType = null;
@@ -697,16 +696,19 @@ class _Tokenizer {
   }

   private _consumeText() {
-    const start = this._cursor.clone();
-    this._beginToken(TokenType.TEXT, start);
+    this._beginToken(TokenType.TEXT);
     const parts: string[] = [];

     do {
       const current = this._cursor.clone();
       if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
-        this._endToken([this._processCarriageReturns(parts.join(''))], current);
-        this._consumeInterpolation(current);
-        parts.length = 0;
-        this._beginToken(TokenType.TEXT);
+        parts.push(this._interpolationConfig.start);
+        this._inInterpolation = true;
+      } else if (
+          this._interpolationConfig && this._inInterpolation &&
+          this._attemptStr(this._interpolationConfig.end)) {
+        parts.push(this._interpolationConfig.end);
+        this._inInterpolation = false;
       } else {
         parts.push(this._readChar(true));
       }
@@ -719,61 +721,6 @@ class _Tokenizer {
     this._endToken([this._processCarriageReturns(parts.join(''))]);
   }

-  private _consumeInterpolation(interpolationStart: CharacterCursor) {
-    const parts: string[] = [];
-    this._beginToken(TokenType.INTERPOLATION, interpolationStart);
-    parts.push(this._interpolationConfig.start);
-
-    // Find the end of the interpolation, ignoring content inside quotes.
-    const expressionStart = this._cursor.clone();
-    let inQuote: string|null = null;
-    let inComment = false;
-    while (this._cursor.peek() !== chars.$EOF) {
-      const current = this._cursor.clone();
-
-      if (this._isTagStart()) {
-        // We are starting what looks like an HTML element in the middle of this interpolation.
-        // Reset the cursor to before the `<` character and end the interpolation token.
-        // (This is actually wrong but here for backward compatibility).
-        this._cursor = current;
-        parts.push(this._getProcessedChars(expressionStart, current));
-        return this._endToken(parts);
-      }
-
-      if (inQuote === null) {
-        if (this._attemptStr(this._interpolationConfig.end)) {
-          // We are not in a string, and we hit the end interpolation marker
-          parts.push(this._getProcessedChars(expressionStart, current));
-          parts.push(this._interpolationConfig.end);
-          return this._endToken(parts);
-        } else if (this._attemptStr('//')) {
-          // Once we are in a comment we ignore any quotes
-          inComment = true;
-        }
-      }
-
-      const char = this._readChar(true);
-      if (char === '\\') {
-        // Skip the next character because it was escaped.
-        this._readChar(true);
-      } else if (char === inQuote) {
-        // Exiting the current quoted string
-        inQuote = null;
-      } else if (!inComment && /['"`]/.test(char)) {
-        // Entering a new quoted string
-        inQuote = char;
-      }
-    }
-
-    // We hit EOF without finding a closing interpolation marker
-    parts.push(this._getProcessedChars(expressionStart, this._cursor));
-    return this._endToken(parts);
-  }
-
-  private _getProcessedChars(start: CharacterCursor, end: CharacterCursor): string {
-    return this._processCarriageReturns(end.getChars(start));
-  }
-
   private _isTextEnd(): boolean {
     if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
       return true;

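The _consumeInterpolation method deleted above scans ahead for the closing interpolation marker while honoring backslash escapes, quoted strings, and // comments (inside a comment, quotes are ignored but the end marker still terminates the token). A self-contained sketch of that scan over a plain string, using a hypothetical findInterpolationEnd helper rather than the compiler's CharacterCursor API:

// Returns the index of the closing `end` marker, or -1 if EOF is reached
// first. Mirrors the quote/comment handling of the deleted method.
function findInterpolationEnd(text: string, from: number, end = '}}'): number {
  let inQuote: string|null = null;
  let inComment = false;
  for (let i = from; i < text.length; i++) {
    if (inQuote === null) {
      if (text.startsWith(end, i)) return i;           // found the end marker
      if (text.startsWith('//', i)) inComment = true;  // quotes are ignored from here on
    }
    const char = text[i];
    if (char === '\\') {
      i++;  // skip the next character because it was escaped
    } else if (char === inQuote) {
      inQuote = null;  // exiting the current quoted string
    } else if (!inComment && /['"`]/.test(char)) {
      inQuote = char;  // entering a new quoted string
    }
  }
  return -1;  // EOF without a closing interpolation marker
}

For example, findInterpolationEnd('{{ e "}}" f }}', 2) skips the }} inside the double-quoted string and returns the index of the final marker, matching the ' e "}}" f ' expectation removed from the lexer spec below.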
View File

@@ -9,7 +9,6 @@
 import {ParseError, ParseSourceSpan} from '../parse_util';

 import * as html from './ast';
-import {NAMED_ENTITIES} from './entities';
 import * as lex from './lexer';
 import {getNsPrefix, mergeNsAndName, splitNsName, TagDefinition} from './tags';
@@ -216,7 +215,6 @@ class _TreeBuilder {
   }

   private _consumeText(token: lex.Token) {
-    const startSpan = token.sourceSpan;
     let text = token.parts[0];
     if (text.length > 0 && text[0] == '\n') {
       const parent = this._getParentElement();
@@ -226,29 +224,8 @@ class _TreeBuilder {
       }
     }

-    // For now recombine text and interpolation tokens
-    if (this._peek.type === lex.TokenType.INTERPOLATION) {
-      while (this._peek.type === lex.TokenType.INTERPOLATION ||
-             this._peek.type === lex.TokenType.TEXT) {
-        token = this._advance();
-        if (token.type === lex.TokenType.INTERPOLATION) {
-          // For backward compatibility we decode HTML entities that appear in interpolation
-          // expressions. This is arguably a bug, but it could be a considerable breaking change to
-          // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
-          // chain after View Engine has been removed.
-          text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
-        } else {
-          text += token.parts.join('');
-        }
-      }
-    }
-
     if (text.length > 0) {
-      const endSpan = token.sourceSpan;
-      this._addToParent(new html.Text(
-          text,
-          new ParseSourceSpan(
-              startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details)));
+      this._addToParent(new html.Text(text, token.sourceSpan));
     }
   }
@@ -418,21 +395,3 @@ class _TreeBuilder {
 function lastOnStack(stack: any[], element: any): boolean {
   return stack.length > 0 && stack[stack.length - 1] === element;
 }
-
-/**
- * Decode the `entity` string, which we believe is the contents of an HTML entity.
- *
- * If the string is not actually a valid/known entity then just return the original `match` string.
- */
-function decodeEntity(match: string, entity: string): string {
-  if (NAMED_ENTITIES[entity] !== undefined) {
-    return NAMED_ENTITIES[entity] || match;
-  }
-  if (/^#x[a-f0-9]+$/i.test(entity)) {
-    return String.fromCodePoint(parseInt(entity.slice(2), 16));
-  }
-  if (/^#\d+$/.test(entity)) {
-    return String.fromCodePoint(parseInt(entity.slice(1), 10));
-  }
-  return match;
-}

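The deleted decodeEntity helper recognized three entity shapes: named entities looked up in NAMED_ENTITIES, hexadecimal code points (&#x…;), and decimal code points (&#…;), returning the original match unchanged for anything it could not decode. Expected results when calling the helper shown above, taken from the 'should decode HTML entities in interpolations' spec that the next file deletes:

decodeEntity('&amp;', 'amp');        // => '&'       (named entity)
decodeEntity('&#x25BE;', '#x25BE');  // => '\u25BE'  (hexadecimal code point)
decodeEntity('&#9662;', '#9662');    // => '\u25BE'  (decimal code point)
decodeEntity('&#25BE;', '#25BE');    // => '&#25BE;' (neither hex nor decimal: original match)

Note that the caller's regular expression, /&([^;]+);/g, requires a terminating semicolon, so a bare '&amp (no semi-colon)' never reaches the helper and passes through untouched.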
View File

@@ -675,32 +675,6 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes}
       expect(node.endSourceSpan!.end.offset).toEqual(12);
     });

-    // This checks backward compatibility with a previous version of the lexer, which would
-    // treat interpolation expressions as regular HTML escapable text.
-    it('should decode HTML entities in interpolations', () => {
-      expect(humanizeDomSourceSpans(parser.parse(
-                 '{{&amp;}}' +
-                 '{{&#x25BE;}}' +
-                 '{{&#9662;}}' +
-                 '{{&amp (no semi-colon)}}' +
-                 '{{&#25BE; (invalid decimal)}}',
-                 'TestComp')))
-          .toEqual([[
-            html.Text,
-            '{{&}}' +
-                '{{\u25BE}}' +
-                '{{\u25BE}}' +
-                '{{&amp (no semi-colon)}}' +
-                '{{&#25BE; (invalid decimal)}}',
-            0,
-            '{{&amp;}}' +
-                '{{&#x25BE;}}' +
-                '{{&#9662;}}' +
-                '{{&amp (no semi-colon)}}' +
-                '{{&#25BE; (invalid decimal)}}',
-          ]]);
-    });
-
     it('should not set the end source span for void elements', () => {
       expect(humanizeDomSourceSpans(parser.parse('<div><br></div>', 'TestComp'))).toEqual([
         [html.Element, 'div', 0, '<div><br></div>', '<div>', '</div>'],

View File

@@ -549,66 +549,25 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     });

     it('should parse interpolation', () => {
-      expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}d{{ e "}}" f }}g{{ h // " i }}'))
-          .toEqual([
-            [lex.TokenType.TEXT, ''],
-            [lex.TokenType.INTERPOLATION, '{{', ' a ', '}}'],
-            [lex.TokenType.TEXT, 'b'],
-            [lex.TokenType.INTERPOLATION, '{{', ' c // comment ', '}}'],
-            [lex.TokenType.TEXT, 'd'],
-            [lex.TokenType.INTERPOLATION, '{{', ' e "}}" f ', '}}'],
-            [lex.TokenType.TEXT, 'g'],
-            [lex.TokenType.INTERPOLATION, '{{', ' h // " i ', '}}'],
-            [lex.TokenType.TEXT, ''],
-            [lex.TokenType.EOF],
-          ]);
-
-      expect(tokenizeAndHumanizeSourceSpans('{{ a }}b{{ c // comment }}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{ a }}'],
-        [lex.TokenType.TEXT, 'b'],
-        [lex.TokenType.INTERPOLATION, '{{ c // comment }}'],
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.EOF, ''],
+      expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}')).toEqual([
+        [lex.TokenType.TEXT, '{{ a }}b{{ c // comment }}'],
+        [lex.TokenType.EOF],
       ]);
     });

     it('should parse interpolation with custom markers', () => {
       expect(tokenizeAndHumanizeParts('{% a %}', {interpolationConfig: {start: '{%', end: '%}'}}))
           .toEqual([
-            [lex.TokenType.TEXT, ''],
-            [lex.TokenType.INTERPOLATION, '{%', ' a ', '%}'],
-            [lex.TokenType.TEXT, ''],
+            [lex.TokenType.TEXT, '{% a %}'],
             [lex.TokenType.EOF],
           ]);
     });

-    it('should handle CR & LF in text', () => {
+    it('should handle CR & LF', () => {
       expect(tokenizeAndHumanizeParts('t\ne\rs\r\nt')).toEqual([
         [lex.TokenType.TEXT, 't\ne\ns\nt'],
         [lex.TokenType.EOF],
       ]);
-
-      expect(tokenizeAndHumanizeSourceSpans('t\ne\rs\r\nt')).toEqual([
-        [lex.TokenType.TEXT, 't\ne\rs\r\nt'],
-        [lex.TokenType.EOF, ''],
-      ]);
     });

-    it('should handle CR & LF in interpolation', () => {
-      expect(tokenizeAndHumanizeParts('{{t\ne\rs\r\nt}}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', 't\ne\ns\nt', '}}'],
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.EOF],
-      ]);
-
-      expect(tokenizeAndHumanizeSourceSpans('{{t\ne\rs\r\nt}}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{t\ne\rs\r\nt}}'],
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.EOF, ''],
-      ]);
-    });
-
     it('should parse entities', () => {
@@ -616,11 +575,6 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
         [lex.TokenType.TEXT, 'a&b'],
         [lex.TokenType.EOF],
       ]);
-
-      expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
-        [lex.TokenType.TEXT, 'a&amp;b'],
-        [lex.TokenType.EOF, ''],
-      ]);
     });

     it('should parse text starting with "&"', () => {
@@ -639,9 +593,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     it('should allow "<" in text nodes', () => {
       expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' a < b ? c : d ', '}}'],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ a < b ? c : d }}'],
         [lex.TokenType.EOF],
       ]);
@@ -662,9 +614,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     it('should break out of interpolation in text token on valid start tag', () => {
       expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' a '],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ a '],
         [lex.TokenType.TAG_OPEN_START, '', 'b'],
         [lex.TokenType.ATTR_NAME, '', '&&'],
         [lex.TokenType.ATTR_NAME, '', 'c'],
@@ -676,9 +626,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     it('should break out of interpolation in text token on valid comment', () => {
       expect(tokenizeAndHumanizeParts('{{ a }<!---->}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' a }'],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ a }'],
         [lex.TokenType.COMMENT_START],
         [lex.TokenType.RAW_TEXT, ''],
         [lex.TokenType.COMMENT_END],
@@ -689,9 +637,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     it('should break out of interpolation in text token on valid CDATA', () => {
       expect(tokenizeAndHumanizeParts('{{ a }<![CDATA[]]>}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' a }'],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ a }'],
         [lex.TokenType.CDATA_START],
         [lex.TokenType.RAW_TEXT, ''],
         [lex.TokenType.CDATA_END],
@@ -707,14 +653,13 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           .toEqual([
             [lex.TokenType.TAG_OPEN_START, '', 'code'],
             [lex.TokenType.TAG_OPEN_END],
-            [lex.TokenType.TEXT, ''],
-            [lex.TokenType.INTERPOLATION, '{{', '\'<={\'', '}}'],
-            [lex.TokenType.TEXT, ''],
+            [lex.TokenType.TEXT, '{{\'<={\'}}'],
             [lex.TokenType.TAG_CLOSE, '', 'code'],
             [lex.TokenType.EOF],
           ]);
     });

     it('should parse start tags quotes in place of an attribute name as text', () => {
       expect(tokenizeAndHumanizeParts('<t ">')).toEqual([
         [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'],
@@ -758,32 +703,18 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     it('should be able to escape {', () => {
       expect(tokenizeAndHumanizeParts('{{ "{" }}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' "{" ', '}}'],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ "{" }}'],
         [lex.TokenType.EOF],
       ]);
     });

     it('should be able to escape {{', () => {
       expect(tokenizeAndHumanizeParts('{{ "{{" }}')).toEqual([
-        [lex.TokenType.TEXT, ''],
-        [lex.TokenType.INTERPOLATION, '{{', ' "{{" ', '}}'],
-        [lex.TokenType.TEXT, ''],
+        [lex.TokenType.TEXT, '{{ "{{" }}'],
         [lex.TokenType.EOF],
       ]);
     });

-    it('should capture everything up to the end of file in the interpolation expression part if there are mismatched quotes',
-       () => {
-         expect(tokenizeAndHumanizeParts('{{ "{{a}}\' }}')).toEqual([
-           [lex.TokenType.TEXT, ''],
-           [lex.TokenType.INTERPOLATION, '{{', ' "{{a}}\' }}'],
-           [lex.TokenType.TEXT, ''],
-           [lex.TokenType.EOF],
-         ]);
-       });
-
     it('should treat expansion form as text when they are not parsed', () => {
       expect(tokenizeAndHumanizeParts(
                  '<span>{a, b, =4 {c}}</span>', {tokenizeExpansionForms: false}))
@@ -1045,9 +976,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.RAW_TEXT, 'three'],
           [lex.TokenType.EXPANSION_CASE_VALUE, '=4'],
           [lex.TokenType.EXPANSION_CASE_EXP_START],
-          [lex.TokenType.TEXT, 'four '],
-          [lex.TokenType.INTERPOLATION, '{{', 'a', '}}'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'four {{a}}'],
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_FORM_END],
           [lex.TokenType.EOF],
@@ -1104,9 +1033,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
           [lex.TokenType.EXPANSION_CASE_EXP_START],
-          [lex.TokenType.TEXT, 'One '],
-          [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'One {{message}}'],
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_FORM_END],
           [lex.TokenType.TEXT, '\n'],
@@ -1136,9 +1063,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
           [lex.TokenType.EXPANSION_CASE_EXP_START],
-          [lex.TokenType.TEXT, 'One '],
-          [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'One {{message}}'],
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_FORM_END],
           [lex.TokenType.TEXT, '\n'],
@@ -1219,9 +1144,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
           [lex.TokenType.EXPANSION_CASE_EXP_START],
-          [lex.TokenType.TEXT, 'One '],
-          [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'One {{message}}'],
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_FORM_END],
           [lex.TokenType.TEXT, '\n'],
@@ -1251,9 +1174,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_CASE_VALUE, '=1'],
           [lex.TokenType.EXPANSION_CASE_EXP_START],
-          [lex.TokenType.TEXT, 'One '],
-          [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'One {{message}}'],
           [lex.TokenType.EXPANSION_CASE_EXP_END],
           [lex.TokenType.EXPANSION_FORM_END],
           [lex.TokenType.TEXT, '\n'],
@@ -1380,11 +1301,8 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
         [lex.TokenType.TEXT, '\n \n \n'],
         [lex.TokenType.EOF],
       ]);

-      expect(tokenizeAndHumanizeParts('\\r{{\\r}}\\r', {escapedString: true})).toEqual([
-        // post processing converts `\r` to `\n`
-        [lex.TokenType.TEXT, '\n'],
-        [lex.TokenType.INTERPOLATION, '{{', '\n', '}}'],
-        [lex.TokenType.TEXT, '\n'],
+      expect(tokenizeAndHumanizeParts('\\r \\r \\r', {escapedString: true})).toEqual([
+        [lex.TokenType.TEXT, '\n \n \n'],  // post processing converts `\r` to `\n`
         [lex.TokenType.EOF],
       ]);

       expect(tokenizeAndHumanizeParts('\\v \\v \\v', {escapedString: true})).toEqual([