From dda75ca1d06bf4949fd13eef182a279045570b39 Mon Sep 17 00:00:00 2001 From: atscott Date: Tue, 3 Aug 2021 14:49:17 -0700 Subject: [PATCH] Revert "refactor(compiler): support interpolation tokens when lexing markup (#42062)" (#43033) This reverts commit c8a46bfdcd5dac0044c4322a5b3967163056b339. PR Close #43033 --- packages/compiler/src/ml_parser/lexer.ts | 73 ++--------- packages/compiler/src/ml_parser/parser.ts | 43 +----- .../test/ml_parser/html_parser_spec.ts | 26 ---- .../compiler/test/ml_parser/lexer_spec.ts | 122 +++--------------- 4 files changed, 31 insertions(+), 233 deletions(-) diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index d62a54f576..f0fb361232 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -22,7 +22,6 @@ export enum TokenType { TEXT, ESCAPABLE_RAW_TEXT, RAW_TEXT, - INTERPOLATION, COMMENT_START, COMMENT_END, CDATA_START, @@ -286,7 +285,7 @@ class _Tokenizer { } const token = new Token( this._currentTokenType, parts, - (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints)); + this._cursor.getSpan(this._currentTokenStart, this._leadingTriviaCodePoints)); this.tokens.push(token); this._currentTokenStart = null; this._currentTokenType = null; @@ -697,16 +696,19 @@ class _Tokenizer { } private _consumeText() { - this._beginToken(TokenType.TEXT); + const start = this._cursor.clone(); + this._beginToken(TokenType.TEXT, start); const parts: string[] = []; do { - const current = this._cursor.clone(); if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) { - this._endToken([this._processCarriageReturns(parts.join(''))], current); - this._consumeInterpolation(current); - parts.length = 0; - this._beginToken(TokenType.TEXT); + parts.push(this._interpolationConfig.start); + this._inInterpolation = true; + } else if ( + this._interpolationConfig && this._inInterpolation && + this._attemptStr(this._interpolationConfig.end)) { + parts.push(this._interpolationConfig.end); + this._inInterpolation = false; } else { parts.push(this._readChar(true)); } @@ -719,61 +721,6 @@ class _Tokenizer { this._endToken([this._processCarriageReturns(parts.join(''))]); } - private _consumeInterpolation(interpolationStart: CharacterCursor) { - const parts: string[] = []; - this._beginToken(TokenType.INTERPOLATION, interpolationStart); - parts.push(this._interpolationConfig.start); - - // Find the end of the interpolation, ignoring content inside quotes. - const expressionStart = this._cursor.clone(); - let inQuote: string|null = null; - let inComment = false; - while (this._cursor.peek() !== chars.$EOF) { - const current = this._cursor.clone(); - - if (this._isTagStart()) { - // We are starting what looks like an HTML element in the middle of this interpolation. - // Reset the cursor to before the `<` character and end the interpolation token. - // (This is actually wrong but here for backward compatibility). 
- this._cursor = current; - parts.push(this._getProcessedChars(expressionStart, current)); - return this._endToken(parts); - } - - if (inQuote === null) { - if (this._attemptStr(this._interpolationConfig.end)) { - // We are not in a string, and we hit the end interpolation marker - parts.push(this._getProcessedChars(expressionStart, current)); - parts.push(this._interpolationConfig.end); - return this._endToken(parts); - } else if (this._attemptStr('//')) { - // Once we are in a comment we ignore any quotes - inComment = true; - } - } - - const char = this._readChar(true); - if (char === '\\') { - // Skip the next character because it was escaped. - this._readChar(true); - } else if (char === inQuote) { - // Exiting the current quoted string - inQuote = null; - } else if (!inComment && /['"`]/.test(char)) { - // Entering a new quoted string - inQuote = char; - } - } - - // We hit EOF without finding a closing interpolation marker - parts.push(this._getProcessedChars(expressionStart, this._cursor)); - return this._endToken(parts); - } - - private _getProcessedChars(start: CharacterCursor, end: CharacterCursor): string { - return this._processCarriageReturns(end.getChars(start)) - } - private _isTextEnd(): boolean { if (this._isTagStart() || this._cursor.peek() === chars.$EOF) { return true; diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts index fd01357d43..24465f8e97 100644 --- a/packages/compiler/src/ml_parser/parser.ts +++ b/packages/compiler/src/ml_parser/parser.ts @@ -9,7 +9,6 @@ import {ParseError, ParseSourceSpan} from '../parse_util'; import * as html from './ast'; -import {NAMED_ENTITIES} from './entities'; import * as lex from './lexer'; import {getNsPrefix, mergeNsAndName, splitNsName, TagDefinition} from './tags'; @@ -216,7 +215,6 @@ class _TreeBuilder { } private _consumeText(token: lex.Token) { - const startSpan = token.sourceSpan; let text = token.parts[0]; if (text.length > 0 && text[0] == '\n') { const parent = this._getParentElement(); @@ -226,29 +224,8 @@ class _TreeBuilder { } } - // For now recombine text and interpolation tokens - if (this._peek.type === lex.TokenType.INTERPOLATION) { - while (this._peek.type === lex.TokenType.INTERPOLATION || - this._peek.type === lex.TokenType.TEXT) { - token = this._advance(); - if (token.type === lex.TokenType.INTERPOLATION) { - // For backward compatibility we decode HTML entities that appear in interpolation - // expressions. This is arguably a bug, but it could be a considerable breaking change to - // fix it. It should be addressed in a larger project to refactor the entire parser/lexer - // chain after View Engine has been removed. - text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity); - } else { - text += token.parts.join(''); - } - } - } - if (text.length > 0) { - const endSpan = token.sourceSpan; - this._addToParent(new html.Text( - text, - new ParseSourceSpan( - startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details))); + this._addToParent(new html.Text(text, token.sourceSpan)); } } @@ -418,21 +395,3 @@ class _TreeBuilder { function lastOnStack(stack: any[], element: any): boolean { return stack.length > 0 && stack[stack.length - 1] === element; } - -/** - * Decode the `entity` string, which we believe is the contents of an HTML entity. - * - * If the string is not actually a valid/known entity then just return the original `match` string. 
- */ -function decodeEntity(match: string, entity: string): string { - if (NAMED_ENTITIES[entity] !== undefined) { - return NAMED_ENTITIES[entity] || match; - } - if (/^#x[a-f0-9]+$/i.test(entity)) { - return String.fromCodePoint(parseInt(entity.slice(2), 16)); - } - if (/^#\d+$/.test(entity)) { - return String.fromCodePoint(parseInt(entity.slice(1), 10)); - } - return match; -} diff --git a/packages/compiler/test/ml_parser/html_parser_spec.ts b/packages/compiler/test/ml_parser/html_parser_spec.ts index 279bca60d3..b971d9187a 100644 --- a/packages/compiler/test/ml_parser/html_parser_spec.ts +++ b/packages/compiler/test/ml_parser/html_parser_spec.ts @@ -675,32 +675,6 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes} expect(node.endSourceSpan!.end.offset).toEqual(12); }); - // This checks backward compatibility with a previous version of the lexer, which would - // treat interpolation expressions as regular HTML escapable text. - it('should decode HTML entities in interpolations', () => { - expect(humanizeDomSourceSpans(parser.parse( - '{{&}}' + - '{{▾}}' + - '{{▾}}' + - '{{& (no semi-colon)}}' + - '{{BE; (invalid decimal)}}', - 'TestComp'))) - .toEqual([[ - html.Text, - '{{&}}' + - '{{\u25BE}}' + - '{{\u25BE}}' + - '{{& (no semi-colon)}}' + - '{{BE; (invalid decimal)}}', - 0, - '{{&}}' + - '{{▾}}' + - '{{▾}}' + - '{{& (no semi-colon)}}' + - '{{BE; (invalid decimal)}}', - ]]); - }); - it('should not set the end source span for void elements', () => { expect(humanizeDomSourceSpans(parser.parse('
<div><br></div>', 'TestComp'))).toEqual([
       [html.Element, 'div', 0, '<div><br></div>', '<div>', '</div>
'], diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index 54005b28ba..5c795ed959 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -549,66 +549,25 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u }); it('should parse interpolation', () => { - expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}d{{ e "}}" f }}g{{ h // " i }}')) - .toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' a ', '}}'], - [lex.TokenType.TEXT, 'b'], - [lex.TokenType.INTERPOLATION, '{{', ' c // comment ', '}}'], - [lex.TokenType.TEXT, 'd'], - [lex.TokenType.INTERPOLATION, '{{', ' e "}}" f ', '}}'], - [lex.TokenType.TEXT, 'g'], - [lex.TokenType.INTERPOLATION, '{{', ' h // " i ', '}}'], - [lex.TokenType.TEXT, ''], - [lex.TokenType.EOF], - ]); - - expect(tokenizeAndHumanizeSourceSpans('{{ a }}b{{ c // comment }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{ a }}'], - [lex.TokenType.TEXT, 'b'], - [lex.TokenType.INTERPOLATION, '{{ c // comment }}'], - [lex.TokenType.TEXT, ''], - [lex.TokenType.EOF, ''], + expect(tokenizeAndHumanizeParts('{{ a }}b{{ c // comment }}')).toEqual([ + [lex.TokenType.TEXT, '{{ a }}b{{ c // comment }}'], + [lex.TokenType.EOF], ]); }); it('should parse interpolation with custom markers', () => { expect(tokenizeAndHumanizeParts('{% a %}', {interpolationConfig: {start: '{%', end: '%}'}})) .toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{%', ' a ', '%}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{% a %}'], [lex.TokenType.EOF], ]); }); - it('should handle CR & LF in text', () => { + it('should handle CR & LF', () => { expect(tokenizeAndHumanizeParts('t\ne\rs\r\nt')).toEqual([ [lex.TokenType.TEXT, 't\ne\ns\nt'], [lex.TokenType.EOF], ]); - - expect(tokenizeAndHumanizeSourceSpans('t\ne\rs\r\nt')).toEqual([ - [lex.TokenType.TEXT, 't\ne\rs\r\nt'], - [lex.TokenType.EOF, ''], - ]); - }); - - it('should handle CR & LF in interpolation', () => { - expect(tokenizeAndHumanizeParts('{{t\ne\rs\r\nt}}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', 't\ne\ns\nt', '}}'], - [lex.TokenType.TEXT, ''], - [lex.TokenType.EOF], - ]); - - expect(tokenizeAndHumanizeSourceSpans('{{t\ne\rs\r\nt}}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{t\ne\rs\r\nt}}'], - [lex.TokenType.TEXT, ''], - [lex.TokenType.EOF, ''], - ]); }); it('should parse entities', () => { @@ -616,11 +575,6 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TEXT, 'a&b'], [lex.TokenType.EOF], ]); - - expect(tokenizeAndHumanizeSourceSpans('a&b')).toEqual([ - [lex.TokenType.TEXT, 'a&b'], - [lex.TokenType.EOF, ''], - ]); }); it('should parse text starting with "&"', () => { @@ -639,9 +593,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should allow "<" in text nodes', () => { expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' a < b ? c : d ', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ a < b ? 
c : d }}'], [lex.TokenType.EOF], ]); @@ -662,9 +614,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should break out of interpolation in text token on valid start tag', () => { expect(tokenizeAndHumanizeParts('{{ a d }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' a '], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ a '], [lex.TokenType.TAG_OPEN_START, '', 'b'], [lex.TokenType.ATTR_NAME, '', '&&'], [lex.TokenType.ATTR_NAME, '', 'c'], @@ -676,9 +626,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should break out of interpolation in text token on valid comment', () => { expect(tokenizeAndHumanizeParts('{{ a }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' a }'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ a }'], [lex.TokenType.COMMENT_START], [lex.TokenType.RAW_TEXT, ''], [lex.TokenType.COMMENT_END], @@ -689,9 +637,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should break out of interpolation in text token on valid CDATA', () => { expect(tokenizeAndHumanizeParts('{{ a }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' a }'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ a }'], [lex.TokenType.CDATA_START], [lex.TokenType.RAW_TEXT, ''], [lex.TokenType.CDATA_END], @@ -707,14 +653,13 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u .toEqual([ [lex.TokenType.TAG_OPEN_START, '', 'code'], [lex.TokenType.TAG_OPEN_END], - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', '\'<={\'', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{\'<={\'}}'], [lex.TokenType.TAG_CLOSE, '', 'code'], [lex.TokenType.EOF], ]); }); + it('should parse start tags quotes in place of an attribute name as text', () => { expect(tokenizeAndHumanizeParts('')).toEqual([ [lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'], @@ -758,32 +703,18 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u it('should be able to escape {', () => { expect(tokenizeAndHumanizeParts('{{ "{" }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' "{" ', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ "{" }}'], [lex.TokenType.EOF], ]); }); it('should be able to escape {{', () => { expect(tokenizeAndHumanizeParts('{{ "{{" }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' "{{" ', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, '{{ "{{" }}'], [lex.TokenType.EOF], ]); }); - it('should capture everything up to the end of file in the interpolation expression part if there are mismatched quotes', - () => { - expect(tokenizeAndHumanizeParts('{{ "{{a}}\' }}')).toEqual([ - [lex.TokenType.TEXT, ''], - [lex.TokenType.INTERPOLATION, '{{', ' "{{a}}\' }}'], - [lex.TokenType.TEXT, ''], - [lex.TokenType.EOF], - ]); - }); - it('should treat expansion form as text when they are not parsed', () => { expect(tokenizeAndHumanizeParts( '{a, b, =4 {c}}', {tokenizeExpansionForms: false})) @@ -1045,9 +976,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.RAW_TEXT, 'three'], [lex.TokenType.EXPANSION_CASE_VALUE, '=4'], [lex.TokenType.EXPANSION_CASE_EXP_START], - [lex.TokenType.TEXT, 'four '], - [lex.TokenType.INTERPOLATION, '{{', 'a', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, 'four {{a}}'], 
[lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_FORM_END], [lex.TokenType.EOF], @@ -1104,9 +1033,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_CASE_VALUE, '=1'], [lex.TokenType.EXPANSION_CASE_EXP_START], - [lex.TokenType.TEXT, 'One '], - [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, 'One {{message}}'], [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_FORM_END], [lex.TokenType.TEXT, '\n'], @@ -1136,9 +1063,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_CASE_VALUE, '=1'], [lex.TokenType.EXPANSION_CASE_EXP_START], - [lex.TokenType.TEXT, 'One '], - [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, 'One {{message}}'], [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_FORM_END], [lex.TokenType.TEXT, '\n'], @@ -1219,9 +1144,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_CASE_VALUE, '=1'], [lex.TokenType.EXPANSION_CASE_EXP_START], - [lex.TokenType.TEXT, 'One '], - [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, 'One {{message}}'], [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_FORM_END], [lex.TokenType.TEXT, '\n'], @@ -1251,9 +1174,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_CASE_VALUE, '=1'], [lex.TokenType.EXPANSION_CASE_EXP_START], - [lex.TokenType.TEXT, 'One '], - [lex.TokenType.INTERPOLATION, '{{', 'message', '}}'], - [lex.TokenType.TEXT, ''], + [lex.TokenType.TEXT, 'One {{message}}'], [lex.TokenType.EXPANSION_CASE_EXP_END], [lex.TokenType.EXPANSION_FORM_END], [lex.TokenType.TEXT, '\n'], @@ -1380,11 +1301,8 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u [lex.TokenType.TEXT, '\n \n \n'], [lex.TokenType.EOF], ]); - expect(tokenizeAndHumanizeParts('\\r{{\\r}}\\r', {escapedString: true})).toEqual([ - // post processing converts `\r` to `\n` - [lex.TokenType.TEXT, '\n'], - [lex.TokenType.INTERPOLATION, '{{', '\n', '}}'], - [lex.TokenType.TEXT, '\n'], + expect(tokenizeAndHumanizeParts('\\r \\r \\r', {escapedString: true})).toEqual([ + [lex.TokenType.TEXT, '\n \n \n'], // post processing converts `\r` to `\n` [lex.TokenType.EOF], ]); expect(tokenizeAndHumanizeParts('\\v \\v \\v', {escapedString: true})).toEqual([