Revert "refactor(compiler): support interpolation tokens when lexing attribute values (#42062)" (#43033)

This reverts commit c516e252fc.

PR Close #43033
This commit is contained in:
atscott 2021-08-03 14:49:09 -07:00
parent 8d8ab4775c
commit 77731b8fe8
4 changed files with 68 additions and 143 deletions

View File

@ -29,8 +29,7 @@ export enum TokenType {
CDATA_END,
ATTR_NAME,
ATTR_QUOTE,
ATTR_VALUE_TEXT,
ATTR_VALUE_INTERPOLATION,
ATTR_VALUE,
DOC_TYPE,
EXPANSION_FORM_START,
EXPANSION_CASE_VALUE,
@ -229,8 +228,7 @@ class _Tokenizer {
this._consumeTagOpen(start);
}
} else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
this._consumeWithInterpolation(
TokenType.TEXT, TokenType.INTERPOLATION, () => this._isTextEnd());
this._consumeText();
}
} catch (e) {
this.handleError(e);
@ -597,25 +595,29 @@ class _Tokenizer {
private _consumeAttributeValue() {
let value: string;
if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
this._beginToken(TokenType.ATTR_QUOTE);
const quoteChar = this._cursor.peek();
this._consumeQuote(quoteChar);
this._consumeWithInterpolation(
TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION,
() => this._cursor.peek() === quoteChar);
this._consumeQuote(quoteChar);
this._cursor.advance();
this._endToken([String.fromCodePoint(quoteChar)]);
this._beginToken(TokenType.ATTR_VALUE);
const parts: string[] = [];
while (this._cursor.peek() !== quoteChar) {
parts.push(this._readChar(true));
}
value = parts.join('');
this._endToken([this._processCarriageReturns(value)]);
this._beginToken(TokenType.ATTR_QUOTE);
this._cursor.advance();
this._endToken([String.fromCodePoint(quoteChar)]);
} else {
const endPredicate = () => isNameEnd(this._cursor.peek());
this._consumeWithInterpolation(
TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION, endPredicate);
this._beginToken(TokenType.ATTR_VALUE);
const valueStart = this._cursor.clone();
this._requireCharCodeUntilFn(isNameEnd, 1);
value = this._cursor.getChars(valueStart);
this._endToken([this._processCarriageReturns(value)]);
}
}
private _consumeQuote(quoteChar: number) {
this._beginToken(TokenType.ATTR_QUOTE);
this._requireCharCode(quoteChar);
this._endToken([String.fromCodePoint(quoteChar)]);
}
private _consumeTagOpenEnd() {
const tokenType =
this._attemptCharCode(chars.$SLASH) ? TokenType.TAG_OPEN_END_VOID : TokenType.TAG_OPEN_END;
@ -694,31 +696,21 @@ class _Tokenizer {
this._expansionCaseStack.pop();
}
/**
* Consume a string that may contain interpolation expressions.
* The first token consumed will be of `tokenType` and then there will be alternating
* `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true.
*
* @param textTokenType the kind of tokens to interleave around interpolation tokens.
* @param interpolationTokenType the kind of tokens that contain interpolation.
* @param endPredicate a function that should return true when we should stop consuming.
*/
private _consumeWithInterpolation(
textTokenType: TokenType, interpolationTokenType: TokenType, endPredicate: () => boolean) {
this._beginToken(textTokenType);
private _consumeText() {
this._beginToken(TokenType.TEXT);
const parts: string[] = [];
while (!endPredicate()) {
do {
const current = this._cursor.clone();
if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
this._endToken([this._processCarriageReturns(parts.join(''))], current);
this._consumeInterpolation(interpolationTokenType, current);
this._consumeInterpolation(current);
parts.length = 0;
this._beginToken(textTokenType);
this._beginToken(TokenType.TEXT);
} else {
parts.push(this._readChar(true));
}
}
} while (!this._isTextEnd());
// It is possible that an interpolation was started but not ended inside this text token.
// Make sure that we reset the state of the lexer correctly.
@ -727,15 +719,14 @@ class _Tokenizer {
this._endToken([this._processCarriageReturns(parts.join(''))]);
}
private _consumeInterpolation(
interpolationTokenType: TokenType, interpolationStart: CharacterCursor) {
private _consumeInterpolation(interpolationStart: CharacterCursor) {
const parts: string[] = [];
this._beginToken(interpolationTokenType, interpolationStart);
this._beginToken(TokenType.INTERPOLATION, interpolationStart);
parts.push(this._interpolationConfig.start);
// Find the end of the interpolation, ignoring content inside quotes.
const expressionStart = this._cursor.clone();
let inQuote: number|null = null;
let inQuote: string|null = null;
let inComment = false;
while (this._cursor.peek() !== chars.$EOF) {
const current = this._cursor.clone();
@ -761,15 +752,14 @@ class _Tokenizer {
}
}
const char = this._cursor.peek();
this._cursor.advance();
if (char === chars.$BACKSLASH) {
const char = this._readChar(true);
if (char === '\\') {
// Skip the next character because it was escaped.
this._cursor.advance();
this._readChar(true);
} else if (char === inQuote) {
// Exiting the current quoted string
inQuote = null;
} else if (!inComment && chars.isQuote(char)) {
} else if (!inComment && /['"`]/.test(char)) {
// Entering a new quoted string
inQuote = char;
}

View File

@ -6,7 +6,7 @@
* found in the LICENSE file at https://angular.io/license
*/
import {ParseError, ParseLocation, ParseSourceSpan} from '../parse_util';
import {ParseError, ParseSourceSpan} from '../parse_util';
import * as html from './ast';
import {NAMED_ENTITIES} from './entities';
@ -362,49 +362,27 @@ class _TreeBuilder {
private _consumeAttr(attrName: lex.Token): html.Attribute {
const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
let attrEnd = attrName.sourceSpan.end;
// Consume any quote
let end = attrName.sourceSpan.end;
let value = '';
let valueSpan: ParseSourceSpan = undefined!;
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
this._advance();
}
// Consume the value
let value = '';
let valueStartSpan: ParseSourceSpan|undefined = undefined;
let valueEnd: ParseLocation|undefined = undefined;
if (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT) {
valueStartSpan = this._peek.sourceSpan;
valueEnd = this._peek.sourceSpan.end;
// For now we are recombining text and interpolation tokens
while (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT ||
this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
let valueToken = this._advance();
if (valueToken.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
// For backward compatibility we decode HTML entities that appear in interpolation
// expressions. This is arguably a bug, but it could be a considerable breaking change to
// fix it. It should be addressed in a larger project to refactor the entire parser/lexer
// chain after View Engine has been removed.
value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
} else {
value += valueToken.parts.join('');
}
valueEnd = attrEnd = valueToken.sourceSpan.end;
}
if (this._peek.type === lex.TokenType.ATTR_VALUE) {
const valueToken = this._advance();
value = valueToken.parts[0];
end = valueToken.sourceSpan.end;
valueSpan = valueToken.sourceSpan;
}
// Consume any quote
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
const quoteToken = this._advance();
attrEnd = quoteToken.sourceSpan.end;
end = quoteToken.sourceSpan.end;
}
const valueSpan = valueStartSpan && valueEnd &&
new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart);
const keySpan = new ParseSourceSpan(attrName.sourceSpan.start, attrName.sourceSpan.end);
return new html.Attribute(
fullName, value,
new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart),
attrName.sourceSpan, valueSpan);
new ParseSourceSpan(attrName.sourceSpan.start, end, attrName.sourceSpan.fullStart), keySpan,
valueSpan);
}
private _getParentElement(): html.Element|null {

View File

@ -250,19 +250,6 @@ import {humanizeDom, humanizeDomSourceSpans, humanizeLineColumn, humanizeNodes}
]);
});
it('should decode HTML entities in interpolated attributes', () => {
// Note that the detail of decoding corner-cases is tested in the
// "should decode HTML entities in interpolations" spec.
expect(humanizeDomSourceSpans(parser.parse('<div foo="{{&amp;}}"></div>', 'TestComp')))
.toEqual([
[
html.Element, 'div', 0, '<div foo="{{&amp;}}"></div>', '<div foo="{{&amp;}}">',
'</div>'
],
[html.Attribute, 'foo', '{{&}}', 'foo="{{&amp;}}"']
]);
});
it('should normalize line endings within attribute values', () => {
const result =
parser.parse('<div key=" \r\n line 1 \r\n line 2 "></div>', 'TestComp');

View File

@ -257,7 +257,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.INCOMPLETE_TAG_OPEN, '<div'],
[lex.TokenType.ATTR_NAME, 'class'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'hi'],
[lex.TokenType.ATTR_VALUE, 'hi'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_NAME, 'sty'],
[lex.TokenType.TAG_OPEN_START, '<span'],
@ -295,21 +295,15 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, ''],
[lex.TokenType.ATTR_VALUE_INTERPOLATION, '{{', 'v', '}}'],
[lex.TokenType.ATTR_VALUE_TEXT, ''],
[lex.TokenType.ATTR_VALUE, '{{v}}'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_NAME, '', 'b'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 's'],
[lex.TokenType.ATTR_VALUE_INTERPOLATION, '{{', 'm', '}}'],
[lex.TokenType.ATTR_VALUE_TEXT, 'e'],
[lex.TokenType.ATTR_VALUE, 's{{m}}e'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_NAME, '', 'c'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 's'],
[lex.TokenType.ATTR_VALUE_INTERPOLATION, '{{', 'm//c', '}}'],
[lex.TokenType.ATTR_VALUE_TEXT, 'e'],
[lex.TokenType.ATTR_VALUE, 's{{m//c}}e'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -339,7 +333,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -351,7 +345,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -362,31 +356,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
expect(tokenizeAndHumanizeParts('<t a=b>')).toEqual([
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
]);
});
it('should parse attributes with unquoted interpolation value', () => {
expect(tokenizeAndHumanizeParts('<a a={{link.text}}>')).toEqual([
[lex.TokenType.TAG_OPEN_START, '', 'a'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_VALUE_TEXT, ''],
[lex.TokenType.ATTR_VALUE_INTERPOLATION, '{{', 'link.text', '}}'],
[lex.TokenType.ATTR_VALUE_TEXT, ''],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
]);
});
it('should parse attributes with empty quoted value', () => {
expect(tokenizeAndHumanizeParts('<t a="">')).toEqual([
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, ''],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
]);
@ -396,7 +366,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
expect(tokenizeAndHumanizeParts('<t a = b >')).toEqual([
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
]);
@ -407,7 +377,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'AA'],
[lex.TokenType.ATTR_VALUE, 'AA'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -419,11 +389,11 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, '&amp'],
[lex.TokenType.ATTR_VALUE, '&amp'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_NAME, '', 'b'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'c&&d'],
[lex.TokenType.ATTR_VALUE, 'c&&d'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -435,7 +405,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b && c &'],
[lex.TokenType.ATTR_VALUE, 'b && c &'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -447,7 +417,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.ATTR_VALUE_TEXT, 't\ne\ns\nt'],
[lex.TokenType.ATTR_VALUE, 't\ne\ns\nt'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -458,7 +428,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
expect(tokenizeAndHumanizeSourceSpans('<t a=b>')).toEqual([
[lex.TokenType.TAG_OPEN_START, '<t'],
[lex.TokenType.ATTR_NAME, 'a'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.TAG_OPEN_END, '>'],
[lex.TokenType.EOF, ''],
]);
@ -466,13 +436,13 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
it('should report missing closing single quote', () => {
expect(tokenizeAndHumanizeErrors('<t a=\'b>')).toEqual([
[lex.TokenType.ATTR_VALUE_TEXT, 'Unexpected character "EOF"', '0:8'],
[lex.TokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8'],
]);
});
it('should report missing closing double quote', () => {
expect(tokenizeAndHumanizeErrors('<t a="b>')).toEqual([
[lex.TokenType.ATTR_VALUE_TEXT, 'Unexpected character "EOF"', '0:8'],
[lex.TokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8'],
]);
});
});
@ -765,7 +735,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.ATTR_QUOTE, '"'],
// TODO(ayazhafiz): the " symbol should be a synthetic attribute,
// allowing us to complete the opening tag correctly.
@ -777,7 +747,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.INCOMPLETE_TAG_OPEN, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.ATTR_QUOTE, '\''],
// TODO(ayazhafiz): the ' symbol should be a synthetic attribute,
// allowing us to complete the opening tag correctly.
@ -1568,11 +1538,11 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_NAME, '', 'c'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.ATTR_VALUE_TEXT, 'd'],
[lex.TokenType.ATTR_VALUE, 'd'],
[lex.TokenType.ATTR_QUOTE, '\''],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.EOF],
@ -1621,7 +1591,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'd'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.ATTR_VALUE_TEXT, 'e'],
[lex.TokenType.ATTR_VALUE, 'e'],
[lex.TokenType.ATTR_QUOTE, '"'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.TAG_CLOSE, '', 't'],
@ -1634,7 +1604,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
[lex.TokenType.TAG_OPEN_START, '', 't'],
[lex.TokenType.ATTR_NAME, '', 'a'],
[lex.TokenType.ATTR_VALUE_TEXT, 'b'],
[lex.TokenType.ATTR_VALUE, 'b'],
[lex.TokenType.TAG_OPEN_END],
[lex.TokenType.TAG_CLOSE, '', 't'],
[lex.TokenType.EOF],