This reverts commit 942b24d5ea.
PR Close #43033
parent ea5ed4e4d4
commit 8d8ab4775c

@@ -23,7 +23,6 @@ export enum TokenType {
   ESCAPABLE_RAW_TEXT,
   RAW_TEXT,
   INTERPOLATION,
-  ENCODED_ENTITY,
   COMMENT_START,
   COMMENT_END,
   CDATA_START,
@@ -396,16 +395,19 @@ class _Tokenizer {
     }
   }
 
-  private _readChar(): string {
-    // Don't rely upon reading directly from `_input` as the actual char value
-    // may have been generated from an escape sequence.
-    const char = String.fromCodePoint(this._cursor.peek());
-    this._cursor.advance();
-    return char;
+  private _readChar(decodeEntities: boolean): string {
+    if (decodeEntities && this._cursor.peek() === chars.$AMPERSAND) {
+      return this._decodeEntity();
+    } else {
+      // Don't rely upon reading directly from `_input` as the actual char value
+      // may have been generated from an escape sequence.
+      const char = String.fromCodePoint(this._cursor.peek());
+      this._cursor.advance();
+      return char;
+    }
   }
 
-  private _consumeEntity(textTokenType: TokenType): void {
-    this._beginToken(TokenType.ENCODED_ENTITY);
+  private _decodeEntity(): string {
     const start = this._cursor.clone();
     this._cursor.advance();
     if (this._attemptCharCode(chars.$HASH)) {
@@ -425,7 +427,7 @@ class _Tokenizer {
       this._cursor.advance();
       try {
         const charCode = parseInt(strNum, isHex ? 16 : 10);
-        this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
+        return String.fromCharCode(charCode);
       } catch {
         throw this._createError(
             _unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
@@ -434,25 +436,21 @@ class _Tokenizer {
       const nameStart = this._cursor.clone();
       this._attemptCharCodeUntilFn(isNamedEntityEnd);
       if (this._cursor.peek() != chars.$SEMICOLON) {
-        // No semicolon was found so abort the encoded entity token that was in progress, and treat
-        // this as a text token
-        this._beginToken(textTokenType, start);
         this._cursor = nameStart;
-        this._endToken(['&']);
-      } else {
-        const name = this._cursor.getChars(nameStart);
-        this._cursor.advance();
-        const char = NAMED_ENTITIES[name];
-        if (!char) {
-          throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
-        }
-        this._endToken([char, `&${name};`]);
-      }
+        return '&';
+      }
+      const name = this._cursor.getChars(nameStart);
+      this._cursor.advance();
+      const char = NAMED_ENTITIES[name];
+      if (!char) {
+        throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
+      }
+      return char;
     }
   }
 
-  private _consumeRawText(consumeEntities: boolean, endMarkerPredicate: () => boolean): void {
-    this._beginToken(consumeEntities ? TokenType.ESCAPABLE_RAW_TEXT : TokenType.RAW_TEXT);
+  private _consumeRawText(decodeEntities: boolean, endMarkerPredicate: () => boolean): Token {
+    this._beginToken(decodeEntities ? TokenType.ESCAPABLE_RAW_TEXT : TokenType.RAW_TEXT);
     const parts: string[] = [];
     while (true) {
       const tagCloseStart = this._cursor.clone();
@@ -461,16 +459,9 @@ class _Tokenizer {
       if (foundEndMarker) {
         break;
       }
-      if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
-        this._endToken([this._processCarriageReturns(parts.join(''))]);
-        parts.length = 0;
-        this._consumeEntity(TokenType.ESCAPABLE_RAW_TEXT);
-        this._beginToken(TokenType.ESCAPABLE_RAW_TEXT);
-      } else {
-        parts.push(this._readChar());
-      }
+      parts.push(this._readChar(decodeEntities));
     }
-    this._endToken([this._processCarriageReturns(parts.join(''))]);
+    return this._endToken([this._processCarriageReturns(parts.join(''))]);
   }
 
   private _consumeComment(start: CharacterCursor) {
@@ -572,8 +563,8 @@ class _Tokenizer {
     }
   }
 
-  private _consumeRawTextWithTagClose(prefix: string, tagName: string, consumeEntities: boolean) {
-    this._consumeRawText(consumeEntities, () => {
+  private _consumeRawTextWithTagClose(prefix: string, tagName: string, decodeEntities: boolean) {
+    this._consumeRawText(decodeEntities, () => {
       if (!this._attemptCharCode(chars.$LT)) return false;
       if (!this._attemptCharCode(chars.$SLASH)) return false;
       this._attemptCharCodeUntilFn(isNotWhitespace);
@@ -721,16 +712,11 @@ class _Tokenizer {
       const current = this._cursor.clone();
       if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
         this._endToken([this._processCarriageReturns(parts.join(''))], current);
         parts.length = 0;
         this._consumeInterpolation(interpolationTokenType, current);
         this._beginToken(textTokenType);
-      } else if (this._cursor.peek() === chars.$AMPERSAND) {
-        this._endToken([this._processCarriageReturns(parts.join(''))]);
-        parts.length = 0;
-        this._consumeEntity(textTokenType);
-        this._beginToken(textTokenType);
       } else {
-        parts.push(this._readChar());
+        parts.push(this._readChar(true));
       }
     }
 
@@ -909,9 +895,7 @@ function mergeTextTokens(srcTokens: Token[]): Token[] {
   let lastDstToken: Token|undefined = undefined;
   for (let i = 0; i < srcTokens.length; i++) {
     const token = srcTokens[i];
-    if ((lastDstToken && lastDstToken.type == TokenType.TEXT && token.type == TokenType.TEXT) ||
-        (lastDstToken && lastDstToken.type == TokenType.ATTR_VALUE_TEXT &&
-         token.type == TokenType.ATTR_VALUE_TEXT)) {
+    if (lastDstToken && lastDstToken.type == TokenType.TEXT && token.type == TokenType.TEXT) {
      lastDstToken.parts[0]! += token.parts[0];
       lastDstToken.sourceSpan.end = token.sourceSpan.end;
     } else {

@@ -226,21 +226,20 @@ class _TreeBuilder {
       }
     }
 
-    // For now recombine text, interpolation and entity tokens
-    while (this._peek.type === lex.TokenType.INTERPOLATION ||
-           this._peek.type === lex.TokenType.TEXT ||
-           this._peek.type === lex.TokenType.ENCODED_ENTITY) {
-      token = this._advance();
-      if (token.type === lex.TokenType.INTERPOLATION) {
-        // For backward compatibility we decode HTML entities that appear in interpolation
-        // expressions. This is arguably a bug, but it could be a considerable breaking change to
-        // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
-        // chain after View Engine has been removed.
-        text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
-      } else if (token.type === lex.TokenType.ENCODED_ENTITY) {
-        text += token.parts[0];
-      } else {
-        text += token.parts.join('');
+    // For now recombine text and interpolation tokens
+    if (this._peek.type === lex.TokenType.INTERPOLATION) {
+      while (this._peek.type === lex.TokenType.INTERPOLATION ||
+             this._peek.type === lex.TokenType.TEXT) {
+        token = this._advance();
+        if (token.type === lex.TokenType.INTERPOLATION) {
+          // For backward compatibility we decode HTML entities that appear in interpolation
+          // expressions. This is arguably a bug, but it could be a considerable breaking change to
+          // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
+          // chain after View Engine has been removed.
+          text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
+        } else {
+          text += token.parts.join('');
+        }
       }
     }
 
@@ -370,17 +369,16 @@ class _TreeBuilder {
       this._advance();
     }
 
-    // Consume the attribute value
+    // Consume the value
     let value = '';
     let valueStartSpan: ParseSourceSpan|undefined = undefined;
     let valueEnd: ParseLocation|undefined = undefined;
     if (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT) {
       valueStartSpan = this._peek.sourceSpan;
       valueEnd = this._peek.sourceSpan.end;
-      // For now recombine text, interpolation and entity tokens
+      // For now we are recombining text and interpolation tokens
       while (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT ||
-             this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION ||
-             this._peek.type === lex.TokenType.ENCODED_ENTITY) {
+             this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
         let valueToken = this._advance();
         if (valueToken.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
           // For backward compatibility we decode HTML entities that appear in interpolation
@@ -388,8 +386,6 @@ class _TreeBuilder {
           // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
           // chain after View Engine has been removed.
           value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
-        } else if (valueToken.type === lex.TokenType.ENCODED_ENTITY) {
-          value += valueToken.parts[0];
         } else {
           value += valueToken.parts.join('');
         }

@@ -407,11 +407,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
           [lex.TokenType.TAG_OPEN_START, '', 't'],
           [lex.TokenType.ATTR_NAME, '', 'a'],
           [lex.TokenType.ATTR_QUOTE, '"'],
-          [lex.TokenType.ATTR_VALUE_TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#65;'],
-          [lex.TokenType.ATTR_VALUE_TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#x41;'],
-          [lex.TokenType.ATTR_VALUE_TEXT, ''],
+          [lex.TokenType.ATTR_VALUE_TEXT, 'AA'],
           [lex.TokenType.ATTR_QUOTE, '"'],
           [lex.TokenType.TAG_OPEN_END],
           [lex.TokenType.EOF],
@@ -526,60 +522,50 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
     describe('entities', () => {
       it('should parse named entities', () => {
         expect(tokenizeAndHumanizeParts('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&b'],
           [lex.TokenType.EOF],
         ]);
       });
 
       it('should parse hexadecimal entities', () => {
         expect(tokenizeAndHumanizeParts('&#x41;&#X41;')).toEqual([
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#x41;'],
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#X41;'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'AA'],
           [lex.TokenType.EOF],
         ]);
       });
 
       it('should parse decimal entities', () => {
         expect(tokenizeAndHumanizeParts('&#65;')).toEqual([
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#65;'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'A'],
           [lex.TokenType.EOF],
         ]);
       });
 
       it('should store the locations', () => {
         expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&amp;b'],
           [lex.TokenType.EOF, ''],
         ]);
       });
 
       it('should report malformed/unknown entities', () => {
         expect(tokenizeAndHumanizeErrors('&tbo;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unknown entity "tbo" - use the "&#<decimal>;" or "&#x<hex>;" syntax', '0:0'
         ]]);
         expect(tokenizeAndHumanizeErrors('&#3sdf;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unable to parse entity "&#3s" - decimal character reference entities must end with ";"',
           '0:4'
         ]]);
         expect(tokenizeAndHumanizeErrors('&#xasdf;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unable to parse entity "&#xas" - hexadecimal character reference entities must end with ";"',
           '0:5'
         ]]);
 
         expect(tokenizeAndHumanizeErrors('&#xABC')).toEqual([
-          [lex.TokenType.ENCODED_ENTITY, 'Unexpected character "EOF"', '0:6']
+          [lex.TokenType.TEXT, 'Unexpected character "EOF"', '0:6']
         ]);
       });
     });
@@ -657,16 +643,12 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
 
       it('should parse entities', () => {
         expect(tokenizeAndHumanizeParts('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&b'],
           [lex.TokenType.EOF],
         ]);
 
         expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&amp;b'],
           [lex.TokenType.EOF, ''],
         ]);
       });
@@ -912,9 +894,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
       expect(tokenizeAndHumanizeParts(`<title>&amp;</title>`)).toEqual([
         [lex.TokenType.TAG_OPEN_START, '', 'title'],
         [lex.TokenType.TAG_OPEN_END],
-        [lex.TokenType.ESCAPABLE_RAW_TEXT, ''],
-        [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-        [lex.TokenType.ESCAPABLE_RAW_TEXT, ''],
+        [lex.TokenType.ESCAPABLE_RAW_TEXT, '&'],
        [lex.TokenType.TAG_CLOSE, '', 'title'],
         [lex.TokenType.EOF],
       ]);
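
For context, the lexer-level behaviour this revert restores can be sketched outside the compiler: instead of emitting separate ENCODED_ENTITY tokens between TEXT tokens, character references are decoded while characters are read, so downstream consumers receive a single decoded text value. The snippet below is only an illustrative approximation, not code from this commit; the names NAMED and decodeEntitiesInline are made up, and the entity table is truncated to two entries.

// Hypothetical sketch: decode character references inline while scanning,
// rather than emitting ENCODED_ENTITY tokens alongside the surrounding text.
const NAMED: Record<string, string> = {'amp': '&', 'lt': '<'};  // truncated sample table

function decodeEntitiesInline(input: string): string {
  let out = '';
  for (let i = 0; i < input.length; i++) {
    if (input[i] !== '&') {
      out += input[i];
      continue;
    }
    const end = input.indexOf(';', i + 1);
    const name = end === -1 ? '' : input.slice(i + 1, end);
    if (name[0] === '#') {
      // Numeric reference, e.g. &#65; (decimal) or &#x41; (hexadecimal).
      const isHex = name[1] === 'x' || name[1] === 'X';
      out += String.fromCharCode(parseInt(name.slice(isHex ? 2 : 1), isHex ? 16 : 10));
      i = end;
    } else if (NAMED[name] !== undefined) {
      out += NAMED[name];
      i = end;
    } else {
      out += '&';  // in this sketch an unrecognised reference keeps the ampersand as text
    }
  }
  return out;
}

console.log(decodeEntitiesInline('a&amp;b'));      // "a&b"  -- one merged text value
console.log(decodeEntitiesInline('&#x41;&#65;'));  // "AA"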