This reverts commit 942b24d5ea.
PR Close #43033
parent ea5ed4e4d4
commit 8d8ab4775c
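For context, the observable effect of this revert on the lexer output: character references are decoded inline into the surrounding TEXT token again, rather than being emitted as separate ENCODED_ENTITY tokens. A minimal sketch of the restored expectation, written in the style of the lexer spec changed below and assuming its `lex` namespace import and `tokenizeAndHumanizeParts` helper are in scope (both are test-only utilities, not public API):

// Sketch only: mirrors the spec expectation restored by this revert (see the spec hunks below).
it('decodes an entity into the surrounding text token', () => {
  expect(tokenizeAndHumanizeParts('a&amp;b')).toEqual([
    [lex.TokenType.TEXT, 'a&b'],  // one TEXT token, with the entity already decoded
    [lex.TokenType.EOF],
  ]);
});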
@@ -23,7 +23,6 @@ export enum TokenType {
   ESCAPABLE_RAW_TEXT,
   RAW_TEXT,
   INTERPOLATION,
-  ENCODED_ENTITY,
   COMMENT_START,
   COMMENT_END,
   CDATA_START,
@@ -396,16 +395,19 @@ class _Tokenizer {
     }
   }

-  private _readChar(): string {
-    // Don't rely upon reading directly from `_input` as the actual char value
-    // may have been generated from an escape sequence.
-    const char = String.fromCodePoint(this._cursor.peek());
-    this._cursor.advance();
-    return char;
-  }
+  private _readChar(decodeEntities: boolean): string {
+    if (decodeEntities && this._cursor.peek() === chars.$AMPERSAND) {
+      return this._decodeEntity();
+    } else {
+      // Don't rely upon reading directly from `_input` as the actual char value
+      // may have been generated from an escape sequence.
+      const char = String.fromCodePoint(this._cursor.peek());
+      this._cursor.advance();
+      return char;
+    }
+  }

-  private _consumeEntity(textTokenType: TokenType): void {
-    this._beginToken(TokenType.ENCODED_ENTITY);
+  private _decodeEntity(): string {
     const start = this._cursor.clone();
     this._cursor.advance();
     if (this._attemptCharCode(chars.$HASH)) {
@@ -425,7 +427,7 @@ class _Tokenizer {
      this._cursor.advance();
      try {
        const charCode = parseInt(strNum, isHex ? 16 : 10);
-        this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
+        return String.fromCharCode(charCode);
      } catch {
        throw this._createError(
            _unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
@@ -434,25 +436,21 @@ class _Tokenizer {
      const nameStart = this._cursor.clone();
      this._attemptCharCodeUntilFn(isNamedEntityEnd);
      if (this._cursor.peek() != chars.$SEMICOLON) {
-        // No semicolon was found so abort the encoded entity token that was in progress, and treat
-        // this as a text token
-        this._beginToken(textTokenType, start);
-        this._cursor = nameStart;
-        this._endToken(['&']);
-      } else {
-        const name = this._cursor.getChars(nameStart);
-        this._cursor.advance();
-        const char = NAMED_ENTITIES[name];
-        if (!char) {
-          throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
-        }
-        this._endToken([char, `&${name};`]);
+        this._cursor = nameStart;
+        return '&';
+      }
+      const name = this._cursor.getChars(nameStart);
+      this._cursor.advance();
+      const char = NAMED_ENTITIES[name];
+      if (!char) {
+        throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
      }
+      return char;
    }
  }

-  private _consumeRawText(consumeEntities: boolean, endMarkerPredicate: () => boolean): void {
-    this._beginToken(consumeEntities ? TokenType.ESCAPABLE_RAW_TEXT : TokenType.RAW_TEXT);
+  private _consumeRawText(decodeEntities: boolean, endMarkerPredicate: () => boolean): Token {
+    this._beginToken(decodeEntities ? TokenType.ESCAPABLE_RAW_TEXT : TokenType.RAW_TEXT);
    const parts: string[] = [];
    while (true) {
      const tagCloseStart = this._cursor.clone();
@@ -461,16 +459,9 @@ class _Tokenizer {
      if (foundEndMarker) {
        break;
      }
-      if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
-        this._endToken([this._processCarriageReturns(parts.join(''))]);
-        parts.length = 0;
-        this._consumeEntity(TokenType.ESCAPABLE_RAW_TEXT);
-        this._beginToken(TokenType.ESCAPABLE_RAW_TEXT);
-      } else {
-        parts.push(this._readChar());
-      }
+      parts.push(this._readChar(decodeEntities));
    }
-    this._endToken([this._processCarriageReturns(parts.join(''))]);
+    return this._endToken([this._processCarriageReturns(parts.join(''))]);
  }

  private _consumeComment(start: CharacterCursor) {
@@ -572,8 +563,8 @@ class _Tokenizer {
    }
  }

-  private _consumeRawTextWithTagClose(prefix: string, tagName: string, consumeEntities: boolean) {
-    this._consumeRawText(consumeEntities, () => {
+  private _consumeRawTextWithTagClose(prefix: string, tagName: string, decodeEntities: boolean) {
+    this._consumeRawText(decodeEntities, () => {
      if (!this._attemptCharCode(chars.$LT)) return false;
      if (!this._attemptCharCode(chars.$SLASH)) return false;
      this._attemptCharCodeUntilFn(isNotWhitespace);
@@ -721,16 +712,11 @@ class _Tokenizer {
      const current = this._cursor.clone();
      if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
        this._endToken([this._processCarriageReturns(parts.join(''))], current);
-        parts.length = 0;
        this._consumeInterpolation(interpolationTokenType, current);
-        this._beginToken(textTokenType);
-      } else if (this._cursor.peek() === chars.$AMPERSAND) {
-        this._endToken([this._processCarriageReturns(parts.join(''))]);
        parts.length = 0;
-        this._consumeEntity(textTokenType);
        this._beginToken(textTokenType);
      } else {
-        parts.push(this._readChar());
+        parts.push(this._readChar(true));
      }
    }

@@ -909,9 +895,7 @@ function mergeTextTokens(srcTokens: Token[]): Token[] {
  let lastDstToken: Token|undefined = undefined;
  for (let i = 0; i < srcTokens.length; i++) {
    const token = srcTokens[i];
-    if ((lastDstToken && lastDstToken.type == TokenType.TEXT && token.type == TokenType.TEXT) ||
-        (lastDstToken && lastDstToken.type == TokenType.ATTR_VALUE_TEXT &&
-         token.type == TokenType.ATTR_VALUE_TEXT)) {
+    if (lastDstToken && lastDstToken.type == TokenType.TEXT && token.type == TokenType.TEXT) {
      lastDstToken.parts[0]! += token.parts[0];
      lastDstToken.sourceSpan.end = token.sourceSpan.end;
    } else {
@@ -226,10 +226,10 @@ class _TreeBuilder {
      }
    }

-    // For now recombine text, interpolation and entity tokens
-    while (this._peek.type === lex.TokenType.INTERPOLATION ||
-           this._peek.type === lex.TokenType.TEXT ||
-           this._peek.type === lex.TokenType.ENCODED_ENTITY) {
-      token = this._advance();
-      if (token.type === lex.TokenType.INTERPOLATION) {
-        // For backward compatibility we decode HTML entities that appear in interpolation
+    // For now recombine text and interpolation tokens
+    if (this._peek.type === lex.TokenType.INTERPOLATION) {
+      while (this._peek.type === lex.TokenType.INTERPOLATION ||
+             this._peek.type === lex.TokenType.TEXT) {
+        token = this._advance();
+        if (token.type === lex.TokenType.INTERPOLATION) {
+          // For backward compatibility we decode HTML entities that appear in interpolation
@@ -237,12 +237,11 @@ class _TreeBuilder {
          // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
          // chain after View Engine has been removed.
          text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
-        } else if (token.type === lex.TokenType.ENCODED_ENTITY) {
-          text += token.parts[0];
        } else {
          text += token.parts.join('');
        }
      }
+    }

    if (text.length > 0) {
      const endSpan = token.sourceSpan;
@@ -370,17 +369,16 @@ class _TreeBuilder {
      this._advance();
    }

-    // Consume the attribute value
+    // Consume the value
    let value = '';
    let valueStartSpan: ParseSourceSpan|undefined = undefined;
    let valueEnd: ParseLocation|undefined = undefined;
    if (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT) {
      valueStartSpan = this._peek.sourceSpan;
      valueEnd = this._peek.sourceSpan.end;
-      // For now recombine text, interpolation and entity tokens
+      // For now we are recombining text and interpolation tokens
      while (this._peek.type === lex.TokenType.ATTR_VALUE_TEXT ||
-             this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION ||
-             this._peek.type === lex.TokenType.ENCODED_ENTITY) {
+             this._peek.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
        let valueToken = this._advance();
        if (valueToken.type === lex.TokenType.ATTR_VALUE_INTERPOLATION) {
          // For backward compatibility we decode HTML entities that appear in interpolation
@@ -388,8 +386,6 @@ class _TreeBuilder {
          // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
          // chain after View Engine has been removed.
          value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
-        } else if (valueToken.type === lex.TokenType.ENCODED_ENTITY) {
-          value += valueToken.parts[0];
        } else {
          value += valueToken.parts.join('');
        }
@@ -407,11 +407,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
        [lex.TokenType.TAG_OPEN_START, '', 't'],
        [lex.TokenType.ATTR_NAME, '', 'a'],
        [lex.TokenType.ATTR_QUOTE, '"'],
-        [lex.TokenType.ATTR_VALUE_TEXT, ''],
-        [lex.TokenType.ENCODED_ENTITY, 'A', 'A'],
-        [lex.TokenType.ATTR_VALUE_TEXT, ''],
-        [lex.TokenType.ENCODED_ENTITY, 'A', 'A'],
-        [lex.TokenType.ATTR_VALUE_TEXT, ''],
+        [lex.TokenType.ATTR_VALUE_TEXT, 'AA'],
        [lex.TokenType.ATTR_QUOTE, '"'],
        [lex.TokenType.TAG_OPEN_END],
        [lex.TokenType.EOF],
@@ -526,60 +522,50 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
    describe('entities', () => {
      it('should parse named entities', () => {
        expect(tokenizeAndHumanizeParts('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&b'],
          [lex.TokenType.EOF],
        ]);
      });

      it('should parse hexadecimal entities', () => {
        expect(tokenizeAndHumanizeParts('&#x41;&#X41;')).toEqual([
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#x41;'],
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#X41;'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'AA'],
          [lex.TokenType.EOF],
        ]);
      });

      it('should parse decimal entities', () => {
        expect(tokenizeAndHumanizeParts('&#65;')).toEqual([
-          [lex.TokenType.TEXT, ''],
-          [lex.TokenType.ENCODED_ENTITY, 'A', '&#65;'],
-          [lex.TokenType.TEXT, ''],
+          [lex.TokenType.TEXT, 'A'],
          [lex.TokenType.EOF],
        ]);
      });

      it('should store the locations', () => {
        expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&amp;b'],
          [lex.TokenType.EOF, ''],
        ]);
      });

      it('should report malformed/unknown entities', () => {
        expect(tokenizeAndHumanizeErrors('&tbo;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unknown entity "tbo" - use the "&#<decimal>;" or "&#x<hex>;" syntax', '0:0'
        ]]);
        expect(tokenizeAndHumanizeErrors('&#3sdf;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unable to parse entity "&#3s" - decimal character reference entities must end with ";"',
          '0:4'
        ]]);
        expect(tokenizeAndHumanizeErrors('&#xasdf;')).toEqual([[
-          lex.TokenType.ENCODED_ENTITY,
+          lex.TokenType.TEXT,
          'Unable to parse entity "&#xas" - hexadecimal character reference entities must end with ";"',
          '0:5'
        ]]);

        expect(tokenizeAndHumanizeErrors('&#xABC')).toEqual([
-          [lex.TokenType.ENCODED_ENTITY, 'Unexpected character "EOF"', '0:6']
+          [lex.TokenType.TEXT, 'Unexpected character "EOF"', '0:6']
        ]);
      });
    });
@@ -657,16 +643,12 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u

      it('should parse entities', () => {
        expect(tokenizeAndHumanizeParts('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&b'],
          [lex.TokenType.EOF],
        ]);

        expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
-          [lex.TokenType.TEXT, 'a'],
-          [lex.TokenType.ENCODED_ENTITY, '&amp;'],
-          [lex.TokenType.TEXT, 'b'],
+          [lex.TokenType.TEXT, 'a&amp;b'],
          [lex.TokenType.EOF, ''],
        ]);
      });
@@ -912,9 +894,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
      expect(tokenizeAndHumanizeParts(`<title>&amp;</title>`)).toEqual([
        [lex.TokenType.TAG_OPEN_START, '', 'title'],
        [lex.TokenType.TAG_OPEN_END],
-        [lex.TokenType.ESCAPABLE_RAW_TEXT, ''],
-        [lex.TokenType.ENCODED_ENTITY, '&', '&amp;'],
-        [lex.TokenType.ESCAPABLE_RAW_TEXT, ''],
+        [lex.TokenType.ESCAPABLE_RAW_TEXT, '&'],
        [lex.TokenType.TAG_CLOSE, '', 'title'],
        [lex.TokenType.EOF],
      ]);