2016-06-23 09:47:54 -07:00
|
|
|
/**
|
|
|
|
* @license
|
2020-05-19 12:08:49 -07:00
|
|
|
* Copyright Google LLC All Rights Reserved.
|
2016-06-23 09:47:54 -07:00
|
|
|
*
|
|
|
|
* Use of this source code is governed by an MIT-style license that can be
|
|
|
|
* found in the LICENSE file at https://angular.io/license
|
|
|
|
*/
|
|
|
|
|
2016-07-21 11:41:25 -07:00
|
|
|
import * as chars from '../chars';
|
|
|
|
import {ParseError, ParseLocation, ParseSourceFile, ParseSourceSpan} from '../parse_util';
|
2021-07-11 10:24:55 +02:00
|
|
|
import {NAMED_ENTITIES} from './entities';
|
2016-07-21 11:41:25 -07:00
|
|
|
|
2016-06-20 09:52:41 -07:00
|
|
|
import {DEFAULT_INTERPOLATION_CONFIG, InterpolationConfig} from './interpolation_config';
|
2021-07-11 10:24:55 +02:00
|
|
|
import {TagContentType, TagDefinition} from './tags';
|
2015-10-07 09:34:21 -07:00
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
/**
 * The kinds of token produced by the `_Tokenizer`.
 *
 * NOTE: member order is significant — the numeric values are implicit and
 * must not be reordered.
 */
export enum TokenType {
  TAG_OPEN_START,
  TAG_OPEN_END,
  TAG_OPEN_END_VOID,
  TAG_CLOSE,
  INCOMPLETE_TAG_OPEN,
  TEXT,
  ESCAPABLE_RAW_TEXT,
  RAW_TEXT,
  INTERPOLATION,
  ENCODED_ENTITY,
  COMMENT_START,
  COMMENT_END,
  CDATA_START,
  CDATA_END,
  ATTR_NAME,
  ATTR_QUOTE,
  ATTR_VALUE_TEXT,
  ATTR_VALUE_INTERPOLATION,
  DOC_TYPE,
  EXPANSION_FORM_START,
  EXPANSION_CASE_VALUE,
  EXPANSION_CASE_EXP_START,
  EXPANSION_CASE_EXP_END,
  EXPANSION_FORM_END,
  EOF
}
|
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
export class Token {
|
2019-02-08 22:10:19 +00:00
|
|
|
constructor(
|
|
|
|
public type: TokenType|null, public parts: string[], public sourceSpan: ParseSourceSpan) {}
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
/**
 * A `ParseError` raised while lexing; also records the type of token that was
 * being consumed when the error occurred (`null` if none was in progress).
 */
export class TokenError extends ParseError {
  constructor(errorMsg: string, public tokenType: TokenType|null, span: ParseSourceSpan) {
    super(span, errorMsg);
  }
}
|
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
/**
 * The result of tokenizing a source string.
 *
 * `nonNormalizedIcuExpressions` collects ICU expression tokens whose line
 * endings were not normalized (populated when `i18nNormalizeLineEndingsInICUs`
 * is not enabled — see FW-2106).
 */
export class TokenizeResult {
  constructor(
      public tokens: Token[], public errors: TokenError[],
      public nonNormalizedIcuExpressions: Token[]) {}
}
|
|
|
|
|
2019-02-08 22:10:19 +00:00
|
|
|
/**
 * A sub-range of the source string to tokenize, expressed as absolute
 * character offsets (`startPos`/`endPos` into the source content) plus the
 * line/column coordinates of the start point.
 */
export interface LexerRange {
  // Offset of the first character to tokenize.
  startPos: number;
  // Line number of `startPos` (for source mapping).
  startLine: number;
  // Column number of `startPos` (for source mapping).
  startCol: number;
  // Offset one past the last character to tokenize.
  endPos: number;
}
|
|
|
|
|
2019-02-08 22:10:19 +00:00
|
|
|
/**
 * Options that modify how the text is tokenized.
 */
export interface TokenizeOptions {
  /** Whether to tokenize ICU messages (considered as text nodes when false). */
  tokenizeExpansionForms?: boolean;
  /** How to tokenize interpolation markers. */
  interpolationConfig?: InterpolationConfig;
  /**
   * The start and end point of the text to parse within the `source` string.
   * The entire `source` string is parsed if this is not provided.
   */
  range?: LexerRange;
  /**
   * If this text is stored in a JavaScript string, then we have to deal with escape sequences.
   *
   * **Example 1:**
   *
   * ```
   * "abc\"def\nghi"
   * ```
   *
   * - The `\"` must be converted to `"`.
   * - The `\n` must be converted to a new line character in a token,
   *   but it should not increment the current line for source mapping.
   *
   * **Example 2:**
   *
   * ```
   * "abc\
   *  def"
   * ```
   *
   * The line continuation (`\` followed by a newline) should be removed from a token
   * but the new line should increment the current line for source mapping.
   */
  escapedString?: boolean;
  /**
   * If this text is stored in an external template (e.g. via `templateUrl`) then we need to decide
   * whether or not to normalize the line-endings (from `\r\n` to `\n`) when processing ICU
   * expressions.
   *
   * If `true` then we will normalize ICU expression line endings.
   * The default is `false`, but this will be switched in a future major release.
   */
  i18nNormalizeLineEndingsInICUs?: boolean;
  /**
   * An array of characters that should be considered as leading trivia.
   * Leading trivia are characters that are not important to the developer, and so should not be
   * included in source-map segments. A common example is whitespace.
   */
  leadingTriviaChars?: string[];
  /**
   * If true, do not convert CRLF to LF.
   */
  preserveLineEndings?: boolean;
}
|
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
/**
 * Tokenize the given template `source` string.
 *
 * @param source The text to tokenize.
 * @param url The URL of the source, used when constructing the `ParseSourceFile`.
 * @param getTagDefinition A function that retrieves the tag definition for a given tag name.
 * @param options Configuration of the tokenization.
 * @returns The tokens (with adjacent text tokens merged), any tokenize errors, and any
 *     ICU expression tokens whose line endings were not normalized.
 */
export function tokenize(
    source: string, url: string, getTagDefinition: (tagName: string) => TagDefinition,
    options: TokenizeOptions = {}): TokenizeResult {
  const tokenizer = new _Tokenizer(new ParseSourceFile(source, url), getTagDefinition, options);
  tokenizer.tokenize();
  return new TokenizeResult(
      mergeTextTokens(tokenizer.tokens), tokenizer.errors, tokenizer.nonNormalizedIcuExpressions);
}
|
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
// Matches a carriage return, optionally followed by a line feed (i.e. `\r` or `\r\n`).
const _CR_OR_CRLF_REGEXP = /\r\n?/g;
|
2015-12-04 23:12:31 -08:00
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
function _unexpectedCharacterErrorMsg(charCode: number): string {
|
2016-06-24 14:31:35 -07:00
|
|
|
const char = charCode === chars.$EOF ? 'EOF' : String.fromCharCode(charCode);
|
2015-10-07 09:34:21 -07:00
|
|
|
return `Unexpected character "${char}"`;
|
|
|
|
}
|
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
function _unknownEntityErrorMsg(entitySrc: string): string {
|
2015-11-10 15:56:25 -08:00
|
|
|
return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2020-07-31 12:26:39 -07:00
|
|
|
function _unparsableEntityErrorMsg(type: CharacterReferenceType, entityStr: string): string {
|
|
|
|
return `Unable to parse entity "${entityStr}" - ${
|
|
|
|
type} character reference entities must end with ";"`;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * The style of a numeric character reference: `&#x...;` (hexadecimal) or
 * `&#...;` (decimal). The string values appear in user-facing error messages.
 */
enum CharacterReferenceType {
  HEX = 'hexadecimal',
  DEC = 'decimal',
}
|
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
/**
 * An error thrown to unwind the tokenizer's control flow. It is caught by the
 * tokenization loop and recorded in `errors` via `handleError()` rather than
 * propagating to the caller.
 */
class _ControlFlowError {
  constructor(public error: TokenError) {}
}
|
|
|
|
|
2020-11-16 22:37:09 +01:00
|
|
|
// See https://www.w3.org/TR/html51/syntax.html#writing-html-documents
class _Tokenizer {
  private _cursor: CharacterCursor;
  private _tokenizeIcu: boolean;
  private _interpolationConfig: InterpolationConfig;
  private _leadingTriviaCodePoints: number[]|undefined;
  // Start cursor of the token currently being consumed; `null` when no token is in progress.
  private _currentTokenStart: CharacterCursor|null = null;
  private _currentTokenType: TokenType|null = null;
  // Stack tracking the nesting of ICU expansion forms/cases being tokenized.
  private _expansionCaseStack: TokenType[] = [];
  private _inInterpolation: boolean = false;
  private readonly _preserveLineEndings: boolean;
  private readonly _escapedString: boolean;
  private readonly _i18nNormalizeLineEndingsInICUs: boolean;
  tokens: Token[] = [];
  errors: TokenError[] = [];
  // ICU expression tokens whose line endings were NOT normalized (see FW-2106).
  nonNormalizedIcuExpressions: Token[] = [];
|
2015-10-07 09:34:21 -07:00
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
  /**
   * @param _file The html source file being tokenized.
   * @param _getTagDefinition A function that will retrieve a tag definition for a given tag name.
   * @param options Configuration of the tokenization.
   */
  constructor(
      _file: ParseSourceFile, private _getTagDefinition: (tagName: string) => TagDefinition,
      options: TokenizeOptions) {
    this._tokenizeIcu = options.tokenizeExpansionForms || false;
    this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
    this._leadingTriviaCodePoints =
        options.leadingTriviaChars && options.leadingTriviaChars.map(c => c.codePointAt(0) || 0);
    // Default to tokenizing the entire source string.
    const range =
        options.range || {endPos: _file.content.length, startPos: 0, startLine: 0, startCol: 0};
    // An escaped string needs a cursor that decodes escape sequences as it advances.
    this._cursor = options.escapedString ? new EscapedCharacterCursor(_file, range) :
                                           new PlainCharacterCursor(_file, range);
    this._preserveLineEndings = options.preserveLineEndings || false;
    this._escapedString = options.escapedString || false;
    this._i18nNormalizeLineEndingsInICUs = options.i18nNormalizeLineEndingsInICUs || false;
    try {
      this._cursor.init();
    } catch (e) {
      // Errors thrown during cursor initialization are recorded as tokenize
      // errors rather than propagated to the caller.
      this.handleError(e);
    }
  }
|
|
|
|
|
2015-12-04 23:12:31 -08:00
|
|
|
private _processCarriageReturns(content: string): string {
|
2019-10-17 15:26:10 -07:00
|
|
|
if (this._preserveLineEndings) {
|
|
|
|
return content;
|
|
|
|
}
|
2020-11-16 22:37:09 +01:00
|
|
|
// https://www.w3.org/TR/html51/syntax.html#preprocessing-the-input-stream
|
2016-02-16 16:46:51 -08:00
|
|
|
// In order to keep the original position in the source, we can not
|
|
|
|
// pre-process it.
|
2015-12-04 23:12:31 -08:00
|
|
|
// Instead CRs are processed right before instantiating the tokens.
|
2016-06-24 14:31:35 -07:00
|
|
|
return content.replace(_CR_OR_CRLF_REGEXP, '\n');
|
2015-12-04 23:12:31 -08:00
|
|
|
}
|
|
|
|
|
2020-04-26 18:15:43 +01:00
|
|
|
  /** Runs the main tokenization loop over the whole input, then emits a final EOF token. */
  tokenize(): void {
    while (this._cursor.peek() !== chars.$EOF) {
      const start = this._cursor.clone();
      try {
        if (this._attemptCharCode(chars.$LT)) {
          // "<" starts markup: CDATA ("<!["), comment ("<!-"), doctype ("<!"),
          // closing tag ("</") or opening tag.
          if (this._attemptCharCode(chars.$BANG)) {
            if (this._attemptCharCode(chars.$LBRACKET)) {
              this._consumeCdata(start);
            } else if (this._attemptCharCode(chars.$MINUS)) {
              this._consumeComment(start);
            } else {
              this._consumeDocType(start);
            }
          } else if (this._attemptCharCode(chars.$SLASH)) {
            this._consumeTagClose(start);
          } else {
            this._consumeTagOpen(start);
          }
        } else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
          // Not markup (and not the start/end of an ICU form): consume as text,
          // handling embedded interpolation.
          this._consumeWithInterpolation(
              TokenType.TEXT, TokenType.INTERPOLATION, () => this._isTextEnd());
        }
      } catch (e) {
        // Control-flow errors are recorded in `errors`; anything else is rethrown.
        this.handleError(e);
      }
    }
    this._beginToken(TokenType.EOF);
    this._endToken([]);
  }
|
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
  /**
   * Attempts to consume the start/end markers of an ICU expansion form or case
   * at the current cursor position. The order of the checks is significant.
   *
   * @returns whether an ICU token has been created
   * @internal
   */
  private _tokenizeExpansionForm(): boolean {
    if (this.isExpansionFormStart()) {
      this._consumeExpansionFormStart();
      return true;
    }

    if (isExpansionCaseStart(this._cursor.peek()) && this._isInExpansionForm()) {
      this._consumeExpansionCaseStart();
      return true;
    }

    if (this._cursor.peek() === chars.$RBRACE) {
      // A "}" may close either the current expansion case or the expansion form
      // itself, depending on the current nesting.
      if (this._isInExpansionCase()) {
        this._consumeExpansionCaseEnd();
        return true;
      }

      if (this._isInExpansionForm()) {
        this._consumeExpansionFormEnd();
        return true;
      }
    }

    return false;
  }
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
  /** Marks the start of a new token of the given `type` at `start` (defaults to the cursor). */
  private _beginToken(type: TokenType, start = this._cursor.clone()) {
    this._currentTokenStart = start;
    this._currentTokenType = type;
  }
|
|
|
|
|
2019-12-08 14:57:07 +01:00
|
|
|
  /**
   * Completes the token begun by `_beginToken()`, pushes it onto `tokens` and returns it.
   *
   * @param parts The string parts of the token (their meaning depends on the token type).
   * @param end Optional cursor marking the end of the token; defaults to the current cursor.
   * @throws TokenError if no token was started, or the started token has no type
   *     (both indicate a programming error in the tokenizer itself).
   */
  private _endToken(parts: string[], end?: CharacterCursor): Token {
    if (this._currentTokenStart === null) {
      throw new TokenError(
          'Programming error - attempted to end a token when there was no start to the token',
          this._currentTokenType, this._cursor.getSpan(end));
    }
    if (this._currentTokenType === null) {
      throw new TokenError(
          'Programming error - attempted to end a token which has no token type', null,
          this._cursor.getSpan(this._currentTokenStart));
    }
    const token = new Token(
        this._currentTokenType, parts,
        (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints));
    this.tokens.push(token);
    // Reset so a mismatched `_endToken()` without a `_beginToken()` is detected above.
    this._currentTokenStart = null;
    this._currentTokenType = null;
    return token;
  }
|
|
|
|
|
2016-06-22 17:25:42 -07:00
|
|
|
private _createError(msg: string, span: ParseSourceSpan): _ControlFlowError {
|
2016-07-22 17:16:56 -07:00
|
|
|
if (this._isInExpansionForm()) {
|
2016-07-21 13:56:58 -07:00
|
|
|
msg += ` (Do you have an unescaped "{" in your template? Use "{{ '{' }}") to escape it.)`;
|
2016-07-22 17:16:56 -07:00
|
|
|
}
|
2016-07-21 13:56:58 -07:00
|
|
|
const error = new TokenError(msg, this._currentTokenType, span);
|
2019-02-26 13:06:26 +00:00
|
|
|
this._currentTokenStart = null;
|
|
|
|
this._currentTokenType = null;
|
2016-06-22 17:25:42 -07:00
|
|
|
return new _ControlFlowError(error);
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private handleError(e: any) {
|
|
|
|
if (e instanceof CursorError) {
|
|
|
|
e = this._createError(e.msg, this._cursor.getSpan(e.cursor));
|
2019-02-08 22:10:19 +00:00
|
|
|
}
|
2019-02-26 13:06:26 +00:00
|
|
|
if (e instanceof _ControlFlowError) {
|
|
|
|
this.errors.push(e.error);
|
|
|
|
} else {
|
|
|
|
throw e;
|
2019-02-08 22:10:19 +00:00
|
|
|
}
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2015-12-21 11:32:58 -08:00
|
|
|
private _attemptCharCode(charCode: number): boolean {
|
2019-02-26 13:06:26 +00:00
|
|
|
if (this._cursor.peek() === charCode) {
|
|
|
|
this._cursor.advance();
|
2015-10-07 09:34:21 -07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-12-21 11:32:58 -08:00
|
|
|
private _attemptCharCodeCaseInsensitive(charCode: number): boolean {
|
2019-02-26 13:06:26 +00:00
|
|
|
if (compareCharCodeCaseInsensitive(this._cursor.peek(), charCode)) {
|
|
|
|
this._cursor.advance();
|
2015-12-21 11:32:58 -08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
private _requireCharCode(charCode: number) {
|
2019-02-26 13:06:26 +00:00
|
|
|
const location = this._cursor.clone();
|
2015-12-21 11:32:58 -08:00
|
|
|
if (!this._attemptCharCode(charCode)) {
|
2016-06-09 14:53:03 -07:00
|
|
|
throw this._createError(
|
2019-02-26 13:06:26 +00:00
|
|
|
_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-21 11:32:58 -08:00
|
|
|
  /**
   * Attempts to consume the whole string `chars` from the cursor.
   * On failure the cursor is rolled back to where it was before the attempt
   * and `false` is returned.
   */
  private _attemptStr(chars: string): boolean {
    const len = chars.length;
    // Fast path: not enough input left to possibly match.
    if (this._cursor.charsLeft() < len) {
      return false;
    }
    const initialPosition = this._cursor.clone();
    for (let i = 0; i < len; i++) {
      if (!this._attemptCharCode(chars.charCodeAt(i))) {
        // If attempting to parse the string fails, we want to reset the parser
        // to where it was before the attempt
        this._cursor = initialPosition;
        return false;
      }
    }
    return true;
  }
|
|
|
|
|
2015-12-21 11:32:58 -08:00
|
|
|
private _attemptStrCaseInsensitive(chars: string): boolean {
|
2016-06-24 14:31:35 -07:00
|
|
|
for (let i = 0; i < chars.length; i++) {
|
|
|
|
if (!this._attemptCharCodeCaseInsensitive(chars.charCodeAt(i))) {
|
2015-12-21 11:32:58 -08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
private _requireStr(chars: string) {
|
2019-02-26 13:06:26 +00:00
|
|
|
const location = this._cursor.clone();
|
2015-12-21 11:32:58 -08:00
|
|
|
if (!this._attemptStr(chars)) {
|
2019-02-26 13:06:26 +00:00
|
|
|
throw this._createError(
|
|
|
|
_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-06-21 11:10:25 -07:00
|
|
|
private _attemptCharCodeUntilFn(predicate: (code: number) => boolean) {
|
2019-02-26 13:06:26 +00:00
|
|
|
while (!predicate(this._cursor.peek())) {
|
|
|
|
this._cursor.advance();
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-06-21 11:10:25 -07:00
|
|
|
private _requireCharCodeUntilFn(predicate: (code: number) => boolean, len: number) {
|
2019-02-26 13:06:26 +00:00
|
|
|
const start = this._cursor.clone();
|
2015-12-21 11:32:58 -08:00
|
|
|
this._attemptCharCodeUntilFn(predicate);
|
2019-12-08 14:57:07 +01:00
|
|
|
if (this._cursor.diff(start) < len) {
|
2016-06-22 17:25:42 -07:00
|
|
|
throw this._createError(
|
2019-02-26 13:06:26 +00:00
|
|
|
_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private _attemptUntilChar(char: number) {
|
2019-02-26 13:06:26 +00:00
|
|
|
while (this._cursor.peek() !== char) {
|
|
|
|
this._cursor.advance();
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-14 18:53:17 +01:00
|
|
|
private _readChar(): string {
|
|
|
|
// Don't rely upon reading directly from `_input` as the actual char value
|
|
|
|
// may have been generated from an escape sequence.
|
|
|
|
const char = String.fromCodePoint(this._cursor.peek());
|
|
|
|
this._cursor.advance();
|
|
|
|
return char;
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2021-05-14 18:53:17 +01:00
|
|
|
  /**
   * Consume an HTML entity starting at the current `&` character.
   *
   * Numeric references (`&#...;` / `&#x...;`) and named references (`&name;`)
   * produce an `ENCODED_ENTITY` token whose parts are `[decodedChar, rawText]`.
   * A named reference with no terminating `;` is not an entity: the in-progress
   * token is abandoned and a `textTokenType` token containing just `&` is
   * emitted instead, with the cursor rewound to re-lex the following characters.
   *
   * @param textTokenType the token type to fall back to when no entity is found.
   * @throws a tokenizer error for unparsable or unknown entities.
   */
  private _consumeEntity(textTokenType: TokenType): void {
    this._beginToken(TokenType.ENCODED_ENTITY);
    // `start` marks the `&` so the raw entity text can be recovered later.
    const start = this._cursor.clone();
    this._cursor.advance();
    if (this._attemptCharCode(chars.$HASH)) {
      // Numeric character reference: `&#123;` or `&#xAB;` / `&#XAB;`.
      const isHex = this._attemptCharCode(chars.$x) || this._attemptCharCode(chars.$X);
      const codeStart = this._cursor.clone();
      this._attemptCharCodeUntilFn(isDigitEntityEnd);
      if (this._cursor.peek() != chars.$SEMICOLON) {
        // Advance cursor to include the peeked character in the string provided to the error
        // message.
        this._cursor.advance();
        const entityType = isHex ? CharacterReferenceType.HEX : CharacterReferenceType.DEC;
        throw this._createError(
            _unparsableEntityErrorMsg(entityType, this._cursor.getChars(start)),
            this._cursor.getSpan());
      }
      const strNum = this._cursor.getChars(codeStart);
      // Consume the terminating `;`.
      this._cursor.advance();
      try {
        const charCode = parseInt(strNum, isHex ? 16 : 10);
        // Token parts: [decoded character, raw source text of the entity].
        this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
      } catch {
        throw this._createError(
            _unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
      }
    } else {
      // Named character reference: `&name;`.
      const nameStart = this._cursor.clone();
      this._attemptCharCodeUntilFn(isNamedEntityEnd);
      if (this._cursor.peek() != chars.$SEMICOLON) {
        // No semicolon was found so abort the encoded entity token that was in progress, and treat
        // this as a text token
        this._beginToken(textTokenType, start);
        // Rewind so the name characters are re-lexed as ordinary text.
        this._cursor = nameStart;
        this._endToken(['&']);
      } else {
        const name = this._cursor.getChars(nameStart);
        // Consume the terminating `;`.
        this._cursor.advance();
        const char = NAMED_ENTITIES[name];
        if (!char) {
          throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
        }
        this._endToken([char, `&${name};`]);
      }
    }
  }
|
|
|
|
|
2021-05-14 18:53:17 +01:00
|
|
|
  /**
   * Consume raw text until `endMarkerPredicate` signals the end of the region.
   *
   * Emits `RAW_TEXT` tokens, or `ESCAPABLE_RAW_TEXT` tokens interleaved with
   * `ENCODED_ENTITY` tokens when `consumeEntities` is true. Carriage returns in
   * the accumulated text are normalized via `_processCarriageReturns`.
   *
   * @param consumeEntities whether `&` starts an entity inside this region.
   * @param endMarkerPredicate returns true when the end marker has been matched;
   *     note it may advance the cursor while probing — the cursor is restored
   *     afterwards so the caller can re-consume the marker itself.
   */
  private _consumeRawText(consumeEntities: boolean, endMarkerPredicate: () => boolean): void {
    this._beginToken(consumeEntities ? TokenType.ESCAPABLE_RAW_TEXT : TokenType.RAW_TEXT);
    const parts: string[] = [];
    while (true) {
      // Probe for the end marker, then restore the cursor regardless of outcome.
      const tagCloseStart = this._cursor.clone();
      const foundEndMarker = endMarkerPredicate();
      this._cursor = tagCloseStart;
      if (foundEndMarker) {
        break;
      }
      if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
        // Close the text token collected so far, emit the entity, then start a
        // fresh escapable-raw-text token for what follows.
        this._endToken([this._processCarriageReturns(parts.join(''))]);
        parts.length = 0;
        this._consumeEntity(TokenType.ESCAPABLE_RAW_TEXT);
        this._beginToken(TokenType.ESCAPABLE_RAW_TEXT);
      } else {
        parts.push(this._readChar());
      }
    }
    // Emit whatever text remains before the end marker.
    this._endToken([this._processCarriageReturns(parts.join(''))]);
  }
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private _consumeComment(start: CharacterCursor) {
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.COMMENT_START, start);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$MINUS);
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken([]);
|
2019-02-26 13:06:26 +00:00
|
|
|
this._consumeRawText(false, () => this._attemptStr('-->'));
|
|
|
|
this._beginToken(TokenType.COMMENT_END);
|
|
|
|
this._requireStr('-->');
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken([]);
|
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private _consumeCdata(start: CharacterCursor) {
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.CDATA_START, start);
|
2015-12-21 11:32:58 -08:00
|
|
|
this._requireStr('CDATA[');
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken([]);
|
2019-02-26 13:06:26 +00:00
|
|
|
this._consumeRawText(false, () => this._attemptStr(']]>'));
|
|
|
|
this._beginToken(TokenType.CDATA_END);
|
|
|
|
this._requireStr(']]>');
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken([]);
|
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private _consumeDocType(start: CharacterCursor) {
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.DOC_TYPE, start);
|
2019-02-26 13:06:26 +00:00
|
|
|
const contentStart = this._cursor.clone();
|
2016-06-17 10:57:50 -07:00
|
|
|
this._attemptUntilChar(chars.$GT);
|
2019-02-26 13:06:26 +00:00
|
|
|
const content = this._cursor.getChars(contentStart);
|
|
|
|
this._cursor.advance();
|
|
|
|
this._endToken([content]);
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
private _consumePrefixAndName(): string[] {
|
2019-02-26 13:06:26 +00:00
|
|
|
const nameOrPrefixStart = this._cursor.clone();
|
2019-02-26 13:05:54 +00:00
|
|
|
let prefix: string = '';
|
2019-02-26 13:06:26 +00:00
|
|
|
while (this._cursor.peek() !== chars.$COLON && !isPrefixEnd(this._cursor.peek())) {
|
|
|
|
this._cursor.advance();
|
|
|
|
}
|
|
|
|
let nameStart: CharacterCursor;
|
|
|
|
if (this._cursor.peek() === chars.$COLON) {
|
|
|
|
prefix = this._cursor.getChars(nameOrPrefixStart);
|
|
|
|
this._cursor.advance();
|
|
|
|
nameStart = this._cursor.clone();
|
2015-10-07 09:34:21 -07:00
|
|
|
} else {
|
|
|
|
nameStart = nameOrPrefixStart;
|
|
|
|
}
|
2019-02-26 13:06:26 +00:00
|
|
|
this._requireCharCodeUntilFn(isNameEnd, prefix === '' ? 0 : 1);
|
|
|
|
const name = this._cursor.getChars(nameStart);
|
2015-10-07 09:34:21 -07:00
|
|
|
return [prefix, name];
|
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
  /**
   * Consume an opening tag: the `TAG_OPEN_START` token, any attributes, and the
   * `TAG_OPEN_END`/`TAG_OPEN_END_VOID` token. For raw-text content tags the
   * body and closing tag are consumed immediately afterwards.
   *
   * Recoverable lexer errors (`_ControlFlowError`) are handled here: a tag that
   * errored after its start token becomes `INCOMPLETE_TAG_OPEN`; a tag that
   * never produced a start token is downgraded to a `<` text token.
   *
   * @param start cursor position of the `<` that began this tag.
   */
  private _consumeTagOpen(start: CharacterCursor) {
    let tagName: string;
    let prefix: string;
    let openTagToken: Token|undefined;
    try {
      // A tag must begin with an ASCII letter; anything else is an error that
      // the catch block below converts into plain text.
      if (!chars.isAsciiLetter(this._cursor.peek())) {
        throw this._createError(
            _unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
      }

      openTagToken = this._consumeTagOpenStart(start);
      prefix = openTagToken.parts[0];
      tagName = openTagToken.parts[1];
      this._attemptCharCodeUntilFn(isNotWhitespace);
      // Consume attributes until the tag terminator (`/`, `>`), a stray `<`,
      // or end of input.
      while (this._cursor.peek() !== chars.$SLASH && this._cursor.peek() !== chars.$GT &&
             this._cursor.peek() !== chars.$LT && this._cursor.peek() !== chars.$EOF) {
        this._consumeAttributeName();
        this._attemptCharCodeUntilFn(isNotWhitespace);
        if (this._attemptCharCode(chars.$EQ)) {
          this._attemptCharCodeUntilFn(isNotWhitespace);
          this._consumeAttributeValue();
        }
        this._attemptCharCodeUntilFn(isNotWhitespace);
      }
      this._consumeTagOpenEnd();
    } catch (e) {
      if (e instanceof _ControlFlowError) {
        if (openTagToken) {
          // We errored before we could close the opening tag, so it is incomplete.
          openTagToken.type = TokenType.INCOMPLETE_TAG_OPEN;
        } else {
          // When the start tag is invalid, assume we want a "<" as text.
          // Back to back text tokens are merged at the end.
          this._beginToken(TokenType.TEXT, start);
          this._endToken(['<']);
        }
        return;
      }

      throw e;
    }

    // For raw-text tags (e.g. script/style-like content types) the body must be
    // lexed as raw text rather than as markup.
    const contentTokenType = this._getTagDefinition(tagName).getContentType(prefix);

    if (contentTokenType === TagContentType.RAW_TEXT) {
      this._consumeRawTextWithTagClose(prefix, tagName, false);
    } else if (contentTokenType === TagContentType.ESCAPABLE_RAW_TEXT) {
      this._consumeRawTextWithTagClose(prefix, tagName, true);
    }
  }
|
|
|
|
|
2021-05-14 18:53:17 +01:00
|
|
|
  /**
   * Consume the raw-text body of a tag followed by its closing tag
   * (`</tagName>`), emitting the raw text token(s) and a `TAG_CLOSE` token.
   *
   * @param prefix tag prefix (may be `''`).
   * @param tagName tag name used to match the closing tag (case-insensitive).
   * @param consumeEntities whether `&` entities are decoded within the body.
   */
  private _consumeRawTextWithTagClose(prefix: string, tagName: string, consumeEntities: boolean) {
    this._consumeRawText(consumeEntities, () => {
      // The end marker is `</` + optional whitespace + tagName + optional
      // whitespace + `>`. Each failed step aborts the probe.
      if (!this._attemptCharCode(chars.$LT)) return false;
      if (!this._attemptCharCode(chars.$SLASH)) return false;
      this._attemptCharCodeUntilFn(isNotWhitespace);
      if (!this._attemptStrCaseInsensitive(tagName)) return false;
      this._attemptCharCodeUntilFn(isNotWhitespace);
      return this._attemptCharCode(chars.$GT);
    });
    // Re-consume the closing tag (the raw-text probe restored the cursor);
    // it must contain at least 3 characters before the `>`.
    this._beginToken(TokenType.TAG_CLOSE);
    this._requireCharCodeUntilFn(code => code === chars.$GT, 3);
    this._cursor.advance();  // Consume the `>`
    this._endToken([prefix, tagName]);
  }
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private _consumeTagOpenStart(start: CharacterCursor) {
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.TAG_OPEN_START, start);
|
2016-06-24 14:31:35 -07:00
|
|
|
const parts = this._consumePrefixAndName();
|
2019-02-26 13:05:54 +00:00
|
|
|
return this._endToken(parts);
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
private _consumeAttributeName() {
|
2019-03-15 11:56:42 +01:00
|
|
|
const attrNameStart = this._cursor.peek();
|
|
|
|
if (attrNameStart === chars.$SQ || attrNameStart === chars.$DQ) {
|
|
|
|
throw this._createError(_unexpectedCharacterErrorMsg(attrNameStart), this._cursor.getSpan());
|
|
|
|
}
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.ATTR_NAME);
|
2016-06-24 14:31:35 -07:00
|
|
|
const prefixAndName = this._consumePrefixAndName();
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken(prefixAndName);
|
|
|
|
}
|
|
|
|
|
|
|
|
private _consumeAttributeValue() {
|
2016-11-12 14:08:58 +01:00
|
|
|
let value: string;
|
2019-02-26 13:06:26 +00:00
|
|
|
if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
|
|
|
|
const quoteChar = this._cursor.peek();
|
2021-05-13 17:00:56 +01:00
|
|
|
this._consumeQuote(quoteChar);
|
|
|
|
this._consumeWithInterpolation(
|
|
|
|
TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION,
|
|
|
|
() => this._cursor.peek() === quoteChar);
|
|
|
|
this._consumeQuote(quoteChar);
|
2015-10-07 09:34:21 -07:00
|
|
|
} else {
|
2021-05-13 17:00:56 +01:00
|
|
|
const endPredicate = () => isNameEnd(this._cursor.peek());
|
|
|
|
this._consumeWithInterpolation(
|
|
|
|
TokenType.ATTR_VALUE_TEXT, TokenType.ATTR_VALUE_INTERPOLATION, endPredicate);
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-13 17:00:56 +01:00
|
|
|
private _consumeQuote(quoteChar: number) {
|
|
|
|
this._beginToken(TokenType.ATTR_QUOTE);
|
|
|
|
this._requireCharCode(quoteChar);
|
|
|
|
this._endToken([String.fromCodePoint(quoteChar)]);
|
|
|
|
}
|
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
private _consumeTagOpenEnd() {
|
2016-07-21 13:56:58 -07:00
|
|
|
const tokenType =
|
|
|
|
this._attemptCharCode(chars.$SLASH) ? TokenType.TAG_OPEN_END_VOID : TokenType.TAG_OPEN_END;
|
2015-10-07 09:34:21 -07:00
|
|
|
this._beginToken(tokenType);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$GT);
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken([]);
|
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
private _consumeTagClose(start: CharacterCursor) {
|
2016-07-21 13:56:58 -07:00
|
|
|
this._beginToken(TokenType.TAG_CLOSE, start);
|
2015-12-21 11:32:58 -08:00
|
|
|
this._attemptCharCodeUntilFn(isNotWhitespace);
|
2016-11-12 14:08:58 +01:00
|
|
|
const prefixAndName = this._consumePrefixAndName();
|
2015-12-21 11:32:58 -08:00
|
|
|
this._attemptCharCodeUntilFn(isNotWhitespace);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$GT);
|
2015-10-07 09:34:21 -07:00
|
|
|
this._endToken(prefixAndName);
|
|
|
|
}
|
|
|
|
|
2016-04-12 11:46:28 -07:00
|
|
|
  /**
   * Consume the start of an ICU expansion form: the opening `{`, the switch
   * expression ("condition"), and the expansion type, each followed by a `,`.
   *
   * Line-ending handling of the condition depends on the
   * `_i18nNormalizeLineEndingsInICUs` option: when it is off, the raw
   * (un-normalized) text is emitted and the token is recorded in
   * `nonNormalizedIcuExpressions` if normalization would have changed it.
   */
  private _consumeExpansionFormStart() {
    this._beginToken(TokenType.EXPANSION_FORM_START);
    this._requireCharCode(chars.$LBRACE);
    this._endToken([]);

    // Track nesting so `}` can be disambiguated later.
    this._expansionCaseStack.push(TokenType.EXPANSION_FORM_START);

    // The switch expression, up to (but not including) the first `,`.
    this._beginToken(TokenType.RAW_TEXT);
    const condition = this._readUntil(chars.$COMMA);
    const normalizedCondition = this._processCarriageReturns(condition);
    if (this._i18nNormalizeLineEndingsInICUs) {
      // We explicitly want to normalize line endings for this text.
      this._endToken([normalizedCondition]);
    } else {
      // We are not normalizing line endings.
      const conditionToken = this._endToken([condition]);
      if (normalizedCondition !== condition) {
        this.nonNormalizedIcuExpressions.push(conditionToken);
      }
    }
    this._requireCharCode(chars.$COMMA);
    this._attemptCharCodeUntilFn(isNotWhitespace);

    // The expansion type (e.g. plural/select), up to the next `,`.
    this._beginToken(TokenType.RAW_TEXT);
    const type = this._readUntil(chars.$COMMA);
    this._endToken([type]);
    this._requireCharCode(chars.$COMMA);
    this._attemptCharCodeUntilFn(isNotWhitespace);
  }
|
|
|
|
|
|
|
|
private _consumeExpansionCaseStart() {
|
2019-02-26 13:06:26 +00:00
|
|
|
this._beginToken(TokenType.EXPANSION_CASE_VALUE);
|
2016-06-24 14:31:35 -07:00
|
|
|
const value = this._readUntil(chars.$LBRACE).trim();
|
2019-02-26 13:06:26 +00:00
|
|
|
this._endToken([value]);
|
2016-04-12 11:46:28 -07:00
|
|
|
this._attemptCharCodeUntilFn(isNotWhitespace);
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
this._beginToken(TokenType.EXPANSION_CASE_EXP_START);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$LBRACE);
|
2019-02-26 13:06:26 +00:00
|
|
|
this._endToken([]);
|
2016-04-12 11:46:28 -07:00
|
|
|
this._attemptCharCodeUntilFn(isNotWhitespace);
|
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
this._expansionCaseStack.push(TokenType.EXPANSION_CASE_EXP_START);
|
2016-04-12 11:46:28 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
private _consumeExpansionCaseEnd() {
|
2019-02-26 13:06:26 +00:00
|
|
|
this._beginToken(TokenType.EXPANSION_CASE_EXP_END);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$RBRACE);
|
2019-02-26 13:06:26 +00:00
|
|
|
this._endToken([]);
|
2016-04-12 11:46:28 -07:00
|
|
|
this._attemptCharCodeUntilFn(isNotWhitespace);
|
|
|
|
|
2016-06-09 13:48:53 -07:00
|
|
|
this._expansionCaseStack.pop();
|
2016-04-12 11:46:28 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
private _consumeExpansionFormEnd() {
|
2019-02-26 13:06:26 +00:00
|
|
|
this._beginToken(TokenType.EXPANSION_FORM_END);
|
2016-06-17 10:57:50 -07:00
|
|
|
this._requireCharCode(chars.$RBRACE);
|
2016-04-12 11:46:28 -07:00
|
|
|
this._endToken([]);
|
|
|
|
|
2016-06-09 13:48:53 -07:00
|
|
|
this._expansionCaseStack.pop();
|
2016-04-12 11:46:28 -07:00
|
|
|
}
|
|
|
|
|
2021-05-13 17:00:56 +01:00
|
|
|
  /**
   * Consume a string that may contain interpolation expressions.
   *
   * The first token consumed will be of `tokenType` and then there will be alternating
   * `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true.
   *
   * @param textTokenType the kind of tokens to interleave around interpolation tokens.
   * @param interpolationTokenType the kind of tokens that contain interpolation.
   * @param endPredicate a function that should return true when we should stop consuming.
   */
  private _consumeWithInterpolation(
      textTokenType: TokenType, interpolationTokenType: TokenType, endPredicate: () => boolean) {
    this._beginToken(textTokenType);
    const parts: string[] = [];

    while (!endPredicate()) {
      const current = this._cursor.clone();
      if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
        // Interpolation start marker found: close the text token just before
        // the marker, emit the interpolation, then reopen a text token.
        this._endToken([this._processCarriageReturns(parts.join(''))], current);
        parts.length = 0;
        this._consumeInterpolation(interpolationTokenType, current);
        this._beginToken(textTokenType);
      } else if (this._cursor.peek() === chars.$AMPERSAND) {
        // Entity found: close the current text token, emit the entity, reopen.
        this._endToken([this._processCarriageReturns(parts.join(''))]);
        parts.length = 0;
        this._consumeEntity(textTokenType);
        this._beginToken(textTokenType);
      } else {
        parts.push(this._readChar());
      }
    }

    // It is possible that an interpolation was started but not ended inside this text token.
    // Make sure that we reset the state of the lexer correctly.
    this._inInterpolation = false;

    this._endToken([this._processCarriageReturns(parts.join(''))]);
  }
|
2015-11-10 15:56:25 -08:00
|
|
|
|
2021-05-13 17:00:56 +01:00
|
|
|
  /**
   * Consume one interpolation expression (the start marker has already been
   * matched by the caller) into an `interpolationTokenType` token whose parts
   * are `[startMarker, expression, endMarker?]`.
   *
   * The scan tracks quoted strings, `//` comments and `\` escapes so that an
   * end marker inside a string is not treated as the end of the interpolation.
   * The token may end without a closing marker at EOF or when what looks like
   * an HTML tag starts mid-expression.
   *
   * @param interpolationTokenType the token type to emit.
   * @param interpolationStart cursor position of the start marker.
   */
  private _consumeInterpolation(
      interpolationTokenType: TokenType, interpolationStart: CharacterCursor) {
    const parts: string[] = [];
    this._beginToken(interpolationTokenType, interpolationStart);
    parts.push(this._interpolationConfig.start);

    // Find the end of the interpolation, ignoring content inside quotes.
    const expressionStart = this._cursor.clone();
    let inQuote: number|null = null;
    let inComment = false;
    while (this._cursor.peek() !== chars.$EOF) {
      const current = this._cursor.clone();

      if (this._isTagStart()) {
        // We are starting what looks like an HTML element in the middle of this interpolation.
        // Reset the cursor to before the `<` character and end the interpolation token.
        // (This is actually wrong but here for backward compatibility).
        this._cursor = current;
        parts.push(this._getProcessedChars(expressionStart, current));
        return this._endToken(parts);
      }

      if (inQuote === null) {
        if (this._attemptStr(this._interpolationConfig.end)) {
          // We are not in a string, and we hit the end interpolation marker
          parts.push(this._getProcessedChars(expressionStart, current));
          parts.push(this._interpolationConfig.end);
          return this._endToken(parts);
        } else if (this._attemptStr('//')) {
          // Once we are in a comment we ignore any quotes
          inComment = true;
        }
      }

      const char = this._cursor.peek();
      this._cursor.advance();
      if (char === chars.$BACKSLASH) {
        // Skip the next character because it was escaped.
        this._cursor.advance();
      } else if (char === inQuote) {
        // Exiting the current quoted string
        inQuote = null;
      } else if (!inComment && chars.isQuote(char)) {
        // Entering a new quoted string
        inQuote = char;
      }
    }

    // We hit EOF without finding a closing interpolation marker
    parts.push(this._getProcessedChars(expressionStart, this._cursor));
    return this._endToken(parts);
  }
|
|
|
|
|
|
|
|
private _getProcessedChars(start: CharacterCursor, end: CharacterCursor): string {
|
|
|
|
return this._processCarriageReturns(end.getChars(start))
|
|
|
|
}
|
|
|
|
|
2016-06-21 16:55:17 -07:00
|
|
|
private _isTextEnd(): boolean {
|
2021-06-21 21:20:44 +01:00
|
|
|
if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
|
2016-06-21 16:55:17 -07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-06-24 14:31:35 -07:00
|
|
|
if (this._tokenizeIcu && !this._inInterpolation) {
|
2019-02-26 13:06:26 +00:00
|
|
|
if (this.isExpansionFormStart()) {
|
2016-06-21 16:55:17 -07:00
|
|
|
// start of an expansion form
|
2016-06-20 09:52:41 -07:00
|
|
|
return true;
|
2016-06-21 16:55:17 -07:00
|
|
|
}
|
|
|
|
|
2019-02-26 13:06:26 +00:00
|
|
|
if (this._cursor.peek() === chars.$RBRACE && this._isInExpansionCase()) {
|
2016-06-21 16:55:17 -07:00
|
|
|
// end of and expansion case
|
2016-04-13 16:01:25 -07:00
|
|
|
return true;
|
2016-06-21 16:55:17 -07:00
|
|
|
}
|
2016-04-12 11:46:28 -07:00
|
|
|
}
|
2016-06-21 16:55:17 -07:00
|
|
|
|
2016-04-12 11:46:28 -07:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-06-21 21:20:44 +01:00
|
|
|
/**
|
|
|
|
* Returns true if the current cursor is pointing to the start of a tag
|
|
|
|
* (opening/closing/comments/cdata/etc).
|
|
|
|
*/
|
|
|
|
private _isTagStart(): boolean {
|
|
|
|
if (this._cursor.peek() === chars.$LT) {
|
|
|
|
// We assume that `<` followed by whitespace is not the start of an HTML element.
|
|
|
|
const tmp = this._cursor.clone();
|
|
|
|
tmp.advance();
|
|
|
|
// If the next character is alphabetic, ! nor / then it is a tag start
|
|
|
|
const code = tmp.peek();
|
|
|
|
if ((chars.$a <= code && code <= chars.$z) || (chars.$A <= code && code <= chars.$Z) ||
|
|
|
|
code === chars.$SLASH || code === chars.$BANG) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-04-12 11:46:28 -07:00
|
|
|
private _readUntil(char: number): string {
|
2019-02-26 13:06:26 +00:00
|
|
|
const start = this._cursor.clone();
|
2016-04-12 11:46:28 -07:00
|
|
|
this._attemptUntilChar(char);
|
2019-02-26 13:06:26 +00:00
|
|
|
return this._cursor.getChars(start);
|
2015-11-10 15:56:25 -08:00
|
|
|
}
|
2016-04-13 16:01:25 -07:00
|
|
|
|
2016-06-09 13:48:53 -07:00
|
|
|
private _isInExpansionCase(): boolean {
|
|
|
|
return this._expansionCaseStack.length > 0 &&
|
2016-06-09 14:53:03 -07:00
|
|
|
this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
|
2016-07-21 13:56:58 -07:00
|
|
|
TokenType.EXPANSION_CASE_EXP_START;
|
2016-04-13 16:01:25 -07:00
|
|
|
}
|
|
|
|
|
2016-06-09 13:48:53 -07:00
|
|
|
private _isInExpansionForm(): boolean {
|
|
|
|
return this._expansionCaseStack.length > 0 &&
|
2016-06-09 14:53:03 -07:00
|
|
|
this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
|
2016-07-21 13:56:58 -07:00
|
|
|
TokenType.EXPANSION_FORM_START;
|
2016-04-13 16:01:25 -07:00
|
|
|
}
|
2019-02-26 13:06:26 +00:00
|
|
|
|
|
|
|
/**
 * Returns true if the cursor is at the opening `{` of an ICU expansion form.
 *
 * A `{` that begins an interpolation marker (e.g. `{{`) is NOT an expansion
 * form start, so when an interpolation config is present we probe for its
 * start marker and rewind the cursor afterwards so no input is consumed.
 */
private isExpansionFormStart(): boolean {
  if (this._cursor.peek() !== chars.$LBRACE) {
    return false;
  }
  if (!this._interpolationConfig) {
    return true;
  }
  const savedCursor = this._cursor.clone();
  const startsInterpolation = this._attemptStr(this._interpolationConfig.start);
  this._cursor = savedCursor;  // rewind: this was only a probe
  return !startsInterpolation;
}
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
function isNotWhitespace(code: number): boolean {
|
2016-06-21 11:10:25 -07:00
|
|
|
return !chars.isWhitespace(code) || code === chars.$EOF;
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
function isNameEnd(code: number): boolean {
|
2020-09-02 10:17:01 -05:00
|
|
|
return chars.isWhitespace(code) || code === chars.$GT || code === chars.$LT ||
|
2021-03-01 12:41:40 -08:00
|
|
|
code === chars.$SLASH || code === chars.$SQ || code === chars.$DQ || code === chars.$EQ ||
|
|
|
|
code === chars.$EOF;
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
function isPrefixEnd(code: number): boolean {
|
2016-06-17 10:57:50 -07:00
|
|
|
return (code < chars.$a || chars.$z < code) && (code < chars.$A || chars.$Z < code) &&
|
|
|
|
(code < chars.$0 || code > chars.$9);
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
|
|
|
|
2015-11-10 15:56:25 -08:00
|
|
|
function isDigitEntityEnd(code: number): boolean {
|
2016-06-21 11:10:25 -07:00
|
|
|
return code == chars.$SEMICOLON || code == chars.$EOF || !chars.isAsciiHexDigit(code);
|
2015-11-10 15:56:25 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
function isNamedEntityEnd(code: number): boolean {
|
2016-06-21 11:10:25 -07:00
|
|
|
return code == chars.$SEMICOLON || code == chars.$EOF || !chars.isAsciiLetter(code);
|
2015-11-10 15:56:25 -08:00
|
|
|
}
|
|
|
|
|
2016-06-09 13:48:53 -07:00
|
|
|
function isExpansionCaseStart(peek: number): boolean {
|
2020-03-18 07:39:48 -04:00
|
|
|
return peek !== chars.$RBRACE;
|
2015-12-21 11:32:58 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
function compareCharCodeCaseInsensitive(code1: number, code2: number): boolean {
|
|
|
|
return toUpperCaseCharCode(code1) == toUpperCaseCharCode(code2);
|
|
|
|
}
|
|
|
|
|
|
|
|
function toUpperCaseCharCode(code: number): number {
|
2016-06-17 10:57:50 -07:00
|
|
|
return code >= chars.$a && code <= chars.$z ? code - chars.$a + chars.$A : code;
|
2015-10-07 09:34:21 -07:00
|
|
|
}
|
2015-12-06 13:11:00 -08:00
|
|
|
|
2016-07-21 13:56:58 -07:00
|
|
|
function mergeTextTokens(srcTokens: Token[]): Token[] {
|
2016-11-12 14:08:58 +01:00
|
|
|
const dstTokens: Token[] = [];
|
2017-03-24 09:59:58 -07:00
|
|
|
let lastDstToken: Token|undefined = undefined;
|
2015-12-06 13:11:00 -08:00
|
|
|
for (let i = 0; i < srcTokens.length; i++) {
|
2016-11-12 14:08:58 +01:00
|
|
|
const token = srcTokens[i];
|
2021-05-14 18:53:17 +01:00
|
|
|
if ((lastDstToken && lastDstToken.type == TokenType.TEXT && token.type == TokenType.TEXT) ||
|
|
|
|
(lastDstToken && lastDstToken.type == TokenType.ATTR_VALUE_TEXT &&
|
|
|
|
token.type == TokenType.ATTR_VALUE_TEXT)) {
|
2020-04-08 10:14:18 -07:00
|
|
|
lastDstToken.parts[0]! += token.parts[0];
|
2015-12-06 13:11:00 -08:00
|
|
|
lastDstToken.sourceSpan.end = token.sourceSpan.end;
|
|
|
|
} else {
|
|
|
|
lastDstToken = token;
|
|
|
|
dstTokens.push(lastDstToken);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return dstTokens;
|
|
|
|
}
|
2019-02-26 13:06:26 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
 * The _Tokenizer uses objects of this type to move through the input text,
 * extracting "parsed characters". These could be more than one actual character
 * if the text contains escape sequences.
 */
interface CharacterCursor {
  /** Initialize the cursor (computes the first `peek()` value). */
  init(): void;
  /** The parsed character at the current cursor position. */
  peek(): number;
  /** Advance the cursor by one parsed character. */
  advance(): void;
  /**
   * Get a span from the marked start point to the current point.
   * If given, characters in `leadingTriviaCodePoints` are excluded from the
   * start of the span (but still recorded as the span's "full start").
   */
  getSpan(start?: this, leadingTriviaCodePoints?: number[]): ParseSourceSpan;
  /** Get the parsed characters from the marked start point to the current point. */
  getChars(start: this): string;
  /** The number of characters left before the end of the cursor. */
  charsLeft(): number;
  /** The number of characters between `this` cursor and `other` cursor. */
  diff(other: this): number;
  /** Make an independent copy of this cursor. */
  clone(): CharacterCursor;
}
|
|
|
|
|
|
|
|
/** Mutable position and lookahead state used by the character cursors. */
interface CursorState {
  // The character code at `offset`, or `chars.$EOF` once past the end.
  peek: number;
  // Absolute character offset into the input string.
  offset: number;
  // Current line number (incremented on each `\n`).
  line: number;
  // Current column number (reset to 0 after a newline).
  column: number;
}
|
|
|
|
|
|
|
|
/**
 * A `CharacterCursor` over raw (unescaped) text: each parsed character is
 * exactly one character code of the input string.
 */
class PlainCharacterCursor implements CharacterCursor {
  // Current position and lookahead; mutated in place by `advanceState`.
  protected state: CursorState;
  protected file: ParseSourceFile;
  protected input: string;
  // Exclusive end offset of the range being tokenized.
  protected end: number;

  constructor(fileOrCursor: PlainCharacterCursor);
  constructor(fileOrCursor: ParseSourceFile, range: LexerRange);
  constructor(fileOrCursor: ParseSourceFile|PlainCharacterCursor, range?: LexerRange) {
    if (fileOrCursor instanceof PlainCharacterCursor) {
      // Clone-construction: share the immutable inputs, copy the mutable state.
      this.file = fileOrCursor.file;
      this.input = fileOrCursor.input;
      this.end = fileOrCursor.end;

      const state = fileOrCursor.state;
      // Note: avoid using `{...fileOrCursor.state}` here as that has a severe performance penalty.
      // In ES5 bundles the object spread operator is translated into the `__assign` helper, which
      // is not optimized by VMs as efficiently as a raw object literal. Since this constructor is
      // called in tight loops, this difference matters.
      this.state = {
        peek: state.peek,
        offset: state.offset,
        line: state.line,
        column: state.column,
      };
    } else {
      if (!range) {
        throw new Error(
            'Programming error: the range argument must be provided with a file argument.');
      }
      this.file = fileOrCursor;
      this.input = fileOrCursor.content;
      this.end = range.endPos;
      // `peek` starts at -1; callers must invoke `init()` before reading it.
      this.state = {
        peek: -1,
        offset: range.startPos,
        line: range.startLine,
        column: range.startCol,
      };
    }
  }

  clone(): PlainCharacterCursor {
    return new PlainCharacterCursor(this);
  }

  peek() {
    return this.state.peek;
  }
  charsLeft() {
    return this.end - this.state.offset;
  }
  diff(other: this) {
    return this.state.offset - other.state.offset;
  }

  advance(): void {
    this.advanceState(this.state);
  }

  init(): void {
    this.updatePeek(this.state);
  }

  /**
   * Build a span from `start` (defaults to the current position) to the
   * current position. If `leadingTriviaCodePoints` is given, the span's start
   * is advanced past any leading run of those code points, while the span's
   * `fullStart` still records the original, un-skipped start.
   */
  getSpan(start?: this, leadingTriviaCodePoints?: number[]): ParseSourceSpan {
    start = start || this;
    let fullStart = start;
    if (leadingTriviaCodePoints) {
      while (this.diff(start) > 0 && leadingTriviaCodePoints.indexOf(start.peek()) !== -1) {
        if (fullStart === start) {
          // Lazily clone so the caller's cursor is never mutated.
          start = start.clone() as this;
        }
        start.advance();
      }
    }
    const startLocation = this.locationFromCursor(start);
    const endLocation = this.locationFromCursor(this);
    const fullStartLocation =
        fullStart !== start ? this.locationFromCursor(fullStart) : startLocation;
    return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
  }

  getChars(start: this): string {
    return this.input.substring(start.state.offset, this.state.offset);
  }

  // Character code (via `charCodeAt`) at an absolute offset into the input.
  charAt(pos: number): number {
    return this.input.charCodeAt(pos);
  }

  // Move `state` forward one character, maintaining line/column and `peek`.
  // Throws a CursorError when already at the end of the range.
  protected advanceState(state: CursorState) {
    if (state.offset >= this.end) {
      this.state = state;
      throw new CursorError('Unexpected character "EOF"', this);
    }
    const currentChar = this.charAt(state.offset);
    if (currentChar === chars.$LF) {
      state.line++;
      state.column = 0;
    } else if (!chars.isNewLine(currentChar)) {
      // Newline chars other than `\n` (e.g. `\r`) advance neither line nor column.
      state.column++;
    }
    state.offset++;
    this.updatePeek(state);
  }

  // Refresh `state.peek` from the character at `state.offset` ($EOF past the end).
  protected updatePeek(state: CursorState): void {
    state.peek = state.offset >= this.end ? chars.$EOF : this.charAt(state.offset);
  }

  private locationFromCursor(cursor: this): ParseLocation {
    return new ParseLocation(
        cursor.file, cursor.state.offset, cursor.state.line, cursor.state.column);
  }
}
|
|
|
|
|
|
|
|
/**
 * A `CharacterCursor` that interprets backslash escape sequences (`\n`, `\t`,
 * `\uXXXX`, `\u{...}`, `\xXX`, octal codes, line continuations, etc.) so that
 * `peek()` yields the unescaped character, while `internalState` tracks the
 * real position within the raw (escaped) text.
 */
class EscapedCharacterCursor extends PlainCharacterCursor {
  // Position within the raw text; diverges from `state` while an escape
  // sequence is being decoded.
  protected internalState: CursorState;

  constructor(fileOrCursor: EscapedCharacterCursor);
  constructor(fileOrCursor: ParseSourceFile, range: LexerRange);
  constructor(fileOrCursor: ParseSourceFile|EscapedCharacterCursor, range?: LexerRange) {
    if (fileOrCursor instanceof EscapedCharacterCursor) {
      super(fileOrCursor);
      this.internalState = {...fileOrCursor.internalState};
    } else {
      super(fileOrCursor, range!);
      // No escape in progress: internal and external state are the same object.
      this.internalState = this.state;
    }
  }

  override advance(): void {
    // Re-sync to the raw-text position before stepping, then decode any escape
    // sequence that starts at the new position.
    this.state = this.internalState;
    super.advance();
    this.processEscapeSequence();
  }

  override init(): void {
    super.init();
    this.processEscapeSequence();
  }

  override clone(): EscapedCharacterCursor {
    return new EscapedCharacterCursor(this);
  }

  // Returns the *unescaped* characters between `start` and this cursor by
  // replaying the cursor rather than slicing the raw input.
  override getChars(start: this): string {
    const cursor = start.clone();
    let chars = '';
    while (cursor.internalState.offset < this.internalState.offset) {
      chars += String.fromCodePoint(cursor.peek());
      cursor.advance();
    }
    return chars;
  }

  /**
   * Process the escape sequence that starts at the current position in the text.
   *
   * This method is called to ensure that `peek` has the unescaped value of escape sequences.
   */
  protected processEscapeSequence(): void {
    const peek = () => this.internalState.peek;

    if (peek() === chars.$BACKSLASH) {
      // We have hit an escape sequence so we need the internal state to become independent
      // of the external state.
      this.internalState = {...this.state};

      // Move past the backslash
      this.advanceState(this.internalState);

      // First check for standard control char sequences
      if (peek() === chars.$n) {
        this.state.peek = chars.$LF;
      } else if (peek() === chars.$r) {
        this.state.peek = chars.$CR;
      } else if (peek() === chars.$v) {
        this.state.peek = chars.$VTAB;
      } else if (peek() === chars.$t) {
        this.state.peek = chars.$TAB;
      } else if (peek() === chars.$b) {
        this.state.peek = chars.$BSPACE;
      } else if (peek() === chars.$f) {
        this.state.peek = chars.$FF;
      }

      // Now consider more complex sequences
      else if (peek() === chars.$u) {
        // Unicode code-point sequence
        this.advanceState(this.internalState);  // advance past the `u` char
        if (peek() === chars.$LBRACE) {
          // Variable length Unicode, e.g. `\u{123}`
          this.advanceState(this.internalState);  // advance past the `{` char
          // Advance past the variable number of hex digits until we hit a `}` char
          const digitStart = this.clone();
          let length = 0;
          while (peek() !== chars.$RBRACE) {
            this.advanceState(this.internalState);
            length++;
          }
          this.state.peek = this.decodeHexDigits(digitStart, length);
        } else {
          // Fixed length Unicode, e.g. `\u1234`
          const digitStart = this.clone();
          this.advanceState(this.internalState);
          this.advanceState(this.internalState);
          this.advanceState(this.internalState);
          this.state.peek = this.decodeHexDigits(digitStart, 4);
        }
      }

      else if (peek() === chars.$x) {
        // Hex char code, e.g. `\x2F`
        this.advanceState(this.internalState);  // advance past the `x` char
        const digitStart = this.clone();
        this.advanceState(this.internalState);
        this.state.peek = this.decodeHexDigits(digitStart, 2);
      }

      else if (chars.isOctalDigit(peek())) {
        // Octal char code, e.g. `\012` — up to three octal digits.
        let octal = '';
        let length = 0;
        let previous = this.clone();
        while (chars.isOctalDigit(peek()) && length < 3) {
          previous = this.clone();
          octal += String.fromCodePoint(peek());
          this.advanceState(this.internalState);
          length++;
        }
        this.state.peek = parseInt(octal, 8);
        // Backup one char: the loop above reads one character past the last digit.
        this.internalState = previous.internalState;
      }

      else if (chars.isNewLine(this.internalState.peek)) {
        // Line continuation `\` followed by a new line
        this.advanceState(this.internalState);  // advance over the newline
        this.state = this.internalState;
      }

      else {
        // If none of the `if` blocks were executed then we just have an escaped normal character.
        // In that case we just, effectively, skip the backslash from the character.
        this.state.peek = this.internalState.peek;
      }
    }
  }

  // Parse `length` hex digits starting at `start` into a character code;
  // throws a CursorError (positioned at `start`) when the digits are invalid.
  protected decodeHexDigits(start: EscapedCharacterCursor, length: number): number {
    const hex = this.input.substr(start.internalState.offset, length);
    const charCode = parseInt(hex, 16);
    if (!isNaN(charCode)) {
      return charCode;
    } else {
      start.state = start.internalState;
      throw new CursorError('Invalid hexadecimal escape sequence', start);
    }
  }
}
|
|
|
|
|
|
|
|
export class CursorError {
|
|
|
|
constructor(public msg: string, public cursor: CharacterCursor) {}
|
|
|
|
}
|