393 lines
14 KiB
TypeScript
Raw Normal View History

2016-07-21 13:56:58 -07:00
/**
* @license
* Copyright Google LLC All Rights Reserved.
2016-07-21 13:56:58 -07:00
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {ParseError, ParseSourceSpan} from '../parse_util';
2016-07-21 13:56:58 -07:00
import * as html from './ast';
import * as lex from './lexer';
import {getNsPrefix, mergeNsAndName, splitNsName, TagDefinition} from './tags';
2016-07-21 13:56:58 -07:00
export class TreeError extends ParseError {
static create(elementName: string|null, span: ParseSourceSpan, msg: string): TreeError {
2016-07-21 13:56:58 -07:00
return new TreeError(elementName, span, msg);
}
constructor(public elementName: string|null, span: ParseSourceSpan, msg: string) {
super(span, msg);
}
2016-07-21 13:56:58 -07:00
}
export class ParseTreeResult {
constructor(public rootNodes: html.Node[], public errors: ParseError[]) {}
}
export class Parser {
constructor(public getTagDefinition: (tagName: string) => TagDefinition) {}
2016-07-21 13:56:58 -07:00
parse(source: string, url: string, options?: lex.TokenizeOptions): ParseTreeResult {
const tokenizeResult = lex.tokenize(source, url, this.getTagDefinition, options);
const parser = new _TreeBuilder(tokenizeResult.tokens, this.getTagDefinition);
parser.build();
2016-07-21 13:56:58 -07:00
return new ParseTreeResult(
fix(compiler): normalize line endings in ICU expansions (#36741) The html parser already normalizes line endings (converting `\r\n` to `\n`) for most text in templates but it was missing the expressions of ICU expansions. In ViewEngine backticked literal strings, used to define inline templates, were already normalized by the TypeScript parser. In Ivy we are parsing the raw text of the source file directly so the line endings need to be manually normalized. This change ensures that inline templates have the line endings of ICU expression normalized correctly, which matches the ViewEngine. In ViewEngine external templates, defined in HTML files, the behavior was different, since TypeScript was not normalizing the line endings. Specifically, ICU expansion "expressions" are not being normalized. This is a problem because it means that i18n message ids can be different on different machines that are setup with different line ending handling, or if the developer moves a template from inline to external or vice versa. The goal is always to normalize line endings, whether inline or external. But this would be a breaking change since it would change i18n message ids that have been previously computed. Therefore this commit aligns the ivy template parsing to have the same "buggy" behavior for external templates. There is now a compiler option `i18nNormalizeLineEndingsInICUs`, which if set to `true` will ensure the correct non-buggy behavior. For the time being this option defaults to `false` to ensure backward compatibility while allowing opt-in to the desired behavior. This option's default will be flipped in a future breaking change release. Further, when this option is set to `false`, any ICU expression tokens, which have not been normalized, are added to the `ParseResult` from the `HtmlParser.parse()` method. In the future, this collection of tokens could be used to diagnose and encourage developers to migrate their i18n message ids. See FW-2106. Closes #36725 PR Close #36741
2020-04-26 18:15:43 +01:00
parser.rootNodes,
(tokenizeResult.errors as ParseError[]).concat(parser.errors),
);
2016-07-21 13:56:58 -07:00
}
}
class _TreeBuilder {
private _index: number = -1;
// `_peek` will be initialized by the call to `advance()` in the constructor.
private _peek!: lex.Token;
2016-07-21 13:56:58 -07:00
private _elementStack: html.Element[] = [];
rootNodes: html.Node[] = [];
errors: TreeError[] = [];
2016-07-21 13:56:58 -07:00
constructor(
private tokens: lex.Token[], private getTagDefinition: (tagName: string) => TagDefinition) {
this._advance();
}
build(): void {
2016-07-21 13:56:58 -07:00
while (this._peek.type !== lex.TokenType.EOF) {
if (this._peek.type === lex.TokenType.TAG_OPEN_START ||
this._peek.type === lex.TokenType.INCOMPLETE_TAG_OPEN) {
2016-07-21 13:56:58 -07:00
this._consumeStartTag(this._advance());
} else if (this._peek.type === lex.TokenType.TAG_CLOSE) {
this._consumeEndTag(this._advance());
} else if (this._peek.type === lex.TokenType.CDATA_START) {
this._closeVoidElement();
this._consumeCdata(this._advance());
} else if (this._peek.type === lex.TokenType.COMMENT_START) {
this._closeVoidElement();
this._consumeComment(this._advance());
} else if (
this._peek.type === lex.TokenType.TEXT || this._peek.type === lex.TokenType.RAW_TEXT ||
this._peek.type === lex.TokenType.ESCAPABLE_RAW_TEXT) {
this._closeVoidElement();
this._consumeText(this._advance());
} else if (this._peek.type === lex.TokenType.EXPANSION_FORM_START) {
this._consumeExpansion(this._advance());
} else {
// Skip all other tokens...
this._advance();
}
}
}
private _advance(): lex.Token {
const prev = this._peek;
if (this._index < this.tokens.length - 1) {
// Note: there is always an EOF token at the end
this._index++;
}
this._peek = this.tokens[this._index];
return prev;
}
private _advanceIf(type: lex.TokenType): lex.Token|null {
2016-07-21 13:56:58 -07:00
if (this._peek.type === type) {
return this._advance();
}
return null;
}
private _consumeCdata(_startToken: lex.Token) {
2016-07-21 13:56:58 -07:00
this._consumeText(this._advance());
this._advanceIf(lex.TokenType.CDATA_END);
}
private _consumeComment(token: lex.Token) {
const text = this._advanceIf(lex.TokenType.RAW_TEXT);
this._advanceIf(lex.TokenType.COMMENT_END);
2017-03-02 09:37:01 -08:00
const value = text != null ? text.parts[0].trim() : null;
2016-07-21 13:56:58 -07:00
this._addToParent(new html.Comment(value, token.sourceSpan));
}
private _consumeExpansion(token: lex.Token) {
const switchValue = this._advance();
const type = this._advance();
const cases: html.ExpansionCase[] = [];
// read =
while (this._peek.type === lex.TokenType.EXPANSION_CASE_VALUE) {
const expCase = this._parseExpansionCase();
if (!expCase) return; // error
2016-07-21 13:56:58 -07:00
cases.push(expCase);
}
// read the final }
if (this._peek.type !== lex.TokenType.EXPANSION_FORM_END) {
this.errors.push(
2016-07-21 13:56:58 -07:00
TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`));
return;
}
const sourceSpan = new ParseSourceSpan(
token.sourceSpan.start, this._peek.sourceSpan.end, token.sourceSpan.fullStart);
2016-07-21 13:56:58 -07:00
this._addToParent(new html.Expansion(
switchValue.parts[0], type.parts[0], cases, sourceSpan, switchValue.sourceSpan));
this._advance();
}
private _parseExpansionCase(): html.ExpansionCase|null {
2016-07-21 13:56:58 -07:00
const value = this._advance();
// read {
if (this._peek.type !== lex.TokenType.EXPANSION_CASE_EXP_START) {
this.errors.push(
2016-07-21 13:56:58 -07:00
TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`));
return null;
}
// read until }
const start = this._advance();
const exp = this._collectExpansionExpTokens(start);
if (!exp) return null;
2016-07-21 13:56:58 -07:00
const end = this._advance();
exp.push(new lex.Token(lex.TokenType.EOF, [], end.sourceSpan));
// parse everything in between { and }
const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition);
expansionCaseParser.build();
if (expansionCaseParser.errors.length > 0) {
this.errors = this.errors.concat(expansionCaseParser.errors);
2016-07-21 13:56:58 -07:00
return null;
}
const sourceSpan =
new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end, value.sourceSpan.fullStart);
const expSourceSpan =
new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end, start.sourceSpan.fullStart);
2016-07-21 13:56:58 -07:00
return new html.ExpansionCase(
value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan);
2016-07-21 13:56:58 -07:00
}
private _collectExpansionExpTokens(start: lex.Token): lex.Token[]|null {
2016-07-21 13:56:58 -07:00
const exp: lex.Token[] = [];
const expansionFormStack = [lex.TokenType.EXPANSION_CASE_EXP_START];
while (true) {
if (this._peek.type === lex.TokenType.EXPANSION_FORM_START ||
this._peek.type === lex.TokenType.EXPANSION_CASE_EXP_START) {
expansionFormStack.push(this._peek.type);
}
if (this._peek.type === lex.TokenType.EXPANSION_CASE_EXP_END) {
if (lastOnStack(expansionFormStack, lex.TokenType.EXPANSION_CASE_EXP_START)) {
expansionFormStack.pop();
if (expansionFormStack.length == 0) return exp;
} else {
this.errors.push(
2016-07-21 13:56:58 -07:00
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null;
}
}
if (this._peek.type === lex.TokenType.EXPANSION_FORM_END) {
if (lastOnStack(expansionFormStack, lex.TokenType.EXPANSION_FORM_START)) {
expansionFormStack.pop();
} else {
this.errors.push(
2016-07-21 13:56:58 -07:00
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null;
}
}
if (this._peek.type === lex.TokenType.EOF) {
this.errors.push(
2016-07-21 13:56:58 -07:00
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null;
}
exp.push(this._advance());
}
}
private _consumeText(token: lex.Token) {
let text = token.parts[0];
if (text.length > 0 && text[0] == '\n') {
const parent = this._getParentElement();
2017-03-02 09:37:01 -08:00
if (parent != null && parent.children.length == 0 &&
2016-07-21 13:56:58 -07:00
this.getTagDefinition(parent.name).ignoreFirstLf) {
text = text.substring(1);
}
}
if (text.length > 0) {
this._addToParent(new html.Text(text, token.sourceSpan));
}
}
private _closeVoidElement(): void {
const el = this._getParentElement();
if (el && this.getTagDefinition(el.name).isVoid) {
this._elementStack.pop();
2016-07-21 13:56:58 -07:00
}
}
private _consumeStartTag(startTagToken: lex.Token) {
const [prefix, name] = startTagToken.parts;
2016-07-21 13:56:58 -07:00
const attrs: html.Attribute[] = [];
while (this._peek.type === lex.TokenType.ATTR_NAME) {
attrs.push(this._consumeAttr(this._advance()));
}
const fullName = this._getElementFullName(prefix, name, this._getParentElement());
let selfClosing = false;
// Note: There could have been a tokenizer error
// so that we don't get a token for the end tag...
if (this._peek.type === lex.TokenType.TAG_OPEN_END_VOID) {
this._advance();
selfClosing = true;
const tagDef = this.getTagDefinition(fullName);
if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) {
this.errors.push(TreeError.create(
2016-07-21 13:56:58 -07:00
fullName, startTagToken.sourceSpan,
`Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`));
}
} else if (this._peek.type === lex.TokenType.TAG_OPEN_END) {
this._advance();
selfClosing = false;
}
const end = this._peek.sourceSpan.start;
const span = new ParseSourceSpan(
startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
// Create a separate `startSpan` because `span` will be modified when there is an `end` span.
const startSpan = new ParseSourceSpan(
startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
const el = new html.Element(fullName, attrs, [], span, startSpan, undefined);
2016-07-21 13:56:58 -07:00
this._pushElement(el);
if (selfClosing) {
fix(compiler): properly associate source spans for implicitly closed elements (#38126) HTML is very lenient when it comes to closing elements, so Angular's parser has rules that specify which elements are implicitly closed when closing a tag. The parser keeps track of the nesting of tag names using a stack and parsing a closing tag will pop as many elements off the stack as possible, provided that the elements can be implicitly closed. For example, consider the following templates: - `<div><br></div>`, the `<br>` is implicitly closed when parsing `</div>`, because `<br>` is a void element. - `<div><p></div>`, the `<p>` is implicitly closed when parsing `</div>`, as `<p>` is allowed to be closed by the closing of its parent element. - `<ul><li>A <li>B</ul>`, the first `<li>` is implicitly closed when parsing the second `<li>`, whereas the second `<li>` would be implicitly closed when parsing the `</ul>`. In all the cases above the parsed structure would be correct, however the source span of the closing `</div>` would incorrectly be assigned to the element that is implicitly closed. The problem was that closing an element would associate the source span with the element at the top of the stack, however this may not be the element that is actually being closed if some elements would be implicitly closed. This commit fixes the issue by assigning the end source span with the element on the stack that is actually being closed. Any implicitly closed elements that are popped off the stack will not be assigned an end source span, as the implicit closing implies that no ending element is present. Note that there is a difference between self-closed elements such as `<input/>` and implicitly closed elements such as `<input>`. The former does have an end source span (identical to its start source span) whereas the latter does not. Fixes #36118 Resolves FW-2004 PR Close #38126
2020-07-18 00:05:11 +02:00
// Elements that are self-closed have their `endSourceSpan` set to the full span, as the
// element start tag also represents the end tag.
this._popElement(fullName, span);
} else if (startTagToken.type === lex.TokenType.INCOMPLETE_TAG_OPEN) {
// We already know the opening tag is not complete, so it is unlikely it has a corresponding
// close tag. Let's optimistically parse it as a full element and emit an error.
this._popElement(fullName, null);
this.errors.push(
TreeError.create(fullName, span, `Opening tag "${fullName}" not terminated.`));
2016-07-21 13:56:58 -07:00
}
}
private _pushElement(el: html.Element) {
const parentEl = this._getParentElement();
if (parentEl && this.getTagDefinition(parentEl.name).isClosedByChild(el.name)) {
this._elementStack.pop();
2016-07-21 13:56:58 -07:00
}
this._addToParent(el);
this._elementStack.push(el);
}
private _consumeEndTag(endTagToken: lex.Token) {
const fullName = this._getElementFullName(
endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
if (this.getTagDefinition(fullName).isVoid) {
this.errors.push(TreeError.create(
2016-07-21 13:56:58 -07:00
fullName, endTagToken.sourceSpan,
`Void elements do not have end tags "${endTagToken.parts[1]}"`));
fix(compiler): properly associate source spans for implicitly closed elements (#38126) HTML is very lenient when it comes to closing elements, so Angular's parser has rules that specify which elements are implicitly closed when closing a tag. The parser keeps track of the nesting of tag names using a stack and parsing a closing tag will pop as many elements off the stack as possible, provided that the elements can be implicitly closed. For example, consider the following templates: - `<div><br></div>`, the `<br>` is implicitly closed when parsing `</div>`, because `<br>` is a void element. - `<div><p></div>`, the `<p>` is implicitly closed when parsing `</div>`, as `<p>` is allowed to be closed by the closing of its parent element. - `<ul><li>A <li>B</ul>`, the first `<li>` is implicitly closed when parsing the second `<li>`, whereas the second `<li>` would be implicitly closed when parsing the `</ul>`. In all the cases above the parsed structure would be correct, however the source span of the closing `</div>` would incorrectly be assigned to the element that is implicitly closed. The problem was that closing an element would associate the source span with the element at the top of the stack, however this may not be the element that is actually being closed if some elements would be implicitly closed. This commit fixes the issue by assigning the end source span with the element on the stack that is actually being closed. Any implicitly closed elements that are popped off the stack will not be assigned an end source span, as the implicit closing implies that no ending element is present. Note that there is a difference between self-closed elements such as `<input/>` and implicitly closed elements such as `<input>`. The former does have an end source span (identical to its start source span) whereas the latter does not. Fixes #36118 Resolves FW-2004 PR Close #38126
2020-07-18 00:05:11 +02:00
} else if (!this._popElement(fullName, endTagToken.sourceSpan)) {
const errMsg = `Unexpected closing tag "${
fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`;
this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg));
2016-07-21 13:56:58 -07:00
}
}
/**
* Closes the nearest element with the tag name `fullName` in the parse tree.
* `endSourceSpan` is the span of the closing tag, or null if the element does
* not have a closing tag (for example, this happens when an incomplete
* opening tag is recovered).
*/
private _popElement(fullName: string, endSourceSpan: ParseSourceSpan|null): boolean {
2016-07-21 13:56:58 -07:00
for (let stackIndex = this._elementStack.length - 1; stackIndex >= 0; stackIndex--) {
const el = this._elementStack[stackIndex];
if (el.name == fullName) {
fix(compiler): properly associate source spans for implicitly closed elements (#38126) HTML is very lenient when it comes to closing elements, so Angular's parser has rules that specify which elements are implicitly closed when closing a tag. The parser keeps track of the nesting of tag names using a stack and parsing a closing tag will pop as many elements off the stack as possible, provided that the elements can be implicitly closed. For example, consider the following templates: - `<div><br></div>`, the `<br>` is implicitly closed when parsing `</div>`, because `<br>` is a void element. - `<div><p></div>`, the `<p>` is implicitly closed when parsing `</div>`, as `<p>` is allowed to be closed by the closing of its parent element. - `<ul><li>A <li>B</ul>`, the first `<li>` is implicitly closed when parsing the second `<li>`, whereas the second `<li>` would be implicitly closed when parsing the `</ul>`. In all the cases above the parsed structure would be correct, however the source span of the closing `</div>` would incorrectly be assigned to the element that is implicitly closed. The problem was that closing an element would associate the source span with the element at the top of the stack, however this may not be the element that is actually being closed if some elements would be implicitly closed. This commit fixes the issue by assigning the end source span with the element on the stack that is actually being closed. Any implicitly closed elements that are popped off the stack will not be assigned an end source span, as the implicit closing implies that no ending element is present. Note that there is a difference between self-closed elements such as `<input/>` and implicitly closed elements such as `<input>`. The former does have an end source span (identical to its start source span) whereas the latter does not. Fixes #36118 Resolves FW-2004 PR Close #38126
2020-07-18 00:05:11 +02:00
// Record the parse span with the element that is being closed. Any elements that are
// removed from the element stack at this point are closed implicitly, so they won't get
// an end source span (as there is no explicit closing element).
el.endSourceSpan = endSourceSpan;
el.sourceSpan.end = endSourceSpan !== null ? endSourceSpan.end : el.sourceSpan.end;
fix(compiler): properly associate source spans for implicitly closed elements (#38126) HTML is very lenient when it comes to closing elements, so Angular's parser has rules that specify which elements are implicitly closed when closing a tag. The parser keeps track of the nesting of tag names using a stack and parsing a closing tag will pop as many elements off the stack as possible, provided that the elements can be implicitly closed. For example, consider the following templates: - `<div><br></div>`, the `<br>` is implicitly closed when parsing `</div>`, because `<br>` is a void element. - `<div><p></div>`, the `<p>` is implicitly closed when parsing `</div>`, as `<p>` is allowed to be closed by the closing of its parent element. - `<ul><li>A <li>B</ul>`, the first `<li>` is implicitly closed when parsing the second `<li>`, whereas the second `<li>` would be implicitly closed when parsing the `</ul>`. In all the cases above the parsed structure would be correct, however the source span of the closing `</div>` would incorrectly be assigned to the element that is implicitly closed. The problem was that closing an element would associate the source span with the element at the top of the stack, however this may not be the element that is actually being closed if some elements would be implicitly closed. This commit fixes the issue by assigning the end source span with the element on the stack that is actually being closed. Any implicitly closed elements that are popped off the stack will not be assigned an end source span, as the implicit closing implies that no ending element is present. Note that there is a difference between self-closed elements such as `<input/>` and implicitly closed elements such as `<input>`. The former does have an end source span (identical to its start source span) whereas the latter does not. Fixes #36118 Resolves FW-2004 PR Close #38126
2020-07-18 00:05:11 +02:00
2016-10-21 15:14:44 -07:00
this._elementStack.splice(stackIndex, this._elementStack.length - stackIndex);
2016-07-21 13:56:58 -07:00
return true;
}
if (!this.getTagDefinition(el.name).closedByParent) {
return false;
}
}
return false;
}
private _consumeAttr(attrName: lex.Token): html.Attribute {
const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
let end = attrName.sourceSpan.end;
let value = '';
let valueSpan: ParseSourceSpan = undefined!;
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
this._advance();
}
2016-07-21 13:56:58 -07:00
if (this._peek.type === lex.TokenType.ATTR_VALUE) {
const valueToken = this._advance();
value = valueToken.parts[0];
end = valueToken.sourceSpan.end;
valueSpan = valueToken.sourceSpan;
2016-07-21 13:56:58 -07:00
}
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
const quoteToken = this._advance();
end = quoteToken.sourceSpan.end;
}
return new html.Attribute(
fullName, value,
new ParseSourceSpan(attrName.sourceSpan.start, end, attrName.sourceSpan.fullStart),
valueSpan);
2016-07-21 13:56:58 -07:00
}
private _getParentElement(): html.Element|null {
2016-10-21 15:14:44 -07:00
return this._elementStack.length > 0 ? this._elementStack[this._elementStack.length - 1] : null;
2016-07-21 13:56:58 -07:00
}
private _addToParent(node: html.Node) {
const parent = this._getParentElement();
2017-03-02 09:37:01 -08:00
if (parent != null) {
2016-07-21 13:56:58 -07:00
parent.children.push(node);
} else {
this.rootNodes.push(node);
2016-07-21 13:56:58 -07:00
}
}
private _getElementFullName(prefix: string, localName: string, parentElement: html.Element|null):
2016-07-21 13:56:58 -07:00
string {
if (prefix === '') {
prefix = this.getTagDefinition(localName).implicitNamespacePrefix || '';
if (prefix === '' && parentElement != null) {
const parentTagName = splitNsName(parentElement.name)[1];
const parentTagDefinition = this.getTagDefinition(parentTagName);
if (!parentTagDefinition.preventNamespaceInheritance) {
prefix = getNsPrefix(parentElement.name);
}
2016-07-21 13:56:58 -07:00
}
}
return mergeNsAndName(prefix, localName);
}
}
function lastOnStack(stack: any[], element: any): boolean {
return stack.length > 0 && stack[stack.length - 1] === element;
}