2016-04-07 17:17:50 -07:00
|
|
|
import {isPresent, isBlank, StringWrapper, stringify, assertionsEnabled, StringJoiner, serializeEnum, CONST_EXPR} from 'angular2/src/facade/lang';
|
2015-11-10 15:56:25 -08:00
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
import {ListWrapper} from 'angular2/src/facade/collection';
|
2015-08-25 15:36:02 -07:00
|
|
|
|
2016-03-06 20:21:20 -08:00
|
|
|
import {HtmlAst, HtmlAttrAst, HtmlTextAst, HtmlCommentAst, HtmlElementAst} from './html_ast';
|
2015-09-11 13:35:46 -07:00
|
|
|
|
2015-09-14 15:59:09 -07:00
|
|
|
import {Injectable} from 'angular2/src/core/di';
|
2015-10-07 09:34:21 -07:00
|
|
|
import {HtmlToken, HtmlTokenType, tokenizeHtml} from './html_lexer';
|
|
|
|
import {ParseError, ParseLocation, ParseSourceSpan} from './parse_util';
|
2016-01-08 12:01:29 -08:00
|
|
|
import {HtmlTagDefinition, getHtmlTagDefinition, getNsPrefix, mergeNsAndName} from './html_tags';
|
2015-11-16 14:36:39 -08:00
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
/**
 * A `ParseError` raised while building the element tree. In addition to the
 * span and message handed to `ParseError`, it records the name of the element
 * the error relates to.
 */
export class HtmlTreeError extends ParseError {
  /** Name of the element this error was reported for. */
  public elementName: string;

  /**
   * @param elementName name of the element the error relates to
   * @param span source span forwarded to `ParseError`
   * @param msg message forwarded to `ParseError`
   */
  constructor(elementName: string, span: ParseSourceSpan, msg: string) {
    super(span, msg);
    this.elementName = elementName;
  }

  /**
   * Factory equivalent of the constructor.
   *
   * @returns a new `HtmlTreeError` built from the given arguments
   */
  static create(elementName: string, span: ParseSourceSpan, msg: string): HtmlTreeError {
    const error = new HtmlTreeError(elementName, span, msg);
    return error;
  }
}
|
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
/**
 * Plain holder for the outcome of a parse: the top-level nodes that were
 * produced and the errors that were collected along the way.
 */
export class HtmlParseTreeResult {
  /** Top-level nodes of the parsed tree. */
  public rootNodes: HtmlAst[];
  /** Errors collected while producing `rootNodes`. */
  public errors: ParseError[];

  /**
   * @param rootNodes top-level nodes, stored as given
   * @param errors collected errors, stored as given
   */
  constructor(rootNodes: HtmlAst[], errors: ParseError[]) {
    this.rootNodes = rootNodes;
    this.errors = errors;
  }
}
|
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
/**
 * Injectable entry point that turns an HTML source string into a tree of
 * `HtmlAst` nodes.
 */
@Injectable()
export class HtmlParser {
  /**
   * Tokenizes `sourceContent` and builds the node tree from the resulting tokens.
   *
   * @param sourceContent the HTML text to parse
   * @param sourceUrl handed to the tokenizer together with the content
   * @returns the root nodes of the tree, plus the tokenizer errors followed by
   *     the tree-building errors
   */
  parse(sourceContent: string, sourceUrl: string): HtmlParseTreeResult {
    const lexed = tokenizeHtml(sourceContent, sourceUrl);
    const built = new TreeBuilder(lexed.tokens).build();

    // Tokenizer errors come first, tree errors are appended after them.
    const lexErrors = <ParseError[]>lexed.errors;
    const allErrors = lexErrors.concat(built.errors);

    return new HtmlParseTreeResult(built.rootNodes, allErrors);
  }
}
|
|
|
|
|
2015-10-07 09:34:21 -07:00
|
|
|
/**
 * Builds a tree of `HtmlAst` nodes from a flat list of `HtmlToken`s.
 *
 * The builder walks the tokens once. It keeps a stack of the elements that are
 * currently open; every new node is appended to the element on top of that
 * stack, or to `rootNodes` when the stack is empty. Structural problems are
 * collected in `errors` instead of being thrown.
 */
class TreeBuilder {
  /** Position of `peek` inside `tokens`; -1 until the first `_advance()`. */
  private index: number = -1;
  /** The next token that has not been consumed yet. */
  private peek: HtmlToken;

  private rootNodes: HtmlAst[] = [];
  private errors: HtmlTreeError[] = [];

  /** Elements that are currently open, innermost last. */
  private elementStack: HtmlElementAst[] = [];

  /**
   * @param tokens the tokens to build the tree from; the last one is expected
   *     to be the EOF token (see `_advance`)
   */
  constructor(private tokens: HtmlToken[]) { this._advance(); }

  /**
   * Consumes tokens until EOF is reached and dispatches on the token type.
   *
   * @returns the root nodes and the errors collected while building
   */
  build(): HtmlParseTreeResult {
    while (this.peek.type !== HtmlTokenType.EOF) {
      if (this.peek.type === HtmlTokenType.TAG_OPEN_START) {
        this._consumeStartTag(this._advance());
      } else if (this.peek.type === HtmlTokenType.TAG_CLOSE) {
        this._consumeEndTag(this._advance());
      } else if (this.peek.type === HtmlTokenType.CDATA_START) {
        this._closeVoidElement();
        this._consumeCdata(this._advance());
      } else if (this.peek.type === HtmlTokenType.COMMENT_START) {
        this._closeVoidElement();
        this._consumeComment(this._advance());
      } else if (
          this.peek.type === HtmlTokenType.TEXT || this.peek.type === HtmlTokenType.RAW_TEXT ||
          this.peek.type === HtmlTokenType.ESCAPABLE_RAW_TEXT) {
        this._closeVoidElement();
        this._consumeText(this._advance());
      } else {
        // Skip all other tokens...
        this._advance();
      }
    }
    return new HtmlParseTreeResult(this.rootNodes, this.errors);
  }

  /**
   * Moves `peek` to the next token, stopping at the last token of the list.
   *
   * @returns the token that was `peek` before the call
   */
  private _advance(): HtmlToken {
    const prev = this.peek;
    if (this.index < this.tokens.length - 1) {
      // Note: there is always an EOF token at the end
      this.index++;
    }
    this.peek = this.tokens[this.index];
    return prev;
  }

  /**
   * Consumes the next token only when it has the given type.
   *
   * @returns the consumed token, or `null` when the next token has another type
   */
  private _advanceIf(type: HtmlTokenType): HtmlToken {
    if (this.peek.type === type) {
      return this._advance();
    }
    return null;
  }

  /**
   * Handles a CDATA section: the token following the start token is consumed
   * as text, then the CDATA end token is consumed if it is present.
   */
  private _consumeCdata(startToken: HtmlToken) {
    this._consumeText(this._advance());
    this._advanceIf(HtmlTokenType.CDATA_END);
  }

  /**
   * Handles a comment. The optional raw-text token and the optional comment end
   * token are consumed; the comment value is the trimmed raw text, or `null`
   * when there was no raw-text token.
   */
  private _consumeComment(token: HtmlToken) {
    const text = this._advanceIf(HtmlTokenType.RAW_TEXT);
    this._advanceIf(HtmlTokenType.COMMENT_END);
    const value = isPresent(text) ? text.parts[0].trim() : null;
    this._addToParent(new HtmlCommentAst(value, token.sourceSpan));
  }

  /**
   * Adds a text node for the token, unless the text ends up empty.
   *
   * A leading line feed is dropped when the text would be the first child of
   * an element whose tag definition has `ignoreFirstLf` set.
   */
  private _consumeText(token: HtmlToken) {
    let text = token.parts[0];
    if (text.length > 0 && text[0] === '\n') {
      const parent = this._getParentElement();
      if (isPresent(parent) && parent.children.length === 0 &&
          getHtmlTagDefinition(parent.name).ignoreFirstLf) {
        text = text.substring(1);
      }
    }

    if (text.length > 0) {
      this._addToParent(new HtmlTextAst(text, token.sourceSpan));
    }
  }

  /**
   * Pops the innermost open element when its tag definition is void, so that
   * content following it is not added as its child.
   */
  private _closeVoidElement(): void {
    if (this.elementStack.length > 0) {
      const el = ListWrapper.last(this.elementStack);

      if (getHtmlTagDefinition(el.name).isVoid) {
        this.elementStack.pop();
      }
    }
  }

  /**
   * Handles a start tag: consumes its attributes and its closing `>` / `/>`
   * token, creates the element and pushes it. A self-closed element is popped
   * again right away; self-closing an element that has no namespace prefix and
   * is not void is reported as an error.
   */
  private _consumeStartTag(startTagToken: HtmlToken) {
    const prefix = startTagToken.parts[0];
    const name = startTagToken.parts[1];
    const attrs: HtmlAttrAst[] = [];
    while (this.peek.type === HtmlTokenType.ATTR_NAME) {
      attrs.push(this._consumeAttr(this._advance()));
    }
    const fullName = getElementFullName(prefix, name, this._getParentElement());
    let selfClosing = false;
    // Note: There could have been a tokenizer error
    // so that we don't get a token for the end tag...
    if (this.peek.type === HtmlTokenType.TAG_OPEN_END_VOID) {
      this._advance();
      selfClosing = true;
      if (getNsPrefix(fullName) == null && !getHtmlTagDefinition(fullName).isVoid) {
        this.errors.push(HtmlTreeError.create(
            fullName, startTagToken.sourceSpan,
            `Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`));
      }
    } else if (this.peek.type === HtmlTokenType.TAG_OPEN_END) {
      this._advance();
      selfClosing = false;
    }
    // The element span runs from the start of the start tag up to the start of
    // the next token that has not been consumed.
    const end = this.peek.sourceSpan.start;
    const span = new ParseSourceSpan(startTagToken.sourceSpan.start, end);
    const el = new HtmlElementAst(fullName, attrs, [], span, span, null);
    this._pushElement(el);
    if (selfClosing) {
      this._popElement(fullName);
      el.endSourceSpan = span;
    }
  }

  /**
   * Attaches `el` to the tree and makes it the innermost open element.
   *
   * If the current innermost element is closed by a child named `el.name`, it
   * is popped first. If the tag definition of `el` requires an extra parent, a
   * new element named `parentToAdd` is created around `el`, attached and pushed
   * before `el` itself is pushed.
   */
  private _pushElement(el: HtmlElementAst) {
    if (this.elementStack.length > 0) {
      const openEl = ListWrapper.last(this.elementStack);
      if (getHtmlTagDefinition(openEl.name).isClosedByChild(el.name)) {
        this.elementStack.pop();
      }
    }

    const tagDef = getHtmlTagDefinition(el.name);
    const parentEl = this._getParentElement();
    if (tagDef.requireExtraParent(isPresent(parentEl) ? parentEl.name : null)) {
      const newParent = new HtmlElementAst(
          tagDef.parentToAdd, [], [el], el.sourceSpan, el.startSourceSpan, el.endSourceSpan);
      this._addToParent(newParent);
      this.elementStack.push(newParent);
      this.elementStack.push(el);
    } else {
      this._addToParent(el);
      this.elementStack.push(el);
    }
  }

  /**
   * Handles a closing tag. The innermost open element (if any) gets the end
   * tag's span as its `endSourceSpan`. An error is recorded when the tag is
   * void or when no matching open element can be popped.
   */
  private _consumeEndTag(endTagToken: HtmlToken) {
    const fullName =
        getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());

    // A stray closing tag at the root has no open element to annotate; without
    // this guard the assignment would dereference null and abort the parse
    // instead of reporting the "Unexpected closing tag" error below.
    const openElement = this._getParentElement();
    if (isPresent(openElement)) {
      openElement.endSourceSpan = endTagToken.sourceSpan;
    }

    if (getHtmlTagDefinition(fullName).isVoid) {
      this.errors.push(HtmlTreeError.create(
          fullName, endTagToken.sourceSpan,
          `Void elements do not have end tags "${endTagToken.parts[1]}"`));
    } else if (!this._popElement(fullName)) {
      this.errors.push(HtmlTreeError.create(
          fullName, endTagToken.sourceSpan, `Unexpected closing tag "${endTagToken.parts[1]}"`));
    }
  }

  /**
   * Searches the open-element stack from the innermost element outwards for an
   * element named `fullName` and, when found, removes it together with
   * everything above it.
   *
   * The search stops early at the first non-matching element whose tag
   * definition is not `closedByParent`.
   *
   * @returns `true` when an element was popped, `false` otherwise
   */
  private _popElement(fullName: string): boolean {
    for (let stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
      const el = this.elementStack[stackIndex];
      if (el.name === fullName) {
        ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
        return true;
      }

      if (!getHtmlTagDefinition(el.name).closedByParent) {
        return false;
      }
    }
    return false;
  }

  /**
   * Builds an attribute node from an attribute-name token and, when it directly
   * follows, the attribute-value token. Without a value token the value is the
   * empty string and the span ends at the end of the name.
   */
  private _consumeAttr(attrName: HtmlToken): HtmlAttrAst {
    const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
    let end = attrName.sourceSpan.end;
    let value = '';
    if (this.peek.type === HtmlTokenType.ATTR_VALUE) {
      const valueToken = this._advance();
      value = valueToken.parts[0];
      end = valueToken.sourceSpan.end;
    }
    return new HtmlAttrAst(fullName, value, new ParseSourceSpan(attrName.sourceSpan.start, end));
  }

  /** @returns the innermost open element, or `null` when no element is open */
  private _getParentElement(): HtmlElementAst {
    return this.elementStack.length > 0 ? ListWrapper.last(this.elementStack) : null;
  }

  /**
   * Appends `node` to the innermost open element, or to the root nodes when no
   * element is open.
   */
  private _addToParent(node: HtmlAst) {
    const parent = this._getParentElement();
    if (isPresent(parent)) {
      parent.children.push(node);
    } else {
      this.rootNodes.push(node);
    }
  }
}
|
|
|
|
|
2016-04-07 17:17:50 -07:00
|
|
|
/**
 * Computes the full (namespace-qualified) name of an element.
 *
 * An explicit `prefix` is used as is. When it is blank, the implicit namespace
 * prefix of the tag definition for `localName` is tried next, and if that is
 * blank as well the namespace prefix of the parent element's name is used
 * (when there is a parent).
 *
 * @param prefix namespace prefix written on the tag, may be blank
 * @param localName tag name without prefix
 * @param parentElement element the tag is nested in, may be blank
 * @returns the result of `mergeNsAndName` for the resolved prefix and `localName`
 */
function getElementFullName(
    prefix: string, localName: string, parentElement: HtmlElementAst): string {
  // Explicit prefix: nothing to resolve.
  if (!isBlank(prefix)) {
    return mergeNsAndName(prefix, localName);
  }

  let resolvedPrefix = getHtmlTagDefinition(localName).implicitNamespacePrefix;
  const inheritFromParent = isBlank(resolvedPrefix) && isPresent(parentElement);
  if (inheritFromParent) {
    resolvedPrefix = getNsPrefix(parentElement.name);
  }

  return mergeNsAndName(resolvedPrefix, localName);
}
|