refactor(compiler): simplify tokenizer and parser results (#36741)

Move the creation of the result objects into the wrapper functions.
This makes it easier to reason about what the parser and lexer classes
are responsible for: a new object is created for each tokenization or
parsing activity, and that object holds the state of the activity.

PR Close #36741
This commit is contained in:
Pete Bacon Darwin 2020-04-26 18:15:43 +01:00 committed by Andrew Kushnir
parent 4172707346
commit e0aa39929b
2 changed files with 29 additions and 31 deletions

View File

@ -110,7 +110,9 @@ export interface TokenizeOptions {
export function tokenize( export function tokenize(
source: string, url: string, getTagDefinition: (tagName: string) => TagDefinition, source: string, url: string, getTagDefinition: (tagName: string) => TagDefinition,
options: TokenizeOptions = {}): TokenizeResult { options: TokenizeOptions = {}): TokenizeResult {
return new _Tokenizer(new ParseSourceFile(source, url), getTagDefinition, options).tokenize(); const tokenizer = new _Tokenizer(new ParseSourceFile(source, url), getTagDefinition, options);
tokenizer.tokenize();
return new TokenizeResult(mergeTextTokens(tokenizer.tokens), tokenizer.errors);
} }
const _CR_OR_CRLF_REGEXP = /\r\n?/g; const _CR_OR_CRLF_REGEXP = /\r\n?/g;
@ -177,7 +179,7 @@ class _Tokenizer {
return content.replace(_CR_OR_CRLF_REGEXP, '\n'); return content.replace(_CR_OR_CRLF_REGEXP, '\n');
} }
tokenize(): TokenizeResult { tokenize(): void {
while (this._cursor.peek() !== chars.$EOF) { while (this._cursor.peek() !== chars.$EOF) {
const start = this._cursor.clone(); const start = this._cursor.clone();
try { try {
@ -204,7 +206,6 @@ class _Tokenizer {
} }
this._beginToken(TokenType.EOF); this._beginToken(TokenType.EOF);
this._endToken([]); this._endToken([]);
return new TokenizeResult(mergeTextTokens(this.tokens), this.errors);
} }
/** /**

View File

@ -30,32 +30,29 @@ export class Parser {
constructor(public getTagDefinition: (tagName: string) => TagDefinition) {} constructor(public getTagDefinition: (tagName: string) => TagDefinition) {}
parse(source: string, url: string, options?: lex.TokenizeOptions): ParseTreeResult { parse(source: string, url: string, options?: lex.TokenizeOptions): ParseTreeResult {
const tokensAndErrors = lex.tokenize(source, url, this.getTagDefinition, options); const tokenizeResult = lex.tokenize(source, url, this.getTagDefinition, options);
const parser = new _TreeBuilder(tokenizeResult.tokens, this.getTagDefinition);
const treeAndErrors = new _TreeBuilder(tokensAndErrors.tokens, this.getTagDefinition).build(); parser.build();
return new ParseTreeResult( return new ParseTreeResult(
treeAndErrors.rootNodes, parser.rootNodes, (tokenizeResult.errors as ParseError[]).concat(parser.errors));
(<ParseError[]>tokensAndErrors.errors).concat(treeAndErrors.errors));
} }
} }
class _TreeBuilder { class _TreeBuilder {
private _index: number = -1; private _index: number = -1;
// TODO(issue/24571): remove '!'. // `_peek` will be initialized by the call to `advance()` in the constructor.
private _peek!: lex.Token; private _peek!: lex.Token;
private _rootNodes: html.Node[] = [];
private _errors: TreeError[] = [];
private _elementStack: html.Element[] = []; private _elementStack: html.Element[] = [];
rootNodes: html.Node[] = [];
errors: TreeError[] = [];
constructor( constructor(
private tokens: lex.Token[], private getTagDefinition: (tagName: string) => TagDefinition) { private tokens: lex.Token[], private getTagDefinition: (tagName: string) => TagDefinition) {
this._advance(); this._advance();
} }
build(): ParseTreeResult { build(): void {
while (this._peek.type !== lex.TokenType.EOF) { while (this._peek.type !== lex.TokenType.EOF) {
if (this._peek.type === lex.TokenType.TAG_OPEN_START) { if (this._peek.type === lex.TokenType.TAG_OPEN_START) {
this._consumeStartTag(this._advance()); this._consumeStartTag(this._advance());
@ -79,7 +76,6 @@ class _TreeBuilder {
this._advance(); this._advance();
} }
} }
return new ParseTreeResult(this._rootNodes, this._errors);
} }
private _advance(): lex.Token { private _advance(): lex.Token {
@ -99,7 +95,7 @@ class _TreeBuilder {
return null; return null;
} }
private _consumeCdata(startToken: lex.Token) { private _consumeCdata(_startToken: lex.Token) {
this._consumeText(this._advance()); this._consumeText(this._advance());
this._advanceIf(lex.TokenType.CDATA_END); this._advanceIf(lex.TokenType.CDATA_END);
} }
@ -126,7 +122,7 @@ class _TreeBuilder {
// read the final } // read the final }
if (this._peek.type !== lex.TokenType.EXPANSION_FORM_END) { if (this._peek.type !== lex.TokenType.EXPANSION_FORM_END) {
this._errors.push( this.errors.push(
TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`)); TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`));
return; return;
} }
@ -142,7 +138,7 @@ class _TreeBuilder {
// read { // read {
if (this._peek.type !== lex.TokenType.EXPANSION_CASE_EXP_START) { if (this._peek.type !== lex.TokenType.EXPANSION_CASE_EXP_START) {
this._errors.push( this.errors.push(
TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`)); TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`));
return null; return null;
} }
@ -157,16 +153,17 @@ class _TreeBuilder {
exp.push(new lex.Token(lex.TokenType.EOF, [], end.sourceSpan)); exp.push(new lex.Token(lex.TokenType.EOF, [], end.sourceSpan));
// parse everything in between { and } // parse everything in between { and }
const parsedExp = new _TreeBuilder(exp, this.getTagDefinition).build(); const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition);
if (parsedExp.errors.length > 0) { expansionCaseParser.build();
this._errors = this._errors.concat(<TreeError[]>parsedExp.errors); if (expansionCaseParser.errors.length > 0) {
this.errors = this.errors.concat(expansionCaseParser.errors);
return null; return null;
} }
const sourceSpan = new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end); const sourceSpan = new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end);
const expSourceSpan = new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end); const expSourceSpan = new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end);
return new html.ExpansionCase( return new html.ExpansionCase(
value.parts[0], parsedExp.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan); value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan);
} }
private _collectExpansionExpTokens(start: lex.Token): lex.Token[]|null { private _collectExpansionExpTokens(start: lex.Token): lex.Token[]|null {
@ -185,7 +182,7 @@ class _TreeBuilder {
if (expansionFormStack.length == 0) return exp; if (expansionFormStack.length == 0) return exp;
} else { } else {
this._errors.push( this.errors.push(
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null; return null;
} }
@ -195,14 +192,14 @@ class _TreeBuilder {
if (lastOnStack(expansionFormStack, lex.TokenType.EXPANSION_FORM_START)) { if (lastOnStack(expansionFormStack, lex.TokenType.EXPANSION_FORM_START)) {
expansionFormStack.pop(); expansionFormStack.pop();
} else { } else {
this._errors.push( this.errors.push(
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null; return null;
} }
} }
if (this._peek.type === lex.TokenType.EOF) { if (this._peek.type === lex.TokenType.EOF) {
this._errors.push( this.errors.push(
TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
return null; return null;
} }
@ -249,7 +246,7 @@ class _TreeBuilder {
selfClosing = true; selfClosing = true;
const tagDef = this.getTagDefinition(fullName); const tagDef = this.getTagDefinition(fullName);
if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) { if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) {
this._errors.push(TreeError.create( this.errors.push(TreeError.create(
fullName, startTagToken.sourceSpan, fullName, startTagToken.sourceSpan,
`Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`)); `Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`));
} }
@ -287,13 +284,13 @@ class _TreeBuilder {
} }
if (this.getTagDefinition(fullName).isVoid) { if (this.getTagDefinition(fullName).isVoid) {
this._errors.push(TreeError.create( this.errors.push(TreeError.create(
fullName, endTagToken.sourceSpan, fullName, endTagToken.sourceSpan,
`Void elements do not have end tags "${endTagToken.parts[1]}"`)); `Void elements do not have end tags "${endTagToken.parts[1]}"`));
} else if (!this._popElement(fullName)) { } else if (!this._popElement(fullName)) {
const errMsg = `Unexpected closing tag "${ const errMsg = `Unexpected closing tag "${
fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`; fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`;
this._errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg)); this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg));
} }
} }
@ -362,7 +359,7 @@ class _TreeBuilder {
if (parent != null) { if (parent != null) {
parent.children.push(node); parent.children.push(node);
} else { } else {
this._rootNodes.push(node); this.rootNodes.push(node);
} }
} }
@ -384,7 +381,7 @@ class _TreeBuilder {
const index = parent.children.indexOf(container); const index = parent.children.indexOf(container);
parent.children[index] = node; parent.children[index] = node;
} else { } else {
this._rootNodes.push(node); this.rootNodes.push(node);
} }
node.children.push(container); node.children.push(container);
this._elementStack.splice(this._elementStack.indexOf(container), 0, node); this._elementStack.splice(this._elementStack.indexOf(container), 0, node);