feat(html_lexer): support special forms used by i18n { exp, plural, =0 {} }

This commit is contained in:
vsavkin 2016-04-12 11:46:28 -07:00 committed by Victor Savkin
parent d99823e2fd
commit 7f297666ca
2 changed files with 202 additions and 11 deletions

View File

@ -25,6 +25,11 @@ export enum HtmlTokenType {
ATTR_NAME,
ATTR_VALUE,
DOC_TYPE,
EXPANSION_FORM_START,
EXPANSION_CASE_VALUE,
EXPANSION_CASE_EXP_START,
EXPANSION_CASE_EXP_END,
EXPANSION_FORM_END,
EOF
}
@ -43,8 +48,10 @@ export class HtmlTokenizeResult {
constructor(public tokens: HtmlToken[], public errors: HtmlTokenError[]) {}
}
export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlTokenizeResult {
return new _HtmlTokenizer(new ParseSourceFile(sourceContent, sourceUrl)).tokenize();
/**
 * Tokenizes `sourceContent` and returns whatever the tokenizer's `tokenize()` produces.
 *
 * @param sourceContent text to tokenize
 * @param sourceUrl url handed to the `ParseSourceFile` that wraps the content
 * @param tokenizeExpansionForms forwarded to the tokenizer; defaults to `false`, so existing
 *     two-argument callers are unaffected
 */
export function tokenizeHtml(sourceContent: string, sourceUrl: string,
                             tokenizeExpansionForms: boolean = false): HtmlTokenizeResult {
  const sourceFile = new ParseSourceFile(sourceContent, sourceUrl);
  const tokenizer = new _HtmlTokenizer(sourceFile, tokenizeExpansionForms);
  return tokenizer.tokenize();
}
const $EOF = 0;
@ -75,6 +82,9 @@ const $GT = 62;
const $QUESTION = 63;
const $LBRACKET = 91;
const $RBRACKET = 93;
const $LBRACE = 123;  // '{'
const $RBRACE = 125;  // '}'
const $COMMA = 44;    // ','
const $A = 65;
const $F = 70;
const $X = 88;
@ -108,16 +118,20 @@ class _HtmlTokenizer {
private length: number;
// Note: this is always lowercase!
private peek: number = -1;
private nextPeek: number = -1;
private index: number = -1;
private line: number = 0;
private column: number = -1;
private currentTokenStart: ParseLocation;
private currentTokenType: HtmlTokenType;
private inExpansionCase: boolean = false;
private inExpansionForm: boolean = false;
tokens: HtmlToken[] = [];
errors: HtmlTokenError[] = [];
constructor(private file: ParseSourceFile) {
constructor(private file: ParseSourceFile, private tokenizeExpansionForms: boolean) {
this.input = file.content;
this.length = file.content.length;
this._advance();
@ -149,6 +163,18 @@ class _HtmlTokenizer {
} else {
this._consumeTagOpen(start);
}
} else if (isSpecialFormStart(this.peek, this.nextPeek) && this.tokenizeExpansionForms) {
this._consumeExpansionFormStart();
} else if (this.peek === $EQ && this.tokenizeExpansionForms) {
this._consumeExpansionCaseStart();
} else if (this.peek === $RBRACE && this.inExpansionCase && this.tokenizeExpansionForms) {
this._consumeExpansionCaseEnd();
} else if (this.peek === $RBRACE && !this.inExpansionCase && this.tokenizeExpansionForms) {
this._consumeExpansionFormEnd();
} else {
this._consumeText();
}
@ -218,6 +244,8 @@ class _HtmlTokenizer {
}
this.index++;
this.peek = this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
this.nextPeek =
this.index + 1 >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index + 1);
}
private _attemptCharCode(charCode: number): boolean {
@ -506,20 +534,109 @@ class _HtmlTokenizer {
this._endToken(prefixAndName);
}
/**
 * Lexes the head of an expansion form, e.g. `{one.two, three,`.
 *
 * Emits EXPANSION_FORM_START for the `{`, then two RAW_TEXT tokens: the text read up to the
 * first comma and the text read up to the second comma. Whitespace following each comma is
 * skipped. Finally marks the tokenizer as being inside an expansion form.
 */
private _consumeExpansionFormStart() {
  this._beginToken(HtmlTokenType.EXPANSION_FORM_START, this._getLocation());
  this._requireCharCode($LBRACE);
  this._endToken([]);

  // Two comma-terminated raw segments follow the brace (the condition, then the type);
  // both are lexed the same way.
  for (let segment = 0; segment < 2; segment++) {
    this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
    const rawText = this._readUntil($COMMA);
    this._endToken([rawText], this._getLocation());
    this._requireCharCode($COMMA);
    this._attemptCharCodeUntilFn(isNotWhitespace);
  }

  this.inExpansionForm = true;
}
/**
 * Lexes the head of an expansion case, e.g. `=4 {`.
 *
 * Emits EXPANSION_CASE_VALUE carrying the trimmed text between `=` and `{`, then
 * EXPANSION_CASE_EXP_START for the `{`, and marks the tokenizer as being inside a case.
 */
private _consumeExpansionCaseStart() {
  // The `=` is consumed before the value token begins, so it is not part of the value.
  this._requireCharCode($EQ);

  const valueStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_VALUE, valueStart);
  const caseValue = this._readUntil($LBRACE).trim();
  const valueEnd = this._getLocation();
  this._endToken([caseValue], valueEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_START, braceStart);
  this._requireCharCode($LBRACE);
  const braceEnd = this._getLocation();
  this._endToken([], braceEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  this.inExpansionCase = true;
}
/**
 * Lexes the `}` that closes an expansion case: emits EXPANSION_CASE_EXP_END, skips the
 * whitespace that follows, and clears the in-case flag.
 *
 * NOTE(review): the flag is a single boolean, so closing an inner case also clears it for
 * any enclosing case — confirm whether nested expansion forms are expected to work.
 */
private _consumeExpansionCaseEnd() {
  const tokenStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_END, tokenStart);
  this._requireCharCode($RBRACE);
  const tokenEnd = this._getLocation();
  this._endToken([], tokenEnd);

  this._attemptCharCodeUntilFn(isNotWhitespace);
  this.inExpansionCase = false;
}
/**
 * Lexes the `}` that closes an expansion form: emits EXPANSION_FORM_END and clears the
 * in-form flag. Unlike the case-end handler, no trailing whitespace is skipped here.
 */
private _consumeExpansionFormEnd() {
  const tokenStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_FORM_END, tokenStart);
  this._requireCharCode($RBRACE);
  this._endToken([]);

  this.inExpansionForm = false;
}
/**
 * Lexes a TEXT token.
 *
 * Characters are read until `isTextEnd` reports a stop. `{{` and `}}` are read as pairs and
 * toggle the interpolation state that is passed to `isTextEnd`. The collected text is run
 * through `_processCarriageReturns` before it becomes the token's single part.
 */
private _consumeText() {
  this._beginToken(HtmlTokenType.TEXT, this._getLocation());

  const chunks = [];
  let insideInterpolation = false;

  // do/while: the first step is always taken without consulting `isTextEnd`, so at least
  // one character is consumed per call.
  do {
    const opensInterpolation = this.peek === $LBRACE && this.nextPeek === $LBRACE;
    const closesInterpolation = this.peek === $RBRACE && this.nextPeek === $RBRACE;

    if (opensInterpolation) {
      chunks.push(this._readChar(true));
      chunks.push(this._readChar(true));
      insideInterpolation = true;
    } else if (closesInterpolation && insideInterpolation) {
      chunks.push(this._readChar(true));
      chunks.push(this._readChar(true));
      insideInterpolation = false;
    } else {
      chunks.push(this._readChar(true));
    }
  } while (!this.isTextEnd(insideInterpolation));

  this._endToken([this._processCarriageReturns(chunks.join(''))]);
}
/**
 * Reports whether the current TEXT token must stop before the character at `peek`.
 *
 * @param interpolation whether the caller is currently between `{{` and `}}`
 * @returns `true` at `<` or at end of input. When expansion forms are being tokenized and
 *     the caller is not inside an interpolation, also `true` at a `{` that starts an
 *     expansion form and at a `}` while `inExpansionForm` is set.
 */
private isTextEnd(interpolation: boolean): boolean {
  if (this.peek === $LT || this.peek === $EOF) return true;

  // Braces inside `{{ ... }}` belong to the interpolated expression (for example an object
  // literal in `{{ {a: 1} }}`), so they must neither open nor close an expansion form.
  if (this.tokenizeExpansionForms && !interpolation) {
    if (isSpecialFormStart(this.peek, this.nextPeek)) return true;
    if (this.peek === $RBRACE && this.inExpansionForm) return true;
  }

  return false;
}
/**
 * Captures the tokenizer state as `[peek, index, column, line, tokens.length]`.
 *
 * NOTE(review): `nextPeek` is not part of the snapshot — confirm that `_restorePosition`
 * re-derives it, otherwise it is stale after a restore.
 */
private _savePosition(): number[] {
  const {peek, index, column, line} = this;
  const tokenCount = this.tokens.length;
  return [peek, index, column, line, tokenCount];
}
/**
 * Advances via `_attemptUntilChar(char)` and returns the input consumed by that call.
 *
 * @param char char code handed to `_attemptUntilChar` (presumably the code to stop at —
 *     callers consume that character themselves right afterwards)
 * @returns the input between the index on entry and the index after advancing
 */
private _readUntil(char: number): string {
  const from = this.index;
  this._attemptUntilChar(char);
  const to = this.index;
  return this.input.substring(from, to);
}
private _restorePosition(position: number[]): void {
this.peek = position[0];
this.index = position[1];
@ -558,8 +675,8 @@ function isNamedEntityEnd(code: number): boolean {
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
}
function isTextEnd(code: number): boolean {
return code === $LT || code === $EOF;
/**
 * Returns `true` when `peek` is `{` and `nextPeek` is not `{`, i.e. a single opening brace
 * rather than the `{{` that opens an interpolation.
 */
function isSpecialFormStart(peek: number, nextPeek: number): boolean {
  if (peek !== $LBRACE) return false;
  return nextPeek !== $LBRACE;
}
function isAsciiLetter(code: number): boolean {

View File

@ -576,6 +576,78 @@ export function main() {
});
// Specs for expansion forms such as `{exp, type, =value {...}}`. Every case passes `true`
// as the second argument of tokenizeAndHumanizeParts to turn expansion-form tokenizing on.
describe("expansion forms", () => {
// Two cases in one form; whitespace between the cases and before the closing brace
// produces no tokens.
it("should parse an expansion form", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four} =5 {five} }', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_CASE_VALUE, '5'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'five'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});
// Plain text directly before and after the form stays in separate TEXT tokens.
it("should parse an expansion form with text elements surrounding it", () => {
expect(tokenizeAndHumanizeParts('before{one.two, three, =4 {four}}after', true))
.toEqual([
[HtmlTokenType.TEXT, "before"],
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.TEXT, "after"],
[HtmlTokenType.EOF]
]);
});
// An element inside a case body is tokenized as ordinary tag tokens.
it("should parse an expansion forms with elements in it", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four <b>a</b>}}', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four '],
[HtmlTokenType.TAG_OPEN_START, null, 'b'],
[HtmlTokenType.TAG_OPEN_END],
[HtmlTokenType.TEXT, 'a'],
[HtmlTokenType.TAG_CLOSE, null, 'b'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});
// `{{a}}` stays inside the TEXT token; only the two braces after it close the case and
// the form.
it("should parse an expansion forms with interpolation in it", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four {{a}}}}', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four {{a}}'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});
});
describe('errors', () => {
it('should include 2 lines of context in message', () => {
let src = "111\n222\n333\nE\n444\n555\n666\n";
@ -604,8 +676,9 @@ export function main() {
});
}
function tokenizeWithoutErrors(input: string): HtmlToken[] {
var tokenizeResult = tokenizeHtml(input, 'someUrl');
function tokenizeWithoutErrors(input: string,
tokenizeExpansionForms: boolean = false): HtmlToken[] {
var tokenizeResult = tokenizeHtml(input, 'someUrl', tokenizeExpansionForms);
if (tokenizeResult.errors.length > 0) {
var errorString = tokenizeResult.errors.join('\n');
throw new BaseException(`Unexpected parse errors:\n${errorString}`);
@ -613,8 +686,9 @@ function tokenizeWithoutErrors(input: string): HtmlToken[] {
return tokenizeResult.tokens;
}
function tokenizeAndHumanizeParts(input: string): any[] {
return tokenizeWithoutErrors(input).map(token => [<any>token.type].concat(token.parts));
/**
 * Tokenizes `input` via `tokenizeWithoutErrors` and flattens every token into an array of
 * its type followed by its parts, which is easy to compare in expectations.
 *
 * @param input source text to tokenize
 * @param tokenizeExpansionForms forwarded to `tokenizeWithoutErrors`; defaults to `false`
 */
function tokenizeAndHumanizeParts(input: string, tokenizeExpansionForms: boolean = false): any[] {
  const tokens = tokenizeWithoutErrors(input, tokenizeExpansionForms);
  return tokens.map(token => [<any>token.type].concat(token.parts));
}
function tokenizeAndHumanizeSourceSpans(input: string): any[] {