feat(html_lexer): support special forms used by i18n { exp, plural, =0 {} }
This commit is contained in:
parent
d99823e2fd
commit
7f297666ca
|
@ -25,6 +25,11 @@ export enum HtmlTokenType {
|
|||
ATTR_NAME,
|
||||
ATTR_VALUE,
|
||||
DOC_TYPE,
|
||||
EXPANSION_FORM_START,
|
||||
EXPANSION_CASE_VALUE,
|
||||
EXPANSION_CASE_EXP_START,
|
||||
EXPANSION_CASE_EXP_END,
|
||||
EXPANSION_FORM_END,
|
||||
EOF
|
||||
}
|
||||
|
||||
|
@ -43,8 +48,10 @@ export class HtmlTokenizeResult {
|
|||
constructor(public tokens: HtmlToken[], public errors: HtmlTokenError[]) {}
|
||||
}
|
||||
|
||||
/**
 * Tokenizes an HTML source string.
 *
 * @param sourceContent the HTML text to tokenize.
 * @param sourceUrl the URL recorded on the `ParseSourceFile` built for `sourceContent`.
 * @param tokenizeExpansionForms when true, expansion forms such as
 *     `{exp, plural, =0 {...}}` are emitted as dedicated EXPANSION_* tokens instead of
 *     plain text. Defaults to false, so existing two-argument callers are unaffected.
 * @returns the tokens and the tokenization errors collected for the input.
 */
export function tokenizeHtml(sourceContent: string, sourceUrl: string,
                             tokenizeExpansionForms: boolean = false): HtmlTokenizeResult {
  const sourceFile = new ParseSourceFile(sourceContent, sourceUrl);
  return new _HtmlTokenizer(sourceFile, tokenizeExpansionForms).tokenize();
}
|
||||
|
||||
const $EOF = 0;
|
||||
|
@ -75,6 +82,9 @@ const $GT = 62;
|
|||
const $QUESTION = 63;
|
||||
const $LBRACKET = 91;
|
||||
const $RBRACKET = 93;
|
||||
// Character codes for `{` and `}`: they delimit expansion forms and their cases, and
// doubled (`{{` / `}}`) they mark interpolation inside text.
const $LBRACE = 123;
|
||||
const $RBRACE = 125;
|
||||
// Character code for `,`, which terminates each of the two header fields of an
// expansion form.
const $COMMA = 44;
|
||||
const $A = 65;
|
||||
const $F = 70;
|
||||
const $X = 88;
|
||||
|
@ -108,16 +118,20 @@ class _HtmlTokenizer {
|
|||
private length: number;
|
||||
// Note: this is always lowercase!
|
||||
private peek: number = -1;
|
||||
// Character code one position after `peek`; `_advance` sets it to $EOF when that
// position is past the end of the input.
private nextPeek: number = -1;
|
||||
private index: number = -1;
|
||||
private line: number = 0;
|
||||
private column: number = -1;
|
||||
private currentTokenStart: ParseLocation;
|
||||
private currentTokenType: HtmlTokenType;
|
||||
|
||||
// Set once the `{` opening an expansion case has been consumed; cleared when the `}`
// closing that case is consumed.
private inExpansionCase: boolean = false;
|
||||
// Set once the header of an expansion form has been consumed; cleared when the `}`
// closing the form is consumed.
private inExpansionForm: boolean = false;
|
||||
|
||||
tokens: HtmlToken[] = [];
|
||||
errors: HtmlTokenError[] = [];
|
||||
|
||||
constructor(private file: ParseSourceFile) {
|
||||
constructor(private file: ParseSourceFile, private tokenizeExpansionForms: boolean) {
|
||||
this.input = file.content;
|
||||
this.length = file.content.length;
|
||||
this._advance();
|
||||
|
@ -149,6 +163,18 @@ class _HtmlTokenizer {
|
|||
} else {
|
||||
this._consumeTagOpen(start);
|
||||
}
|
||||
} else if (isSpecialFormStart(this.peek, this.nextPeek) && this.tokenizeExpansionForms) {
|
||||
this._consumeExpansionFormStart();
|
||||
|
||||
} else if (this.peek === $EQ && this.tokenizeExpansionForms) {
|
||||
this._consumeExpansionCaseStart();
|
||||
|
||||
} else if (this.peek === $RBRACE && this.inExpansionCase && this.tokenizeExpansionForms) {
|
||||
this._consumeExpansionCaseEnd();
|
||||
|
||||
} else if (this.peek === $RBRACE && !this.inExpansionCase && this.tokenizeExpansionForms) {
|
||||
this._consumeExpansionFormEnd();
|
||||
|
||||
} else {
|
||||
this._consumeText();
|
||||
}
|
||||
|
@ -218,6 +244,8 @@ class _HtmlTokenizer {
|
|||
}
|
||||
this.index++;
|
||||
this.peek = this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
|
||||
this.nextPeek =
|
||||
this.index + 1 >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index + 1);
|
||||
}
|
||||
|
||||
private _attemptCharCode(charCode: number): boolean {
|
||||
|
@ -506,20 +534,109 @@ class _HtmlTokenizer {
|
|||
this._endToken(prefixAndName);
|
||||
}
|
||||
|
||||
/**
 * Lexes the header of an expansion form: the opening `{` followed by two
 * comma-terminated fields, e.g. `{one.two, three,`.
 *
 * Emits EXPANSION_FORM_START for the brace, then one RAW_TEXT token per field. A
 * field's text is everything read up to the next `,` and is stored as written (it is
 * not trimmed). After each field the `,` is required and the whitespace that follows
 * it is skipped. The tokenizer is then flagged as being inside an expansion form.
 */
private _consumeExpansionFormStart() {
  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_FORM_START, braceStart);
  this._requireCharCode($LBRACE);
  this._endToken([]);

  // Both header fields are lexed by the very same sequence of steps.
  for (let field = 0; field < 2; field++) {
    this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
    const rawText = this._readUntil($COMMA);
    this._endToken([rawText], this._getLocation());
    this._requireCharCode($COMMA);
    this._attemptCharCodeUntilFn(isNotWhitespace);
  }

  this.inExpansionForm = true;
}
|
||||
|
||||
/**
 * Lexes the start of a single expansion case, e.g. `=4 {`.
 *
 * The leading `=` is required but is not part of any token. The text up to the next
 * `{` is trimmed and emitted as EXPANSION_CASE_VALUE; the `{` itself is emitted as
 * EXPANSION_CASE_EXP_START. Whitespace following the brace is skipped, and the
 * tokenizer is flagged as being inside an expansion case.
 */
private _consumeExpansionCaseStart() {
  this._requireCharCode($EQ);

  this._beginToken(HtmlTokenType.EXPANSION_CASE_VALUE, this._getLocation());
  const untrimmedValue = this._readUntil($LBRACE);
  const caseValue = untrimmedValue.trim();
  this._endToken([caseValue], this._getLocation());
  this._attemptCharCodeUntilFn(isNotWhitespace);

  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_START, braceStart);
  this._requireCharCode($LBRACE);
  const braceEnd = this._getLocation();
  this._endToken([], braceEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  this.inExpansionCase = true;
}
|
||||
|
||||
/**
 * Lexes the `}` that closes an expansion case.
 *
 * Emits EXPANSION_CASE_EXP_END for the brace, skips the whitespace after it and
 * clears the "inside an expansion case" flag.
 */
private _consumeExpansionCaseEnd() {
  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_END, braceStart);
  this._requireCharCode($RBRACE);
  const braceEnd = this._getLocation();
  this._endToken([], braceEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  this.inExpansionCase = false;
}
|
||||
|
||||
/**
 * Lexes the `}` that closes an expansion form.
 *
 * Emits EXPANSION_FORM_END for the brace and clears the "inside an expansion form"
 * flag. Unlike the end of a case, trailing whitespace is left in the input.
 */
private _consumeExpansionFormEnd() {
  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_FORM_END, braceStart);
  this._requireCharCode($RBRACE);
  this._endToken([]);

  this.inExpansionForm = false;
}
|
||||
|
||||
/**
 * Consumes a run of character data and emits it as a single TEXT token.
 *
 * At least one character is always consumed, so the caller makes progress even when
 * the current character would otherwise count as a text end. While reading, `{{` and
 * `}}` are consumed as pairs and toggle the `interpolation` state, which is passed to
 * `isTextEnd` so that a `}` belonging to an interpolation does not end the text.
 * The collected characters are joined and passed through `_processCarriageReturns`
 * before being stored on the token.
 */
private _consumeText() {
  const start = this._getLocation();
  this._beginToken(HtmlTokenType.TEXT, start);

  const parts = [];
  let interpolation = false;

  // do/while rather than while: the first character is read unconditionally. On that
  // first pass `interpolation` is still false, so the `}}` branch cannot be taken.
  do {
    if (this.peek === $LBRACE && this.nextPeek === $LBRACE) {
      // Opening `{{`: read both braces and enter interpolation.
      parts.push(this._readChar(true));
      parts.push(this._readChar(true));
      interpolation = true;
    } else if (this.peek === $RBRACE && this.nextPeek === $RBRACE && interpolation) {
      // Closing `}}`: read both braces and leave interpolation.
      parts.push(this._readChar(true));
      parts.push(this._readChar(true));
      interpolation = false;
    } else {
      parts.push(this._readChar(true));
    }
  } while (!this.isTextEnd(interpolation));

  this._endToken([this._processCarriageReturns(parts.join(''))]);
}
|
||||
|
||||
/**
 * Reports whether the text run being consumed stops at the current character.
 *
 * `<` and end of input always end the run. When expansion forms are being tokenized
 * and the cursor is NOT inside a `{{ ... }}` interpolation, the run also ends at:
 *  - a `{` that is not followed by another `{` (start of an expansion form), and
 *  - a `}` while inside an expansion form (end of a case or of the form).
 *
 * Inside an interpolation, braces belong to the bound expression (for example an
 * object literal such as `{{ {a: 1} }}`), so neither check applies there; otherwise
 * the expression would be cut short and misread as an expansion form.
 *
 * @param interpolation whether the cursor is currently between `{{` and `}}`.
 * @returns true when the current character must not be added to the text token.
 */
private isTextEnd(interpolation: boolean): boolean {
  if (this.peek === $LT || this.peek === $EOF) return true;
  if (this.tokenizeExpansionForms && !interpolation) {
    if (isSpecialFormStart(this.peek, this.nextPeek)) return true;
    if (this.peek === $RBRACE && this.inExpansionForm) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Captures the current cursor state as a plain array.
 *
 * Layout: [peek, index, column, line, number of tokens emitted so far].
 *
 * NOTE(review): `nextPeek` is not part of the snapshot; confirm that whatever
 * restores a snapshot does not leave `nextPeek` stale.
 */
private _savePosition(): number[] {
  const emittedTokenCount = this.tokens.length;
  return [this.peek, this.index, this.column, this.line, emittedTokenCount];
}
|
||||
|
||||
/**
 * Moves the cursor forward with `_attemptUntilChar(char)` and returns the slice of
 * the input that was skipped over, i.e. from the index on entry up to (excluding)
 * the index on exit.
 *
 * @param char character code handed to `_attemptUntilChar` as the stop character.
 */
private _readUntil(char: number): string {
  const begin = this.index;
  this._attemptUntilChar(char);
  const end = this.index;
  return this.input.substring(begin, end);
}
|
||||
|
||||
private _restorePosition(position: number[]): void {
|
||||
this.peek = position[0];
|
||||
this.index = position[1];
|
||||
|
@ -558,8 +675,8 @@ function isNamedEntityEnd(code: number): boolean {
|
|||
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
|
||||
}
|
||||
|
||||
/**
 * Context-free text-end test: text stops at `<` or at end of input.
 *
 * @param code character code to test.
 */
function isTextEnd(code: number): boolean {
  return code === $LT || code === $EOF;
}

/**
 * Tells whether the two upcoming characters start an expansion form: a `{` that is
 * not immediately followed by a second `{` (a doubled `{{` opens an interpolation
 * instead).
 *
 * @param peek character code at the cursor.
 * @param nextPeek character code right after the cursor.
 */
function isSpecialFormStart(peek: number, nextPeek: number): boolean {
  return peek === $LBRACE && nextPeek !== $LBRACE;
}
|
||||
|
||||
function isAsciiLetter(code: number): boolean {
|
||||
|
|
|
@ -576,6 +576,78 @@ export function main() {
|
|||
|
||||
});
|
||||
|
||||
describe("expansion forms", () => {
  // Every fixture below contains `{one.two, three, =4 {`, which is expected to lex to
  // exactly these five tokens.
  const formOpenedWithCase4: any[][] = [
    [HtmlTokenType.EXPANSION_FORM_START],
    [HtmlTokenType.RAW_TEXT, 'one.two'],
    [HtmlTokenType.RAW_TEXT, 'three'],
    [HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
    [HtmlTokenType.EXPANSION_CASE_EXP_START]
  ];
  // The `}` closing the last case followed by the `}` closing the form.
  const caseAndFormClosed: any[][] =
      [[HtmlTokenType.EXPANSION_CASE_EXP_END], [HtmlTokenType.EXPANSION_FORM_END]];
  const eof: any[][] = [[HtmlTokenType.EOF]];

  it("should parse an expansion form", () => {
    const caseBodies: any[][] = [
      [HtmlTokenType.TEXT, 'four'],
      [HtmlTokenType.EXPANSION_CASE_EXP_END],
      [HtmlTokenType.EXPANSION_CASE_VALUE, '5'],
      [HtmlTokenType.EXPANSION_CASE_EXP_START],
      [HtmlTokenType.TEXT, 'five']
    ];
    const expected = formOpenedWithCase4.concat(caseBodies, caseAndFormClosed, eof);

    expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four} =5 {five} }', true))
        .toEqual(expected);
  });

  it("should parse an expansion form with text elements surrounding it", () => {
    const before: any[][] = [[HtmlTokenType.TEXT, "before"]];
    const caseBody: any[][] = [[HtmlTokenType.TEXT, 'four']];
    const after: any[][] = [[HtmlTokenType.TEXT, "after"]];
    const expected = before.concat(formOpenedWithCase4, caseBody, caseAndFormClosed, after, eof);

    expect(tokenizeAndHumanizeParts('before{one.two, three, =4 {four}}after', true))
        .toEqual(expected);
  });

  it("should parse an expansion forms with elements in it", () => {
    const caseBody: any[][] = [
      [HtmlTokenType.TEXT, 'four '],
      [HtmlTokenType.TAG_OPEN_START, null, 'b'],
      [HtmlTokenType.TAG_OPEN_END],
      [HtmlTokenType.TEXT, 'a'],
      [HtmlTokenType.TAG_CLOSE, null, 'b']
    ];
    const expected = formOpenedWithCase4.concat(caseBody, caseAndFormClosed, eof);

    expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four <b>a</b>}}', true))
        .toEqual(expected);
  });

  it("should parse an expansion forms with interpolation in it", () => {
    const caseBody: any[][] = [[HtmlTokenType.TEXT, 'four {{a}}']];
    const expected = formOpenedWithCase4.concat(caseBody, caseAndFormClosed, eof);

    expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four {{a}}}}', true))
        .toEqual(expected);
  });
});
|
||||
|
||||
describe('errors', () => {
|
||||
it('should include 2 lines of context in message', () => {
|
||||
let src = "111\n222\n333\nE\n444\n555\n666\n";
|
||||
|
@ -604,8 +676,9 @@ export function main() {
|
|||
});
|
||||
}
|
||||
|
||||
function tokenizeWithoutErrors(input: string): HtmlToken[] {
|
||||
var tokenizeResult = tokenizeHtml(input, 'someUrl');
|
||||
function tokenizeWithoutErrors(input: string,
|
||||
tokenizeExpansionForms: boolean = false): HtmlToken[] {
|
||||
var tokenizeResult = tokenizeHtml(input, 'someUrl', tokenizeExpansionForms);
|
||||
if (tokenizeResult.errors.length > 0) {
|
||||
var errorString = tokenizeResult.errors.join('\n');
|
||||
throw new BaseException(`Unexpected parse errors:\n${errorString}`);
|
||||
|
@ -613,8 +686,9 @@ function tokenizeWithoutErrors(input: string): HtmlToken[] {
|
|||
return tokenizeResult.tokens;
|
||||
}
|
||||
|
||||
/**
 * Tokenizes `input` (failing on any tokenization error) and flattens each token into
 * an array of the form `[type, ...parts]` so that expectations are easy to write.
 *
 * @param input HTML text to tokenize.
 * @param tokenizeExpansionForms forwarded to the tokenizer; defaults to false so that
 *     existing single-argument callers keep their behavior.
 */
function tokenizeAndHumanizeParts(input: string, tokenizeExpansionForms: boolean = false): any[] {
  return tokenizeWithoutErrors(input, tokenizeExpansionForms)
      .map(token => [<any>token.type].concat(token.parts));
}
|
||||
|
||||
function tokenizeAndHumanizeSourceSpans(input: string): any[] {
|
||||
|
|
Loading…
Reference in New Issue