feat(html_lexer): support special forms used by i18n { exp, plural, =0 {} }
This commit is contained in:
parent
d99823e2fd
commit
7f297666ca
|
@ -25,6 +25,11 @@ export enum HtmlTokenType {
|
||||||
ATTR_NAME,
|
ATTR_NAME,
|
||||||
ATTR_VALUE,
|
ATTR_VALUE,
|
||||||
DOC_TYPE,
|
DOC_TYPE,
|
||||||
|
EXPANSION_FORM_START,
|
||||||
|
EXPANSION_CASE_VALUE,
|
||||||
|
EXPANSION_CASE_EXP_START,
|
||||||
|
EXPANSION_CASE_EXP_END,
|
||||||
|
EXPANSION_FORM_END,
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,8 +48,10 @@ export class HtmlTokenizeResult {
|
||||||
constructor(public tokens: HtmlToken[], public errors: HtmlTokenError[]) {}
|
constructor(public tokens: HtmlToken[], public errors: HtmlTokenError[]) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Tokenizes an HTML source string.
 *
 * @param sourceContent the HTML text to tokenize
 * @param sourceUrl url passed to the `ParseSourceFile` created for the content
 * @param tokenizeExpansionForms forwarded to the tokenizer; when true it also
 *     recognises expansion forms such as `{ exp, plural, =0 {...} }`.
 *     Defaults to false.
 * @returns whatever `_HtmlTokenizer.tokenize()` produces for the source
 */
export function tokenizeHtml(sourceContent: string, sourceUrl: string,
                             tokenizeExpansionForms: boolean = false): HtmlTokenizeResult {
  const sourceFile = new ParseSourceFile(sourceContent, sourceUrl);
  const tokenizer = new _HtmlTokenizer(sourceFile, tokenizeExpansionForms);
  return tokenizer.tokenize();
}
|
||||||
|
|
||||||
const $EOF = 0;
|
const $EOF = 0;
|
||||||
|
@ -75,6 +82,9 @@ const $GT = 62;
|
||||||
const $QUESTION = 63;
|
const $QUESTION = 63;
|
||||||
const $LBRACKET = 91;
|
const $LBRACKET = 91;
|
||||||
const $RBRACKET = 93;
|
const $RBRACKET = 93;
|
||||||
|
const $LBRACE = 123;
|
||||||
|
const $RBRACE = 125;
|
||||||
|
const $COMMA = 44;
|
||||||
const $A = 65;
|
const $A = 65;
|
||||||
const $F = 70;
|
const $F = 70;
|
||||||
const $X = 88;
|
const $X = 88;
|
||||||
|
@ -108,16 +118,20 @@ class _HtmlTokenizer {
|
||||||
private length: number;
|
private length: number;
|
||||||
// Note: this is always lowercase!
|
// Note: this is always lowercase!
|
||||||
private peek: number = -1;
|
private peek: number = -1;
|
||||||
|
private nextPeek: number = -1;
|
||||||
private index: number = -1;
|
private index: number = -1;
|
||||||
private line: number = 0;
|
private line: number = 0;
|
||||||
private column: number = -1;
|
private column: number = -1;
|
||||||
private currentTokenStart: ParseLocation;
|
private currentTokenStart: ParseLocation;
|
||||||
private currentTokenType: HtmlTokenType;
|
private currentTokenType: HtmlTokenType;
|
||||||
|
|
||||||
|
private inExpansionCase: boolean = false;
|
||||||
|
private inExpansionForm: boolean = false;
|
||||||
|
|
||||||
tokens: HtmlToken[] = [];
|
tokens: HtmlToken[] = [];
|
||||||
errors: HtmlTokenError[] = [];
|
errors: HtmlTokenError[] = [];
|
||||||
|
|
||||||
constructor(private file: ParseSourceFile) {
|
constructor(private file: ParseSourceFile, private tokenizeExpansionForms: boolean) {
|
||||||
this.input = file.content;
|
this.input = file.content;
|
||||||
this.length = file.content.length;
|
this.length = file.content.length;
|
||||||
this._advance();
|
this._advance();
|
||||||
|
@ -149,6 +163,18 @@ class _HtmlTokenizer {
|
||||||
} else {
|
} else {
|
||||||
this._consumeTagOpen(start);
|
this._consumeTagOpen(start);
|
||||||
}
|
}
|
||||||
|
} else if (isSpecialFormStart(this.peek, this.nextPeek) && this.tokenizeExpansionForms) {
|
||||||
|
this._consumeExpansionFormStart();
|
||||||
|
|
||||||
|
} else if (this.peek === $EQ && this.tokenizeExpansionForms) {
|
||||||
|
this._consumeExpansionCaseStart();
|
||||||
|
|
||||||
|
} else if (this.peek === $RBRACE && this.inExpansionCase && this.tokenizeExpansionForms) {
|
||||||
|
this._consumeExpansionCaseEnd();
|
||||||
|
|
||||||
|
} else if (this.peek === $RBRACE && !this.inExpansionCase && this.tokenizeExpansionForms) {
|
||||||
|
this._consumeExpansionFormEnd();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
this._consumeText();
|
this._consumeText();
|
||||||
}
|
}
|
||||||
|
@ -218,6 +244,8 @@ class _HtmlTokenizer {
|
||||||
}
|
}
|
||||||
this.index++;
|
this.index++;
|
||||||
this.peek = this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
|
this.peek = this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
|
||||||
|
this.nextPeek =
|
||||||
|
this.index + 1 >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private _attemptCharCode(charCode: number): boolean {
|
private _attemptCharCode(charCode: number): boolean {
|
||||||
|
@ -506,20 +534,109 @@ class _HtmlTokenizer {
|
||||||
this._endToken(prefixAndName);
|
this._endToken(prefixAndName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Consumes the head of an expansion form: `{<text>, <text>,`.
 *
 * Emits an EXPANSION_FORM_START token for the opening `{`, followed by two
 * RAW_TEXT tokens, each holding the input up to the next comma. After each
 * comma, `_attemptCharCodeUntilFn(isNotWhitespace)` is invoked (presumably to
 * skip whitespace). Finally flags the tokenizer as being inside a form.
 */
private _consumeExpansionFormStart() {
  // The opening brace itself carries no parts.
  this._beginToken(HtmlTokenType.EXPANSION_FORM_START, this._getLocation());
  this._requireCharCode($LBRACE);
  this._endToken([]);

  // Two comma-terminated segments follow the brace; both are emitted the same way.
  for (let segment = 0; segment < 2; segment++) {
    this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
    const text = this._readUntil($COMMA);
    this._endToken([text], this._getLocation());
    this._requireCharCode($COMMA);
    this._attemptCharCodeUntilFn(isNotWhitespace);
  }

  this.inExpansionForm = true;
}
|
||||||
|
|
||||||
|
/**
 * Consumes the start of an expansion case: `=<value> {`.
 *
 * The leading `=` is required but is not part of any token. Emits an
 * EXPANSION_CASE_VALUE token with the trimmed text found before the `{`, then
 * an EXPANSION_CASE_EXP_START token for the `{` itself, and flags the
 * tokenizer as being inside a case.
 */
private _consumeExpansionCaseStart() {
  this._requireCharCode($EQ);

  // Case value: everything up to the opening brace, without surrounding whitespace.
  const valueStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_VALUE, valueStart);
  const rawValue = this._readUntil($LBRACE);
  const caseValue = rawValue.trim();
  const valueEnd = this._getLocation();
  this._endToken([caseValue], valueEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  // Opening brace of the case expression.
  const braceStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_START, braceStart);
  this._requireCharCode($LBRACE);
  const braceEnd = this._getLocation();
  this._endToken([], braceEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  this.inExpansionCase = true;
}
|
||||||
|
|
||||||
|
/**
 * Consumes the `}` that closes an expansion case.
 *
 * Emits an EXPANSION_CASE_EXP_END token (no parts), then calls
 * `_attemptCharCodeUntilFn(isNotWhitespace)` and clears the in-case flag.
 */
private _consumeExpansionCaseEnd() {
  const tokenStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_END, tokenStart);
  this._requireCharCode($RBRACE);
  const tokenEnd = this._getLocation();
  this._endToken([], tokenEnd);
  this._attemptCharCodeUntilFn(isNotWhitespace);

  this.inExpansionCase = false;
}
|
||||||
|
|
||||||
|
/**
 * Consumes the `}` that closes an expansion form.
 *
 * Emits an EXPANSION_FORM_END token (no parts) and clears the in-form flag.
 */
private _consumeExpansionFormEnd() {
  const tokenStart = this._getLocation();
  this._beginToken(HtmlTokenType.EXPANSION_FORM_END, tokenStart);
  this._requireCharCode($RBRACE);
  this._endToken([]);

  this.inExpansionForm = false;
}
|
||||||
|
|
||||||
/**
 * Consumes a run of text and emits it as a single TEXT token.
 *
 * Tracks whether the cursor is inside a `{{ ... }}` interpolation so that
 * `isTextEnd` can take it into account. At least one character (or one `{{`
 * pair) is always consumed before the end-of-text check is made.
 */
private _consumeText() {
  const start = this._getLocation();
  this._beginToken(HtmlTokenType.TEXT, start);

  const parts = [];
  let interpolation = false;

  do {
    const atInterpolationOpen = this.peek === $LBRACE && this.nextPeek === $LBRACE;
    const atInterpolationClose = this.peek === $RBRACE && this.nextPeek === $RBRACE;

    if (atInterpolationOpen) {
      // `{{` — take both braces and enter interpolation mode.
      parts.push(this._readChar(true), this._readChar(true));
      interpolation = true;
    } else if (atInterpolationClose && interpolation) {
      // `}}` — only meaningful while an interpolation is open.
      parts.push(this._readChar(true), this._readChar(true));
      interpolation = false;
    } else {
      parts.push(this._readChar(true));
    }
  } while (!this.isTextEnd(interpolation));

  this._endToken([this._processCarriageReturns(parts.join(''))]);
}
|
||||||
|
|
||||||
|
/**
 * Tells `_consumeText` whether the current TEXT token must stop before
 * `this.peek`.
 *
 * Text always ends at `<` and at end of input. When expansion forms are being
 * tokenized, text also ends at the start of a form (a `{` not followed by a
 * second `{`) and, while inside a form, at a closing `}`.
 *
 * @param interpolation true while the cursor is inside a `{{ ... }}` binding
 * @returns true when the text token should end at the current character
 */
private isTextEnd(interpolation: boolean): boolean {
  if (this.peek === $LT || this.peek === $EOF) return true;

  // Braces inside `{{ ... }}` belong to the binding expression (for example an
  // object literal: `{{ {a: 1} }}`), so neither the start nor the end of an
  // expansion form is recognised there.
  if (this.tokenizeExpansionForms && !interpolation) {
    if (isSpecialFormStart(this.peek, this.nextPeek)) return true;
    if (this.peek === $RBRACE && this.inExpansionForm) return true;
  }
  return false;
}
|
||||||
|
|
||||||
/**
 * Captures the tokenizer cursor as a plain number array, in the order
 * `[peek, index, column, line, tokens.length]`. `_restorePosition` reads the
 * entries back by index.
 */
private _savePosition(): number[] {
  // NOTE(review): `nextPeek` is not part of this snapshot — confirm that
  // `_restorePosition` recomputes it, otherwise it is stale after a restore.
  const snapshot = [this.peek, this.index, this.column, this.line, this.tokens.length];
  return snapshot;
}
|
||||||
|
|
||||||
|
/**
 * Runs `_attemptUntilChar(char)` and returns the slice of the input that the
 * cursor moved over during that call.
 *
 * @param char character code handed to `_attemptUntilChar`
 * @returns the input between the index before the call and the index after it
 */
private _readUntil(char: number): string {
  const from = this.index;
  this._attemptUntilChar(char);
  const to = this.index;
  return this.input.substring(from, to);
}
|
||||||
|
|
||||||
private _restorePosition(position: number[]): void {
|
private _restorePosition(position: number[]): void {
|
||||||
this.peek = position[0];
|
this.peek = position[0];
|
||||||
this.index = position[1];
|
this.index = position[1];
|
||||||
|
@ -558,8 +675,8 @@ function isNamedEntityEnd(code: number): boolean {
|
||||||
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
|
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns true when the two upcoming character codes begin an expansion form:
 * a `{` that is NOT immediately followed by another `{` (a double brace opens
 * an interpolation instead).
 *
 * @param peek code of the current character
 * @param nextPeek code of the character after it
 */
function isSpecialFormStart(peek: number, nextPeek: number): boolean {
  // Strict comparison on both sides; both operands are character codes.
  return peek === $LBRACE && nextPeek !== $LBRACE;
}
|
||||||
|
|
||||||
function isAsciiLetter(code: number): boolean {
|
function isAsciiLetter(code: number): boolean {
|
||||||
|
|
|
@ -576,6 +576,78 @@ export function main() {
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("expansion forms", () => {
|
||||||
|
it("should parse an expansion form", () => {
|
||||||
|
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four} =5 {five} }', true))
|
||||||
|
.toEqual([
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_START],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'one.two'],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'three'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_START],
|
||||||
|
[HtmlTokenType.TEXT, 'four'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_END],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_VALUE, '5'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_START],
|
||||||
|
[HtmlTokenType.TEXT, 'five'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_END],
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_END],
|
||||||
|
[HtmlTokenType.EOF]
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should parse an expansion form with text elements surrounding it", () => {
|
||||||
|
expect(tokenizeAndHumanizeParts('before{one.two, three, =4 {four}}after', true))
|
||||||
|
.toEqual([
|
||||||
|
[HtmlTokenType.TEXT, "before"],
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_START],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'one.two'],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'three'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_START],
|
||||||
|
[HtmlTokenType.TEXT, 'four'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_END],
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_END],
|
||||||
|
[HtmlTokenType.TEXT, "after"],
|
||||||
|
[HtmlTokenType.EOF]
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should parse an expansion forms with elements in it", () => {
|
||||||
|
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four <b>a</b>}}', true))
|
||||||
|
.toEqual([
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_START],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'one.two'],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'three'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_START],
|
||||||
|
[HtmlTokenType.TEXT, 'four '],
|
||||||
|
[HtmlTokenType.TAG_OPEN_START, null, 'b'],
|
||||||
|
[HtmlTokenType.TAG_OPEN_END],
|
||||||
|
[HtmlTokenType.TEXT, 'a'],
|
||||||
|
[HtmlTokenType.TAG_CLOSE, null, 'b'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_END],
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_END],
|
||||||
|
[HtmlTokenType.EOF]
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should parse an expansion forms with interpolation in it", () => {
|
||||||
|
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four {{a}}}}', true))
|
||||||
|
.toEqual([
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_START],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'one.two'],
|
||||||
|
[HtmlTokenType.RAW_TEXT, 'three'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_START],
|
||||||
|
[HtmlTokenType.TEXT, 'four {{a}}'],
|
||||||
|
[HtmlTokenType.EXPANSION_CASE_EXP_END],
|
||||||
|
[HtmlTokenType.EXPANSION_FORM_END],
|
||||||
|
[HtmlTokenType.EOF]
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('errors', () => {
|
describe('errors', () => {
|
||||||
it('should include 2 lines of context in message', () => {
|
it('should include 2 lines of context in message', () => {
|
||||||
let src = "111\n222\n333\nE\n444\n555\n666\n";
|
let src = "111\n222\n333\nE\n444\n555\n666\n";
|
||||||
|
@ -604,8 +676,9 @@ export function main() {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function tokenizeWithoutErrors(input: string): HtmlToken[] {
|
function tokenizeWithoutErrors(input: string,
|
||||||
var tokenizeResult = tokenizeHtml(input, 'someUrl');
|
tokenizeExpansionForms: boolean = false): HtmlToken[] {
|
||||||
|
var tokenizeResult = tokenizeHtml(input, 'someUrl', tokenizeExpansionForms);
|
||||||
if (tokenizeResult.errors.length > 0) {
|
if (tokenizeResult.errors.length > 0) {
|
||||||
var errorString = tokenizeResult.errors.join('\n');
|
var errorString = tokenizeResult.errors.join('\n');
|
||||||
throw new BaseException(`Unexpected parse errors:\n${errorString}`);
|
throw new BaseException(`Unexpected parse errors:\n${errorString}`);
|
||||||
|
@ -613,8 +686,9 @@ function tokenizeWithoutErrors(input: string): HtmlToken[] {
|
||||||
return tokenizeResult.tokens;
|
return tokenizeResult.tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Test helper: tokenizes `input` (throwing on any tokenizer error via
 * `tokenizeWithoutErrors`) and flattens every token into an array of the form
 * `[type, ...parts]`.
 *
 * @param input HTML to tokenize
 * @param tokenizeExpansionForms forwarded to the tokenizer; defaults to false
 */
function tokenizeAndHumanizeParts(input: string, tokenizeExpansionForms: boolean = false): any[] {
  const tokens = tokenizeWithoutErrors(input, tokenizeExpansionForms);
  return tokens.map(token => {
    const head = [<any>token.type];
    return head.concat(token.parts);
  });
}
|
||||||
|
|
||||||
function tokenizeAndHumanizeSourceSpans(input: string): any[] {
|
function tokenizeAndHumanizeSourceSpans(input: string): any[] {
|
||||||
|
|
Loading…
Reference in New Issue