feat(i18n): implement an i18n-aware html parser

Closes #7738
This commit is contained in:
vsavkin 2016-03-23 13:45:04 -07:00 committed by Victor Savkin
parent 73a84a7098
commit d272f96e23
3 changed files with 577 additions and 4 deletions

View File

@ -57,6 +57,10 @@ class ParseException extends BaseException {
}
}
/**
 * Result of splitting an interpolated string: the fixed string pieces and the
 * raw, still unparsed, expression sources.
 */
export class SplitInterpolation {
  public strings: string[];
  public expressions: string[];

  constructor(strings: string[], expressions: string[]) {
    this.strings = strings;
    this.expressions = expressions;
  }
}
@Injectable()
export class Parser {
/** @internal */
@ -118,6 +122,21 @@ export class Parser {
}
/**
 * Parses an interpolated string into an `Interpolation` AST wrapped in an
 * `ASTWithSource`.
 *
 * Returns null when `splitInterpolation` returns null for `input`.
 */
parseInterpolation(input: string, location: any): ASTWithSource {
  let split = this.splitInterpolation(input, location);
  if (split == null) {
    return null;
  }
  // Every expression source is tokenized on its own, while the complete `input`
  // is what gets handed to _ParseAST together with those tokens.
  let expressionAsts = split.expressions.map(
      expressionSource => new _ParseAST(input, location, this._lexer.tokenize(expressionSource),
                                        this._reflector, false)
                              .parseChain());
  return new ASTWithSource(new Interpolation(split.strings, expressionAsts), input, location);
}
splitInterpolation(input: string, location: string): SplitInterpolation {
var parts = StringWrapper.split(input, INTERPOLATION_REGEXP);
if (parts.length <= 1) {
return null;
@ -131,16 +150,14 @@ export class Parser {
// fixed string
strings.push(part);
} else if (part.trim().length > 0) {
var tokens = this._lexer.tokenize(part);
var ast = new _ParseAST(input, location, tokens, this._reflector, false).parseChain();
expressions.push(ast);
expressions.push(part);
} else {
throw new ParseException('Blank expressions are not allowed in interpolated strings', input,
`at column ${this._findInterpolationErrorColumn(parts, i)} in`,
location);
}
}
return new ASTWithSource(new Interpolation(strings, expressions), input, location);
return new SplitInterpolation(strings, expressions);
}
wrapLiteralPrimitive(input: string, location: any): ASTWithSource {

View File

@ -0,0 +1,353 @@
import {HtmlParser, HtmlParseTreeResult} from 'angular2/src/compiler/html_parser';
import {ParseSourceSpan, ParseError} from 'angular2/src/compiler/parse_util';
import {
HtmlAst,
HtmlAstVisitor,
HtmlElementAst,
HtmlAttrAst,
HtmlTextAst,
HtmlCommentAst,
htmlVisitAll
} from 'angular2/src/compiler/html_ast';
import {ListWrapper, StringMapWrapper} from 'angular2/src/facade/collection';
import {RegExpWrapper, NumberWrapper, isPresent} from 'angular2/src/facade/lang';
import {BaseException} from 'angular2/src/facade/exceptions';
import {Parser} from 'angular2/src/core/change_detection/parser/parser';
import {Message, id} from './message';
import {
messageFromAttribute,
I18nError,
isI18nAttr,
partition,
Part,
stringifyNodes,
meaning
} from './shared';
// Attribute that marks an element's content as translatable, e.g. `<b i18n>B</b>`.
// NOTE(review): not referenced by the code visible in this file.
const I18N_ATTR = "i18n";
// Tag name of the placeholder elements that appear in messages (`<ph name="..."/>`).
const PLACEHOLDER_ELEMENT = "ph";
// Attribute of a placeholder element that carries the placeholder's name.
const NAME_ATTR = "name";
// Prefix of the attributes that mark another attribute as translatable, e.g. `i18n-value`.
// NOTE(review): not referenced by the code visible in this file.
const I18N_ATTR_PREFIX = "i18n-";
// Matches a self-closing placeholder with a purely numeric name, e.g. `<ph name="0"/>`.
// Capture group 2 is the name including its surrounding double quotes.
let PLACEHOLDER_REGEXP = RegExpWrapper.create(`\\<ph(\\s)+name=("(\\d)+")\\/\\>`);
// Matches the expanded form of the same placeholder, e.g. `<ph name="0"></ph>`.
// Capture group 2 is again the quoted name.
let PLACEHOLDER_EXPANDED_REGEXP = RegExpWrapper.create(`\\<ph(\\s)+name=("(\\d)+")\\>\\<\\/ph\\>`);
/**
* Creates an i18n-ed version of the parsed template.
*
* Algorithm:
*
* To understand the algorithm, you need to know how partitioning works.
* Partitioning is required as we can use two i18n comments to group node siblings together.
* That is why we cannot just use nodes.
*
* Partitioning transforms an array of HtmlAst into an array of Part.
* A part can optionally contain a root element or a root text node. And it can also contain
* children.
* A part can have the i18n property set, in which case it needs to be translated.
*
* Example:
*
* The following array of nodes will be split into four parts:
*
* ```
* <a>A</a>
* <b i18n>B</b>
* <!-- i18n -->
* <c>C</c>
* D
* <!-- /i18n -->
* E
* ```
*
* Part 1 containing the a tag. It should not be translated.
* Part 2 containing the b tag. It should be translated.
* Part 3 containing the c tag and the D text node. It should be translated.
* Part 4 containing the E text node. It should not be translated.
*
*
* It is also important to understand how we stringify nodes to create a message.
*
* We walk the tree and replace every element node with a placeholder. We also replace
* all expressions in interpolation with placeholders. We also insert a placeholder element
* to wrap a text node containing interpolation.
*
* Example:
*
* The following tree:
*
* ```
* <a>A{{I}}</a><b>B</b>
* ```
*
* will be stringified into:
* ```
* <ph name="e0"><ph name="t1">A<ph name="0"/></ph></ph><ph name="e2">B</ph>
* ```
*
* This is what the algorithm does:
*
* 1. Use the provided html parser to get the html AST of the template.
* 2. Partition the root nodes, and process each part separately.
* 3. If a part does not have the i18n attribute, recurse to process children and attributes.
* 4. If a part has the i18n attribute, merge the translated i18n part with the original tree.
*
* This is how the merging works:
*
* 1. Use the stringify function to get the message id. Look up the message in the map.
* 2. Parse the translated message. At this point we have two trees: the original tree
* and the translated tree, where all the elements are replaced with placeholders.
* 3. Use the original tree to create a mapping Index:number -> HtmlAst.
* 4. Walk the translated tree.
* 5. If we encounter a placeholder element, get its name property.
* 6. Get the type and the index of the node using the name property.
* 7. If the type is 'e', which means element, then:
* - translate the attributes of the original element
* - recurse to merge the children
* - create a new element using the original element name, original position,
* and translated children and attributes
* 8. If the type is 't', which means text, then:
* - get the list of expressions from the original node.
* - get the string version of the interpolation subtree
* - find all the placeholders in the translated message, and replace them with the
* corresponding original expressions
*/
export class I18nHtmlParser implements HtmlParser {
  /** Errors collected by the most recent `parse` call; reset at the start of every call. */
  errors: ParseError[];

  /**
   * @param _htmlParser parser used for the template and for the translated messages
   * @param _parser expression parser used to create messages and to split interpolations
   * @param _messages translated messages keyed by message id
   */
  constructor(private _htmlParser: HtmlParser, private _parser: Parser,
              private _messages: {[key: string]: string}) {}

  /**
   * Parses the template and replaces the i18n parts with their translations.
   *
   * If the wrapped html parser reports errors, its result is returned untouched.
   * If translating produces errors, the result has no nodes and carries those errors.
   */
  parse(sourceContent: string, sourceUrl: string): HtmlParseTreeResult {
    this.errors = [];
    let res = this._htmlParser.parse(sourceContent, sourceUrl);
    if (res.errors.length > 0) {
      return res;
    } else {
      let nodes = this._recurse(res.rootNodes);
      return this.errors.length > 0 ? new HtmlParseTreeResult([], this.errors) :
                                      new HtmlParseTreeResult(nodes, []);
    }
  }

  /**
   * Translates one part. An `I18nError` raised on the way is recorded in `errors` and the
   * part then contributes no nodes; any other exception is rethrown.
   */
  private _processI18nPart(p: Part): HtmlAst[] {
    try {
      return p.hasI18n ? this._mergeI18Part(p) : this._recurseIntoI18nPart(p);
    } catch (e) {
      if (e instanceof I18nError) {
        this.errors.push(e);
        return [];
      } else {
        throw e;
      }
    }
  }

  /**
   * Looks up the translation of a part marked with i18n, parses it and merges it with the
   * original nodes of the part.
   *
   * Throws an `I18nError` when there is no message for the part's id.
   */
  private _mergeI18Part(p: Part): HtmlAst[] {
    let messageId = id(p.createMessage(this._parser));
    if (!StringMapWrapper.contains(this._messages, messageId)) {
      throw new I18nError(p.sourceSpan, `Cannot find message for id '${messageId}'`);
    }

    // get the message and expand a placeholder so <ph/> becomes <ph></ph>
    // we need to do it cause we use HtmlParser to parse the message
    let message = _expandPlaceholder(this._messages[messageId]);
    let parsedMessage = this._htmlParser.parse(message, "source");

    if (parsedMessage.errors.length > 0) {
      this.errors = this.errors.concat(parsedMessage.errors);
      return [];
    } else {
      return this._mergeTrees(p, message, parsedMessage.rootNodes, p.children);
    }
  }

  /** Handles a part that is not marked with i18n itself. */
  private _recurseIntoI18nPart(p: Part): HtmlAst[] {
    if (isPresent(p.rootElement)) {
      // we found an element without an i18n attribute
      // we need to recurse in cause its children may have i18n set
      // we also need to translate its attributes
      let root = p.rootElement;
      let children = this._recurse(p.children);
      let attrs = this._i18nAttributes(root);
      return [
        new HtmlElementAst(root.name, attrs, children, root.sourceSpan, root.startSourceSpan,
                           root.endSourceSpan)
      ];
    } else if (isPresent(p.rootTextNode)) {
      // a text node without i18n or interpolation, nothing to do
      return [p.rootTextNode];
    } else {
      return this._recurse(p.children);
    }
  }

  /** Partitions the nodes and processes every part, returning the resulting nodes in order. */
  private _recurse(nodes: HtmlAst[]): HtmlAst[] {
    let ps = partition(nodes, this.errors);
    return ListWrapper.flatten(ps.map(p => this._processI18nPart(p)));
  }

  /**
   * Merges the parsed translation (`translated`) with the original nodes of the part.
   *
   * @param translatedSource the text of the translation that `translated` was parsed from
   */
  private _mergeTrees(p: Part, translatedSource: string, translated: HtmlAst[],
                      original: HtmlAst[]): HtmlAst[] {
    let l = new _CreateNodeMapping();
    htmlVisitAll(l, original);

    // merge the translated tree with the original tree.
    // we do it by preserving the source code position of the original tree
    let merged = this._mergeTreesHelper(translatedSource, translated, l.mapping);

    if (isPresent(p.rootElement)) {
      // if the root element is present, we need to create a new root element with its
      // attributes translated
      let root = p.rootElement;
      let attrs = this._i18nAttributes(root);
      return [
        new HtmlElementAst(root.name, attrs, merged, root.sourceSpan, root.startSourceSpan,
                           root.endSourceSpan)
      ];
    } else if (isPresent(p.rootTextNode)) {
      // this should never happen with a part. Parts that have root text node should not be
      // merged.
      throw new BaseException("should not be reached");
    } else {
      return merged;
    }
  }

  /**
   * Maps every node of the translation to its merged counterpart: placeholder elements are
   * resolved against `mapping`, text nodes are kept as they are.
   */
  private _mergeTreesHelper(translatedSource: string, translated: HtmlAst[],
                            mapping: HtmlAst[]): HtmlAst[] {
    return translated.map(t => {
      if (t instanceof HtmlElementAst) {
        return this._mergeElementOrInterpolation(t, translatedSource, translated, mapping);
      } else if (t instanceof HtmlTextAst) {
        return t;
      } else {
        throw new BaseException("should not be reached");
      }
    });
  }

  /**
   * Resolves a placeholder element of the translation. Its name is a type character
   * ('t' for text, 'e' for element) followed by the index of the original node in `mapping`.
   *
   * The name comes from the translated message, so it is validated: an unknown type, an
   * index without a node, or a node of the wrong kind raises an `I18nError` instead of
   * failing later on a missing property.
   */
  private _mergeElementOrInterpolation(t: HtmlElementAst, translatedSource: string,
                                       translated: HtmlAst[], mapping: HtmlAst[]): HtmlAst {
    let name = this._getName(t);
    let type = name[0];
    if (type != "t" && type != "e") {
      throw new I18nError(t.sourceSpan, `Unexpected placeholder name "${name}".`);
    }

    let index = NumberWrapper.parseInt(name.substring(1), 10);
    let originalNode = mapping[index];

    if (type == "t") {
      if (!(originalNode instanceof HtmlTextAst)) {
        throw new I18nError(
            t.sourceSpan,
            `Placeholder "${name}" does not match a text node of the original template.`);
      }
      return this._mergeTextInterpolation(t, <HtmlTextAst>originalNode, translatedSource);
    } else {
      if (!(originalNode instanceof HtmlElementAst)) {
        throw new I18nError(
            t.sourceSpan,
            `Placeholder "${name}" does not match an element of the original template.`);
      }
      return this._mergeElement(t, <HtmlElementAst>originalNode, mapping, translatedSource);
    }
  }

  /**
   * Returns the value of the name attribute of a placeholder element.
   *
   * Throws an `I18nError` when `t` is not a placeholder element or has no name attribute.
   */
  private _getName(t: HtmlElementAst): string {
    if (t.name != PLACEHOLDER_ELEMENT) {
      throw new I18nError(
          t.sourceSpan,
          `Unexpected tag "${t.name}". Only "${PLACEHOLDER_ELEMENT}" tags are allowed.`);
    }
    let names = t.attrs.filter(a => a.name == NAME_ATTR);
    if (names.length == 0) {
      throw new I18nError(t.sourceSpan, `Missing "${NAME_ATTR}" attribute.`);
    }
    return names[0].value;
  }

  /**
   * Builds the translated text node for a 't' placeholder: the text between the start and
   * the end tag of the placeholder is taken from the translation and its numeric
   * placeholders are replaced with the expressions of the original text node.
   */
  private _mergeTextInterpolation(t: HtmlElementAst, originalNode: HtmlTextAst,
                                  translatedSource: string): HtmlTextAst {
    let split =
        this._parser.splitInterpolation(originalNode.value, originalNode.sourceSpan.toString());
    let exps = isPresent(split) ? split.expressions : [];

    let messageSubstring =
        translatedSource.substring(t.startSourceSpan.end.offset, t.endSourceSpan.start.offset);
    let translated =
        this._replacePlaceholdersWithExpressions(messageSubstring, exps, originalNode.sourceSpan);

    return new HtmlTextAst(translated, originalNode.sourceSpan);
  }

  /**
   * Builds the element for an 'e' placeholder: name and source spans come from the original
   * element, the children from the translation, and the attributes are translated.
   */
  private _mergeElement(t: HtmlElementAst, originalNode: HtmlElementAst, mapping: HtmlAst[],
                        translatedSource: string): HtmlElementAst {
    let children = this._mergeTreesHelper(translatedSource, t.children, mapping);
    return new HtmlElementAst(originalNode.name, this._i18nAttributes(originalNode), children,
                              originalNode.sourceSpan, originalNode.startSourceSpan,
                              originalNode.endSourceSpan);
  }

  /**
   * Returns the attributes of `el` after translation:
   *  - the `i18n` marker and the `i18n-<name>` markers are removed;
   *  - an attribute with an `i18n-<name>` marker is replaced by its translation, which is
   *    emitted at the position of the marker;
   *  - every other attribute is passed through unchanged. (Previously these were dropped,
   *    so rebuilt elements lost attributes such as `class` or `id`.)
   *
   * Throws an `I18nError` when there is no message for a marked attribute.
   */
  private _i18nAttributes(el: HtmlElementAst): HtmlAttrAst[] {
    let res: HtmlAttrAst[] = [];
    // names of the attributes that are going to be emitted in their translated form
    let translatedNames = el.attrs.filter(a => isI18nAttr(a.name)).map(a => a.name.substring(5));

    el.attrs.forEach(attr => {
      if (isI18nAttr(attr.name)) {
        let messageId = id(messageFromAttribute(this._parser, el, attr));
        let expectedName = attr.name.substring(5);
        let m = el.attrs.filter(a => a.name == expectedName)[0];

        if (StringMapWrapper.contains(this._messages, messageId)) {
          let split = this._parser.splitInterpolation(m.value, m.sourceSpan.toString());
          let exps = isPresent(split) ? split.expressions : [];
          let message = this._replacePlaceholdersWithExpressions(
              _expandPlaceholder(this._messages[messageId]), exps, m.sourceSpan);
          res.push(new HtmlAttrAst(m.name, message, m.sourceSpan));
        } else {
          throw new I18nError(m.sourceSpan, `Cannot find message for id '${messageId}'`);
        }
      } else if (attr.name != "i18n" && translatedNames.indexOf(attr.name) == -1) {
        // a regular attribute that is neither a marker nor translated: keep it
        res.push(attr);
      }
    });
    return res;
  }

  /**
   * Replaces every expanded numeric placeholder in `message` with `{{expression}}`, where
   * the expression is `exps[index]` and the index is the placeholder's name.
   */
  private _replacePlaceholdersWithExpressions(message: string, exps: string[],
                                              sourceSpan: ParseSourceSpan): string {
    return RegExpWrapper.replaceAll(PLACEHOLDER_EXPANDED_REGEXP, message, (match) => {
      let nameWithQuotes = match[2];
      let name = nameWithQuotes.substring(1, nameWithQuotes.length - 1);
      let index = NumberWrapper.parseInt(name, 10);
      return this._convertIntoExpression(index, exps, sourceSpan);
    });
  }

  /**
   * Returns the interpolation text for the expression at `index`.
   *
   * Throws an `I18nError` when `index` is outside of `exps`.
   */
  private _convertIntoExpression(index: number, exps: string[], sourceSpan: ParseSourceSpan) {
    if (index >= 0 && index < exps.length) {
      return `{{${exps[index]}}}`;
    } else {
      throw new I18nError(sourceSpan, `Invalid interpolation index '${index}'`);
    }
  }
}
/**
 * Visitor that collects element and text nodes into `mapping`. An element is recorded
 * before its children are visited; attributes and comments are not recorded.
 * The position of a node in `mapping` is the index used to look it up later.
 */
class _CreateNodeMapping implements HtmlAstVisitor {
  mapping: HtmlAst[];

  constructor() { this.mapping = []; }

  visitElement(ast: HtmlElementAst, context: any): any {
    // the element itself comes first, then whatever its children add
    this.mapping.push(ast);
    htmlVisitAll(this, ast.children);
    return null;
  }

  visitAttr(ast: HtmlAttrAst, context: any): any {
    return null;
  }

  visitText(ast: HtmlTextAst, context: any): any {
    this.mapping.push(ast);
    return null;
  }

  visitComment(ast: HtmlCommentAst, context: any): any {
    return "";
  }
}
/**
 * Rewrites every self-closing placeholder matched by PLACEHOLDER_REGEXP into an explicit
 * open/close pair, keeping the quoted name: `<ph name="0"/>` becomes `<ph name="0"></ph>`.
 */
function _expandPlaceholder(input: string): string {
  // match[2] is the placeholder name including its quotes
  let toExpandedForm = (match) => `<ph name=${match[2]}></ph>`;
  return RegExpWrapper.replaceAll(PLACEHOLDER_REGEXP, input, toExpandedForm);
}

View File

@ -0,0 +1,203 @@
import {
AsyncTestCompleter,
beforeEach,
ddescribe,
describe,
expect,
iit,
inject,
it,
xdescribe,
xit
} from 'angular2/testing_internal';
import {I18nHtmlParser} from 'angular2/src/i18n/i18n_html_parser';
import {Message, id} from 'angular2/src/i18n/message';
import {Parser} from 'angular2/src/core/change_detection/parser/parser';
import {Lexer} from 'angular2/src/core/change_detection/parser/lexer';
import {HtmlParser, HtmlParseTreeResult} from 'angular2/src/compiler/html_parser';
import {
HtmlAst,
HtmlAstVisitor,
HtmlElementAst,
HtmlAttrAst,
HtmlTextAst,
HtmlCommentAst,
htmlVisitAll
} from 'angular2/src/compiler/html_ast';
import {ParseError, ParseLocation} from 'angular2/src/compiler/parse_util';
import {humanizeDom} from '../../test/compiler/html_ast_spec_utils';
// Spec for I18nHtmlParser. Every test parses a template together with a map of
// translations (message id -> translated message) and checks either the humanized
// result tree or the reported error messages.
export function main() {
describe('I18nHtmlParser', () => {
// Parses `template` with an I18nHtmlParser wrapping a real HtmlParser and expression Parser.
function parse(template: string, messages: {[key: string]: string}): HtmlParseTreeResult {
var parser = new Parser(new Lexer());
let htmlParser = new HtmlParser();
return new I18nHtmlParser(htmlParser, parser, messages).parse(template, "someurl");
}
it("should delegate to the provided parser when no i18n", () => {
expect(humanizeDom(parse('<div>a</div>', {})))
.toEqual([[HtmlElementAst, 'div', 0], [HtmlTextAst, 'a', 1]]);
});
// The expected output contains the translated attribute only; the i18n-value marker is gone.
it("should replace attributes", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message("some message", "meaning", null))] = "another message";
expect(humanizeDom(parse("<div value='some message' i18n-value='meaning|comment'></div>",
translations)))
.toEqual([[HtmlElementAst, 'div', 0], [HtmlAttrAst, 'value', 'another message']]);
});
it("should replace elements with the i18n attr", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message("message", "meaning", null))] = "another message";
expect(humanizeDom(parse("<div i18n='meaning|desc'>message</div>", translations)))
.toEqual([[HtmlElementAst, 'div', 0], [HtmlTextAst, 'another message', 1]]);
});
// Interpolation inside an attribute: the translation swaps the two expressions.
it("should handle interpolation", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('<ph name="0"/> and <ph name="1"/>', null, null))] =
'<ph name="1"/> or <ph name="0"/>';
expect(humanizeDom(parse("<div value='{{a}} and {{b}}' i18n-value></div>", translations)))
.toEqual([[HtmlElementAst, 'div', 0], [HtmlAttrAst, 'value', '{{b}} or {{a}}']]);
});
// The translation reorders the two child elements.
it("should handle nested html", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('<ph name="e0">a</ph><ph name="e2">b</ph>', null, null))] =
'<ph name="e2">B</ph><ph name="e0">A</ph>';
expect(humanizeDom(parse('<div i18n><a>a</a><b>b</b></div>', translations)))
.toEqual([
[HtmlElementAst, 'div', 0],
[HtmlElementAst, 'b', 1],
[HtmlTextAst, 'B', 2],
[HtmlElementAst, 'a', 1],
[HtmlTextAst, 'A', 2],
]);
});
// Interpolation inside a text node, which is wrapped in a 't' placeholder.
it("should support interpolation", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message(
'<ph name="e0">a</ph><ph name="e2"><ph name="t3">b<ph name="0"/></ph></ph>', null,
null))] = '<ph name="e2"><ph name="t3"><ph name="0"/>B</ph></ph><ph name="e0">A</ph>';
expect(humanizeDom(parse('<div i18n><a>a</a><b>b{{i}}</b></div>', translations)))
.toEqual([
[HtmlElementAst, 'div', 0],
[HtmlElementAst, 'b', 1],
[HtmlTextAst, '{{i}}B', 2],
[HtmlElementAst, 'a', 1],
[HtmlTextAst, 'A', 2],
]);
});
it("should i18n attributes of placeholder elements", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('<ph name="e0">a</ph>', null, null))] = '<ph name="e0">A</ph>';
translations[id(new Message('b', null, null))] = 'B';
expect(humanizeDom(parse('<div i18n><a value="b" i18n-value>a</a></div>', translations)))
.toEqual([
[HtmlElementAst, 'div', 0],
[HtmlElementAst, 'a', 1],
[HtmlAttrAst, 'value', "B"],
[HtmlTextAst, 'A', 2],
]);
});
// Parts delimited by i18n comments instead of an i18n attribute.
it('should extract from partitions', () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('message1', 'meaning1', null))] = 'another message1';
translations[id(new Message('message2', 'meaning2', null))] = 'another message2';
let res = parse(`<!-- i18n: meaning1|desc1 -->message1<!-- /i18n --><!-- i18n: meaning2|desc2 -->message2<!-- /i18n -->`, translations);
expect(humanizeDom(res))
.toEqual([
[HtmlTextAst, 'another message1', 0],
[HtmlTextAst, 'another message2', 0],
]);
});
// After reordering, each element still reports the offset it has in the original template.
it("should preserve original positions", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('<ph name="e0">a</ph><ph name="e2">b</ph>', null, null))] =
'<ph name="e2">B</ph><ph name="e0">A</ph>';
let res =
(<any>parse('<div i18n><a>a</a><b>b</b></div>', translations).rootNodes[0]).children;
expect(res[0].sourceSpan.start.offset).toEqual(18);
expect(res[1].sourceSpan.start.offset).toEqual(10);
});
describe("errors", () => {
it("should error when giving an invalid template", () => {
expect(humanizeErrors(parse("<a>a</b>", {}).errors))
.toEqual(['Unexpected closing tag "b"']);
});
it("should error when no matching message (attr)", () => {
let mid = id(new Message("some message", null, null));
expect(humanizeErrors(parse("<div value='some message' i18n-value></div>", {}).errors))
.toEqual([`Cannot find message for id '${mid}'`]);
});
it("should error when no matching message (text)", () => {
let mid = id(new Message("some message", null, null));
expect(humanizeErrors(parse("<div i18n>some message</div>", {}).errors))
.toEqual([`Cannot find message for id '${mid}'`]);
});
// The translation itself is malformed html.
it("should error when message cannot be parsed", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message("some message", null, null))] = "<a>a</b>";
expect(humanizeErrors(parse("<div i18n>some message</div>", translations).errors))
.toEqual([`Unexpected closing tag "b"`]);
});
it("should error when a non-placeholder element appears in translation", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message("some message", null, null))] = "<a>a</a>";
expect(humanizeErrors(parse("<div i18n>some message</div>", translations).errors))
.toEqual([`Unexpected tag "a". Only "ph" tags are allowed.`]);
});
it("should error when a placeholder element does not have the name attribute", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message("some message", null, null))] = "<ph>a</ph>";
expect(humanizeErrors(parse("<div i18n>some message</div>", translations).errors))
.toEqual([`Missing "name" attribute.`]);
});
// An i18n-value marker without the value attribute it refers to.
it("should error when no matching attribute", () => {
expect(humanizeErrors(parse("<div i18n-value></div>", {}).errors))
.toEqual([`Missing attribute 'value'.`]);
});
// The translation refers to expression 99 while the original has a single expression.
it("should error when the translation refers to an invalid expression", () => {
let translations: {[key: string]: string} = {};
translations[id(new Message('hi <ph name="0"/>', null, null))] = 'hi <ph name="99"/>';
expect(
humanizeErrors(parse("<div value='hi {{a}}' i18n-value></div>", translations).errors))
.toEqual(["Invalid interpolation index '99'"]);
});
});
});
}
/** Reduces the given parse errors to their `msg` texts, keeping the order. */
function humanizeErrors(errors: ParseError[]): string[] {
  let messages: string[] = [];
  for (let i = 0; i < errors.length; ++i) {
    messages.push(errors[i].msg);
  }
  return messages;
}