refactor(i18n): message extractor

fixes #8802
This commit is contained in:
Victor Berchet 2016-07-01 17:29:54 -07:00
parent f7258ea52a
commit 2be50bdbb0
4 changed files with 581 additions and 21 deletions

View File

@ -0,0 +1,264 @@
/**
* @license
* Copyright Google Inc. All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {HtmlAst, HtmlAstVisitor, HtmlAttrAst, HtmlCommentAst, HtmlElementAst, HtmlExpansionAst, HtmlExpansionCaseAst, HtmlTextAst} from '../html_ast';
import {I18nError, I18N_ATTR_PREFIX, getI18nAttr, meaning, description, isOpeningComment, isClosingComment,} from './shared';
import {htmlVisitAll} from '@angular/compiler/src/html_ast';
/**
 * Extracts the translatable messages from an HTML AST.
 *
 * @param sourceAst root nodes to walk
 * @param implicitTags element names handled as translatable without an `i18n` attribute
 * @param implicitAttrs per element name, the attribute names handled as translatable without a
 *     matching i18n attribute
 * @returns the extracted messages together with the errors reported during the walk
 */
export function extractAstMessages(
    sourceAst: HtmlAst[], implicitTags: string[],
    implicitAttrs: {[k: string]: string[]}): ExtractionResult {
  return new _ExtractVisitor(implicitTags, implicitAttrs).extract(sourceAst);
}
/**
 * Outcome of an extraction run: the messages that were found and the errors that were reported.
 */
export class ExtractionResult {
  public messages: AstMessage[];
  public errors: I18nError[];

  constructor(messages: AstMessage[], errors: I18nError[]) {
    this.messages = messages;
    this.errors = errors;
  }
}
/**
 * Walks an HTML AST and collects the translatable messages it contains.
 *
 * A message is created for:
 * - the children of an element that carries an i18n attribute (see `getI18nAttr`) or whose name
 *   is listed in the implicit tags,
 * - the nodes located between an opening and a closing i18n comment (a "block"),
 * - a top level ICU message located inside a translatable element or block,
 * - an attribute that either has a companion attribute named `I18N_ATTR_PREFIX + name`
 *   (e.g. `i18n-title="m|d"` for `title`) or is listed in the implicit attributes of its element.
 *
 * Invalid usages (nested translatable sections, unbalanced blocks, ...) are recorded as
 * `I18nError`s instead of being thrown.
 *
 * The visitor is stateful: `extract()` resets the state before every run.
 */
class _ExtractVisitor implements HtmlAstVisitor {
  // <el i18n>...</el>
  private _inI18nNode = false;
  // Number of elements currently open around the visited node
  private _depth: number = 0;

  // <!--i18n-->...<!--/i18n-->
  private _blockMeaningAndDesc: string;
  private _blockChildren: HtmlAst[];
  private _blockStartDepth: number;
  private _inI18nBlock = false;

  // {<icu message>}
  private _inIcu = false;

  // Index in the message list where the current section started, `undefined` outside a section
  private _sectionStartIndex: number;
  private _errors: I18nError[];

  constructor(private _implicitTags: string[], private _implicitAttrs: {[k: string]: string[]}) {}

  /**
   * Extracts the messages from `source`.
   *
   * @param source root nodes to walk
   * @returns the messages (in creation order) and the errors reported during the walk
   */
  extract(source: HtmlAst[]): ExtractionResult {
    const messages: AstMessage[] = [];
    this._inI18nBlock = false;
    this._inI18nNode = false;
    this._depth = 0;
    this._inIcu = false;
    this._sectionStartIndex = void 0;
    this._errors = [];

    source.forEach(node => node.visit(this, messages));

    // A block that is still open at this point never saw its closing comment; the error is
    // attached to the last root node.
    if (this._inI18nBlock) {
      this._reportError(source[source.length - 1], 'Unclosed block');
    }

    return new ExtractionResult(messages, this._errors);
  }

  visitExpansionCase(part: HtmlExpansionCaseAst, messages: AstMessage[]): any {
    htmlVisitAll(this, part.expression, messages);
  }

  visitExpansion(icu: HtmlExpansionAst, messages: AstMessage[]): any {
    this._mayBeAddBlockChildren(icu);

    const wasInIcu = this._inIcu;

    if (!this._inIcu) {
      // Only the outermost ICU message of a translatable section gets its own message
      if (this._inI18nNode || this._inI18nBlock) {
        this._addMessage(messages, [icu]);
      }
      this._inIcu = true;
    }

    // The cases are always visited so that attributes nested in the ICU are still extracted
    htmlVisitAll(this, icu.cases, messages);

    this._inIcu = wasInIcu;
  }

  visitComment(comment: HtmlCommentAst, messages: AstMessage[]): any {
    const isOpening = isOpeningComment(comment);

    if (isOpening && (this._inI18nBlock || this._inI18nNode)) {
      this._reportError(comment, 'Could not start a block inside a translatable section');
      return;
    }

    const isClosing = isClosingComment(comment);

    if (isClosing && !this._inI18nBlock) {
      this._reportError(comment, 'Trying to close an unopened block');
      return;
    }

    // Comments inside a translatable element or an ICU message neither open nor close a block
    if (this._inI18nNode || this._inIcu) {
      return;
    }

    if (!this._inI18nBlock) {
      if (isOpening) {
        this._inI18nBlock = true;
        this._blockStartDepth = this._depth;
        this._blockChildren = [];
        this._blockMeaningAndDesc = comment.value.replace(/^i18n:?/, '').trim();
        this._startSection(messages);
      }
    } else if (isClosing) {
      if (this._depth === this._blockStartDepth) {
        // The section must be closed before the block message is added so that the block message
        // itself can not be discarded by `_endSection`.
        this._endSection(messages, this._blockChildren);
        this._inI18nBlock = false;
        this._addMessage(messages, this._blockChildren, this._blockMeaningAndDesc);
      } else {
        // The block stays open and ends up being reported as unclosed by `extract()`
        this._reportError(comment, 'I18N blocks should not cross element boundaries');
      }
    }
  }

  visitText(text: HtmlTextAst, messages: AstMessage[]): any { this._mayBeAddBlockChildren(text); }

  visitElement(el: HtmlElementAst, messages: AstMessage[]): any {
    // Must happen before the depth is incremented: the element itself lives at the current depth
    this._mayBeAddBlockChildren(el);
    this._depth++;
    const wasInI18nNode = this._inI18nNode;
    let useSection = false;

    // Extract only top level nodes with the (implicit) "i18n" attribute if not in a block or an ICU
    // message
    const i18nAttr = getI18nAttr(el);
    const isImplicitI18n =
        this._implicitTags.some((tagName: string): boolean => el.name === tagName);

    if (!(this._inI18nNode || this._inIcu || this._inI18nBlock)) {
      if (i18nAttr || isImplicitI18n) {
        this._inI18nNode = true;
        // Implicit elements have no meaning / description
        this._addMessage(messages, el.children, i18nAttr ? i18nAttr.value : void 0);
        // A section is required for implicit elements as well: without it an implicit element
        // whose only child is an ICU message would produce the very same message twice.
        useSection = true;
      }
    } else {
      if (i18nAttr || isImplicitI18n) {
        // TODO(vicb): we should probably allow nested implicit element (ie <div>)
        this._reportError(
            el, 'Could not mark an element as translatable inside a translatable section');
      }
    }

    // The attributes of the element are extracted before the section starts, their messages are
    // therefore never candidates for removal in `_endSection`.
    this._extractFromAttributes(el, messages);

    if (useSection) {
      this._startSection(messages);
      htmlVisitAll(this, el.children, messages);
      this._endSection(messages, el.children);
    } else {
      htmlVisitAll(this, el.children, messages);
    }

    this._depth--;
    this._inI18nNode = wasInI18nNode;
  }

  // Attributes are handled by `_extractFromAttributes`, they are never visited
  visitAttr(ast: HtmlAttrAst, messages: AstMessage[]): any { throw new Error('unreachable code'); }

  /**
   * Creates a message for every attribute of `el` that is either explicitly marked (a companion
   * attribute named `I18N_ATTR_PREFIX + name` exists, its value being the meaning / description)
   * or implicitly translatable for this element name.
   */
  private _extractFromAttributes(el: HtmlElementAst, messages: AstMessage[]): void {
    const explicitAttrNameToValue: Map<string, string> = new Map();
    const implicitAttrNames: string[] = this._implicitAttrs[el.name] || [];

    el.attrs.filter(attr => attr.name.startsWith(I18N_ATTR_PREFIX))
        .forEach(
            attr => explicitAttrNameToValue.set(
                attr.name.substring(I18N_ATTR_PREFIX.length), attr.value));

    el.attrs.forEach(attr => {
      if (explicitAttrNameToValue.has(attr.name)) {
        this._addMessage(messages, [attr], explicitAttrNameToValue.get(attr.name));
      } else if (implicitAttrNames.some(name => attr.name === name)) {
        this._addMessage(messages, [attr]);
      }
    });
  }

  /**
   * Appends a message made of `ast` unless it would be empty (no node at all, or a single
   * attribute without a value).
   *
   * @param meaningAndDesc raw value that is split with `meaning()` and `description()`
   */
  private _addMessage(messages: AstMessage[], ast: HtmlAst[], meaningAndDesc?: string): void {
    const isEmptyAttr =
        ast.length === 1 && ast[0] instanceof HtmlAttrAst && !(<HtmlAttrAst>ast[0]).value;

    if (ast.length === 0 || isEmptyAttr) {
      // Do not create empty messages
      return;
    }

    messages.push(new AstMessage(ast, meaning(meaningAndDesc), description(meaningAndDesc)));
  }

  /**
   * Add the node as a child of the block when:
   * - we are in a block,
   * - we are not inside a ICU message (those are handled separately),
   * - the node is a "direct child" of the block
   */
  private _mayBeAddBlockChildren(ast: HtmlAst): void {
    if (this._inI18nBlock && !this._inIcu && this._depth === this._blockStartDepth) {
      this._blockChildren.push(ast);
    }
  }

  /**
   * Marks the start of a section, see `_endSection`
   *
   * @throws Error when a section is already started (sections can not be nested)
   */
  private _startSection(messages: AstMessage[]): void {
    if (this._sectionStartIndex !== void 0) {
      throw new Error('Unexpected section start');
    }

    this._sectionStartIndex = messages.length;
  }

  /**
   * Terminates a section.
   *
   * If a section has only one significant child (comments are not significant) then we should
   * not keep the message from this child:
   *
   * `<p i18n="meaning|description">{ICU message}</p>` would produce two messages:
   * - one for the <p> content with meaning and description,
   * - another one for the ICU message.
   *
   * In this case the last message is discarded as it contains less information (the AST is
   * otherwise identical).
   *
   * Note that we should still keep messages extracted from attributes inside the section (ie in the
   * ICU message here)
   *
   * @throws Error when no section is started
   */
  private _endSection(messages: AstMessage[], directChildren: HtmlAst[]): void {
    if (this._sectionStartIndex === void 0) {
      throw new Error('Unexpected section end');
    }

    const startIndex = this._sectionStartIndex;
    const significantChildren: number = directChildren.reduce(
        (count: number, node: HtmlAst): number => count + (node instanceof HtmlCommentAst ? 0 : 1),
        0);

    if (significantChildren === 1) {
      // Drop the first message of the section that does not come from an attribute
      for (let i = startIndex; i < messages.length; i++) {
        const ast = messages[i].ast;
        if (!(ast.length === 1 && ast[0] instanceof HtmlAttrAst)) {
          messages.splice(i, 1);
          break;
        }
      }
    }

    this._sectionStartIndex = void 0;
  }

  /** Records an error located at the source span of `astNode`. */
  private _reportError(astNode: HtmlAst, msg: string): void {
    this._errors.push(new I18nError(astNode.sourceSpan, msg));
  }
}
/**
 * A translatable message: the AST nodes it is made of, plus its meaning and description.
 */
export class AstMessage {
  public ast: HtmlAst[];
  public meaning: string;
  public description: string;

  constructor(ast: HtmlAst[], meaning: string, description: string) {
    this.ast = ast;
    this.meaning = meaning;
    this.description = description;
  }
}

View File

@ -6,11 +6,13 @@
* found in the LICENSE file at https://angular.io/license
*/
import {normalizeBlank} from '../../../router-deprecated/src/facade/lang';
import {Parser as ExpressionParser} from '../expression_parser/parser';
import {StringWrapper, isBlank, isPresent} from '../facade/lang';
import {HtmlAst, HtmlAstVisitor, HtmlAttrAst, HtmlCommentAst, HtmlElementAst, HtmlExpansionAst, HtmlExpansionCaseAst, HtmlTextAst, htmlVisitAll} from '../html_ast';
import {InterpolationConfig} from '../interpolation_config';
import {ParseError, ParseSourceSpan} from '../parse_util';
import {Message} from './message';
export const I18N_ATTR = 'i18n';
@ -31,10 +33,10 @@ export function partition(nodes: HtmlAst[], errors: ParseError[], implicitTags:
let node = nodes[i];
let msgNodes: HtmlAst[] = [];
// Nodes between `<!-- i18n -->` and `<!-- /i18n -->`
if (_isOpeningComment(node)) {
if (isOpeningComment(node)) {
let i18n = (<HtmlCommentAst>node).value.replace(/^i18n:?/, '').trim();
while (++i < nodes.length && !_isClosingComment(nodes[i])) {
while (++i < nodes.length && !isClosingComment(nodes[i])) {
msgNodes.push(nodes[i]);
}
@ -46,7 +48,7 @@ export function partition(nodes: HtmlAst[], errors: ParseError[], implicitTags:
parts.push(new Part(null, null, msgNodes, i18n, true));
} else if (node instanceof HtmlElementAst) {
// Node with an `i18n` attribute
let i18n = _findI18nAttr(node);
let i18n = getI18nAttr(node);
let hasI18n: boolean = isPresent(i18n) || implicitTags.indexOf(node.name) > -1;
parts.push(new Part(node, null, node.children, isPresent(i18n) ? i18n.value : null, hasI18n));
} else if (node instanceof HtmlTextAst) {
@ -83,33 +85,27 @@ export class Part {
}
}
function _isOpeningComment(n: HtmlAst): boolean {
export function isOpeningComment(n: HtmlAst): boolean {
return n instanceof HtmlCommentAst && isPresent(n.value) && n.value.startsWith('i18n');
}
function _isClosingComment(n: HtmlAst): boolean {
export function isClosingComment(n: HtmlAst): boolean {
return n instanceof HtmlCommentAst && isPresent(n.value) && n.value === '/i18n';
}
function _findI18nAttr(p: HtmlElementAst): HtmlAttrAst {
let attrs = p.attrs;
for (let i = 0; i < attrs.length; i++) {
if (attrs[i].name === I18N_ATTR) {
return attrs[i];
}
}
return null;
export function getI18nAttr(p: HtmlElementAst): HtmlAttrAst {
return normalizeBlank(p.attrs.find(attr => attr.name === I18N_ATTR));
}
export function meaning(i18n: string): string {
if (isBlank(i18n) || i18n == '') return null;
if (isBlank(i18n) || i18n == '') return '';
return i18n.split('|')[0];
}
export function description(i18n: string): string {
if (isBlank(i18n) || i18n == '') return null;
if (isBlank(i18n) || i18n == '') return '';
let parts = i18n.split('|', 2);
return parts.length > 1 ? parts[1] : null;
return parts.length > 1 ? parts[1] : '';
}
/**

View File

@ -3,7 +3,7 @@ import {HtmlParser} from '@angular/compiler/src/html_parser';
import {beforeEach, ddescribe, describe, expect, it} from '@angular/core/testing/testing_internal';
export function main() {
ddescribe('HtmlAst serilaizer', () => {
describe('HtmlAst serilaizer', () => {
var parser: HtmlParser;
beforeEach(() => { parser = new HtmlParser(); });
@ -54,7 +54,7 @@ export function main() {
class _SerializerVisitor implements HtmlAstVisitor {
visitElement(ast: HtmlElementAst, context: any): any {
return `<${ast.name}${this._visitAll(ast.attrs)}>${this._visitAll(ast.children)}</${ast.name}>`;
return `<${ast.name}${this._visitAll(ast.attrs, ' ')}>${this._visitAll(ast.children)}</${ast.name}>`;
}
visitAttr(ast: HtmlAttrAst, context: any): any { return `${ast.name}="${ast.value}"`; }
@ -71,11 +71,16 @@ class _SerializerVisitor implements HtmlAstVisitor {
return ` ${ast.value} {${this._visitAll(ast.expression)}}`;
}
private _visitAll(ast: HtmlAst[]) { return ast.map(a => a.visit(this, null)).join(''); }
private _visitAll(ast: HtmlAst[], join: string = ''): string {
if (ast.length == 0) {
return '';
}
return join + ast.map(a => a.visit(this, null)).join(join);
}
}
const serializerVisitor = new _SerializerVisitor();
export function serializeHtmlAst(ast: HtmlAst[]) {
export function serializeHtmlAst(ast: HtmlAst[]): string[] {
return ast.map(a => a.visit(serializerVisitor, null));
}

View File

@ -0,0 +1,295 @@
/**
* @license
* Copyright Google Inc. All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {HtmlParser} from '@angular/compiler/src/html_parser';
import {ExtractionResult, extractAstMessages} from '@angular/compiler/src/i18n/extractor';
import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal';
import {serializeHtmlAst} from '../html_ast_serializer_spec'
/**
 * Specs for `extractAstMessages`: messages extracted from elements, blocks, ICU messages and
 * attributes, and the errors reported for invalid markup.
 */
export function main() {
  // `describe` (and not the focused `ddescribe`) so that the other suites keep running
  describe('MessageExtractor', () => {
    /**
     * Parses `html` and runs the extractor on the resulting root nodes.
     *
     * NOTE(review): the third argument of `parse` presumably enables the parsing of ICU
     * expansion forms - confirm against `HtmlParser.parse`.
     *
     * @throws Error when the HTML can not be parsed without error
     */
    function getExtractionResult(
        html: string, implicitTags: string[],
        implicitAttrs: {[k: string]: string[]}): ExtractionResult {
      const htmlParser = new HtmlParser();
      const parseResult = htmlParser.parse(html, 'extractor spec', true);

      // Any parse error invalidates the fixture, a single one must not go unnoticed
      if (parseResult.errors.length > 0) {
        throw new Error(`unexpected parse errors: ${parseResult.errors.join('\n')}`);
      }

      return extractAstMessages(parseResult.rootNodes, implicitTags, implicitAttrs);
    }

    /**
     * Returns the extracted messages as `[serialized nodes, meaning, description]` tuples.
     */
    function extract(
        html: string, implicitTags: string[] = [],
        implicitAttrs: {[k: string]: string[]} = {}): [string[], string, string][] {
      const messages = getExtractionResult(html, implicitTags, implicitAttrs).messages;

      return messages.map(
          (message): [string[], string, string] =>
              [serializeHtmlAst(message.ast), message.meaning, message.description]);
    }

    /**
     * Returns the reported errors as `[message, source span]` tuples.
     */
    function extractErrors(
        html: string, implicitTags: string[] = [],
        implicitAttrs: {[k: string]: string[]} = {}): [string, string][] {
      const errors = getExtractionResult(html, implicitTags, implicitAttrs).errors;

      return errors.map((e): [string, string] => [e.msg, e.span.toString()]);
    }

    describe('elements', () => {
      it('should extract from elements', () => {
        expect(extract('<div i18n="m|d">text<span>nested</span></div>')).toEqual([
          [['text', '<span>nested</span>'], 'm', 'd'],
        ]);
      });

      it('should not create a message for empty elements',
         () => { expect(extract('<div i18n="m|d"></div>')).toEqual([]); });
    });

    describe('blocks', () => {
      it('should extract from blocks', () => {
        expect(extract(`<!-- i18n: meaning1|desc1 -->message1<!-- /i18n -->
<!-- i18n: meaning2 -->message2<!-- /i18n -->
<!-- i18n -->message3<!-- /i18n -->`))
            .toEqual([
              [['message1'], 'meaning1', 'desc1'],
              [['message2'], 'meaning2', ''],
              [['message3'], '', ''],
            ]);
      });

      it('should extract all siblings', () => {
        expect(
            extract(
                `<!-- i18n -->text<p>html<b>nested</b></p>{count, plural, =0 {<span>html</span>}}{{interp}}<!-- /i18n -->`))
            .toEqual([
              [['{count, plural, =0 {<span>html</span>}}'], '', ''],
              [
                [
                  'text', '<p>html<b>nested</b></p>', '{count, plural, =0 {<span>html</span>}}',
                  '{{interp}}'
                ],
                '', ''
              ],
            ]);
      });

      it('should ignore other comments', () => {
        expect(extract(`<!-- i18n: meaning1|desc1 --><!-- other -->message1<!-- /i18n -->`))
            .toEqual([
              [['message1'], 'meaning1', 'desc1'],
            ]);
      });

      it('should not create a message for empty blocks',
         () => { expect(extract(`<!-- i18n: meaning1|desc1 --><!-- /i18n -->`)).toEqual([]); });
    });

    describe('ICU messages', () => {
      it('should extract ICU messages from translatable elements', () => {
        // single message when ICU is the only children
        expect(extract('<div i18n="m|d">{count, plural, =0 {text}}</div>')).toEqual([
          [['{count, plural, =0 {text}}'], 'm', 'd'],
        ]);

        // one message for the element content and one message for the ICU
        expect(extract('<div i18n="m|d">before{count, plural, =0 {text}}after</div>')).toEqual([
          [['before', '{count, plural, =0 {text}}', 'after'], 'm', 'd'],
          [['{count, plural, =0 {text}}'], '', ''],
        ]);
      });

      it('should extract ICU messages from translatable block', () => {
        // single message when ICU is the only children
        expect(extract('<!-- i18n:m|d -->{count, plural, =0 {text}}<!-- /i18n -->')).toEqual([
          [['{count, plural, =0 {text}}'], 'm', 'd'],
        ]);

        // one message for the block content and one message for the ICU
        expect(extract('<!-- i18n:m|d -->before{count, plural, =0 {text}}after<!-- /i18n -->'))
            .toEqual([
              [['{count, plural, =0 {text}}'], '', ''],
              [['before', '{count, plural, =0 {text}}', 'after'], 'm', 'd'],
            ]);
      });

      it('should not extract ICU messages outside of i18n sections',
         () => { expect(extract('{count, plural, =0 {text}}')).toEqual([]); });

      it('should not extract nested ICU messages', () => {
        expect(extract('<div i18n="m|d">{count, plural, =0 { {sex, gender, =m {m}} }}</div>'))
            .toEqual([
              [['{count, plural, =0 {{sex, gender, =m {m}} }}'], 'm', 'd'],
            ]);
      });
    });

    describe('attributes', () => {
      it('should extract from attributes outside of translatable section', () => {
        expect(extract('<div i18n-title="m|d" title="msg"></div>')).toEqual([
          [['title="msg"'], 'm', 'd'],
        ]);
      });

      it('should extract from attributes in translatable element', () => {
        expect(extract('<div i18n><p><b i18n-title="m|d" title="msg"></b></p></div>')).toEqual([
          [['<p><b i18n-title="m|d" title="msg"></b></p>'], '', ''],
          [['title="msg"'], 'm', 'd'],
        ]);
      });

      it('should extract from attributes in translatable block', () => {
        expect(
            extract('<!-- i18n --><p><b i18n-title="m|d" title="msg"></b></p><!-- /i18n -->'))
            .toEqual([
              [['title="msg"'], 'm', 'd'],
              [['<p><b i18n-title="m|d" title="msg"></b></p>'], '', ''],
            ]);
      });

      it('should extract from attributes in translatable ICU', () => {
        expect(
            extract(
                '<!-- i18n -->{count, plural, =0 {<p><b i18n-title="m|d" title="msg"></b></p>}}<!-- /i18n -->'))
            .toEqual([
              [['title="msg"'], 'm', 'd'],
              [['{count, plural, =0 {<p><b i18n-title="m|d" title="msg"></b></p>}}'], '', ''],
            ]);
      });

      it('should extract from attributes in non translatable ICU', () => {
        expect(extract('{count, plural, =0 {<p><b i18n-title="m|d" title="msg"></b></p>}}'))
            .toEqual([
              [['title="msg"'], 'm', 'd'],
            ]);
      });

      it('should not create a message for empty attributes',
         () => { expect(extract('<div i18n-title="m|d" title></div>')).toEqual([]); });
    });

    describe('implicit tags', () => {
      it('should extract from implicit tags', () => {
        expect(extract('<b>bold</b><i>italic</i>', ['b'])).toEqual([
          [['bold'], '', ''],
        ]);
      });
    });

    describe('implicit attributes', () => {
      it('should extract implicit attributes', () => {
        expect(extract('<b title="bb">bold</b><i title="ii">italic</i>', [], {'b': ['title']}))
            .toEqual([
              [['title="bb"'], '', ''],
            ]);
      });
    });

    describe('errors', () => {
      describe('elements', () => {
        it('should report nested translatable elements', () => {
          expect(extractErrors(`<p i18n><b i18n></b></p>`)).toEqual([
            [
              'Could not mark an element as translatable inside a translatable section',
              '<b i18n>'
            ],
          ]);
        });

        it('should report translatable elements in implicit elements', () => {
          expect(extractErrors(`<p><b i18n></b></p>`, ['p'])).toEqual([
            [
              'Could not mark an element as translatable inside a translatable section',
              '<b i18n>'
            ],
          ]);
        });

        it('should report translatable elements in translatable blocks', () => {
          expect(extractErrors(`<!-- i18n --><b i18n></b><!-- /i18n -->`)).toEqual([
            [
              'Could not mark an element as translatable inside a translatable section',
              '<b i18n>'
            ],
          ]);
        });
      });

      describe('blocks', () => {
        it('should report nested blocks', () => {
          expect(extractErrors(`<!-- i18n --><!-- i18n --><!-- /i18n --><!-- /i18n -->`))
              .toEqual([
                ['Could not start a block inside a translatable section', '<!--'],
                ['Trying to close an unopened block', '<!--'],
              ]);
        });

        it('should report unclosed blocks', () => {
          expect(extractErrors(`<!-- i18n -->`)).toEqual([
            ['Unclosed block', '<!--'],
          ]);
        });

        it('should report translatable blocks in translatable elements', () => {
          expect(extractErrors(`<p i18n><!-- i18n --><!-- /i18n --></p>`)).toEqual([
            ['Could not start a block inside a translatable section', '<!--'],
            ['Trying to close an unopened block', '<!--'],
          ]);
        });

        it('should report translatable blocks in implicit elements', () => {
          expect(extractErrors(`<p><!-- i18n --><!-- /i18n --></p>`, ['p'])).toEqual([
            ['Could not start a block inside a translatable section', '<!--'],
            ['Trying to close an unopened block', '<!--'],
          ]);
        });

        it('should report when start and end of a block are not at the same level', () => {
          expect(extractErrors(`<!-- i18n --><p><!-- /i18n --></p>`)).toEqual([
            ['I18N blocks should not cross element boundaries', '<!--'],
            ['Unclosed block', '<p>'],
          ]);

          expect(extractErrors(`<p><!-- i18n --></p><!-- /i18n -->`)).toEqual([
            ['I18N blocks should not cross element boundaries', '<!--'],
            ['Unclosed block', '<!--'],
          ]);
        });
      });

      describe('implicit elements', () => {
        it('should report nested implicit elements', () => {
          expect(extractErrors(`<p><b></b></p>`, ['p', 'b'])).toEqual([
            ['Could not mark an element as translatable inside a translatable section', '<b>'],
          ]);
        });

        it('should report implicit element in translatable element', () => {
          expect(extractErrors(`<p i18n><b></b></p>`, ['b'])).toEqual([
            ['Could not mark an element as translatable inside a translatable section', '<b>'],
          ]);
        });

        it('should report implicit element in translatable blocks', () => {
          expect(extractErrors(`<!-- i18n --><b></b><!-- /i18n -->`, ['b'])).toEqual([
            ['Could not mark an element as translatable inside a translatable section', '<b>'],
          ]);
        });
      });
    });
  });
}