From 095db673c50ba1457a6bfb63e8cc2b993106ce03 Mon Sep 17 00:00:00 2001 From: vsavkin Date: Mon, 14 Mar 2016 10:51:23 -0700 Subject: [PATCH] feat(i18n): implement a simple version of message extractor Closes #7454 --- modules/angular2/src/compiler/html_parser.ts | 2 +- .../angular2/src/i18n/message_extractor.ts | 261 ++++++++++++++++++ .../test/i18n/message_extractor_spec.ts | 162 +++++++++++ 3 files changed, 424 insertions(+), 1 deletion(-) create mode 100644 modules/angular2/src/i18n/message_extractor.ts create mode 100644 modules/angular2/test/i18n/message_extractor_spec.ts diff --git a/modules/angular2/src/compiler/html_parser.ts b/modules/angular2/src/compiler/html_parser.ts index bdce900fb6..d1eb270c84 100644 --- a/modules/angular2/src/compiler/html_parser.ts +++ b/modules/angular2/src/compiler/html_parser.ts @@ -102,7 +102,7 @@ class TreeBuilder { var text = this._advanceIf(HtmlTokenType.RAW_TEXT); this._advanceIf(HtmlTokenType.COMMENT_END); var value = isPresent(text) ? text.parts[0].trim() : null; - this._addToParent(new HtmlCommentAst(value, token.sourceSpan)) + this._addToParent(new HtmlCommentAst(value, token.sourceSpan)); } private _consumeText(token: HtmlToken) { diff --git a/modules/angular2/src/i18n/message_extractor.ts b/modules/angular2/src/i18n/message_extractor.ts new file mode 100644 index 0000000000..9fdf0d2b7e --- /dev/null +++ b/modules/angular2/src/i18n/message_extractor.ts @@ -0,0 +1,261 @@ +import {HtmlParser} from 'angular2/src/compiler/html_parser'; +import {ParseSourceSpan, ParseError} from 'angular2/src/compiler/parse_util'; +import { + HtmlAst, + HtmlAstVisitor, + HtmlElementAst, + HtmlAttrAst, + HtmlTextAst, + HtmlCommentAst, + htmlVisitAll +} from 'angular2/src/compiler/html_ast'; +import {isPresent, isBlank} from 'angular2/src/facade/lang'; +import {StringMapWrapper} from 'angular2/src/facade/collection'; +import {Parser} from 'angular2/src/core/change_detection/parser/parser'; +import {Interpolation} from 
'angular2/src/core/change_detection/parser/ast';

const I18N_ATTR = "i18n";
const I18N_ATTR_PREFIX = "i18n-";
// Prefix of the comment that opens an i18n block; the text after it is `meaning|description`.
const I18N_COMMENT_PREFIX = "i18n:";

/**
 * A message extracted from a template.
 *
 * The identity of a message is comprised of `content` and `meaning`.
 *
 * `description` is additional information provided to the translator.
 */
export class Message {
  constructor(public content: string, public meaning: string, public description: string) {}
}

/**
 * All messages extracted from a template, together with the errors found while extracting.
 */
export class ExtractionResult {
  constructor(public messages: Message[], public errors: ParseError[]) {}
}

/**
 * An extraction error.
 */
export class I18nExtractionError extends ParseError {
  constructor(span: ParseSourceSpan, msg: string) { super(span, msg); }
}

/**
 * Removes duplicate messages.
 *
 * Two messages are duplicates when both their `meaning` and their `content` are equal;
 * `description` is ignored. The first occurrence is the one that is kept.
 *
 * E.g.
 *
 * ```
 *  var m = [new Message("message", "meaning", "desc1"), new Message("message", "meaning",
 * "desc2")];
 *  expect(removeDuplicates(m)).toEqual([new Message("message", "meaning", "desc1")]);
 * ```
 */
export function removeDuplicates(messages: Message[]): Message[] {
  let uniq: {[key: string]: Message} = {};
  messages.forEach(m => {
    // The meaning is length-prefixed so that the key is unambiguous: a missing meaning can no
    // longer collide with the literal meaning "null", and a meaning that happens to contain the
    // "__|" separator can no longer collide with a different meaning/content split.
    let meaningKey = isPresent(m.meaning) ? `${m.meaning.length}:${m.meaning}` : "";
    let key = `$ng__${meaningKey}__|${m.content}`;
    if (!StringMapWrapper.contains(uniq, key)) {
      uniq[key] = m;
    }
  });
  return StringMapWrapper.values(uniq);
}

/**
 * Extracts all messages from a template.
 *
 * It works like this. First, the extractor uses the provided html parser to get
 * the html AST of the template. Then it partitions the root nodes into parts.
 * Everything between two i18n comments becomes a single part. Every other element node
 * becomes a part too.
 *
 * We process every part as follows. Say we have a part A.
 *
 * If the part has the i18n attribute, it gets converted into a message.
 * And we do not recurse into that part, except to extract messages from the attributes.
 *
 * If the part doesn't have the i18n attribute, we recurse into that part and
 * partition its children.
 *
 * While walking the AST we also remove i18n attributes from messages.
 */
export class MessageExtractor {
  // Both fields are reset at the start of every `extract` call.
  messages: Message[];
  errors: ParseError[];

  constructor(private _htmlParser: HtmlParser, private _parser: Parser) {}

  /**
   * Parses `template` and returns the messages and extraction errors found in it.
   *
   * If the html parser reports errors, no extraction is attempted and the result carries
   * those parse errors with an empty message list.
   */
  extract(template: string, sourceUrl: string): ExtractionResult {
    this.messages = [];
    this.errors = [];

    let res = this._htmlParser.parse(template, sourceUrl);
    if (res.errors.length > 0) {
      return new ExtractionResult([], res.errors);
    } else {
      let ps = this._partition(res.rootNodes);
      ps.forEach(p => this._extractMessagesFromPart(p));
      return new ExtractionResult(this.messages, this.errors);
    }
  }

  /**
   * Turns an i18n part into a message (and then only looks at attributes of its descendants),
   * or recurses into a non-i18n part. The attributes of the part's root element, if any, are
   * processed last.
   */
  private _extractMessagesFromPart(p: _Part): void {
    if (p.hasI18n) {
      this.messages.push(new Message(_stringifyNodes(p.children, this._parser), _meaning(p.i18n),
                                     _description(p.i18n)));
      this._recurseToExtractMessagesFromAttributes(p.children);
    } else {
      this._recurse(p.children);
    }

    if (isPresent(p.rootElement)) {
      this._extractMessagesFromAttributes(p.rootElement);
    }
  }

  /** Partitions `nodes` and extracts messages from every resulting part. */
  private _recurse(nodes: HtmlAst[]): void {
    let ps = this._partition(nodes);
    ps.forEach(p => this._extractMessagesFromPart(p));
  }

  /** Walks all element descendants of `nodes`, extracting only `i18n-*` attribute messages. */
  private _recurseToExtractMessagesFromAttributes(nodes: HtmlAst[]): void {
    nodes.forEach(n => {
      if (n instanceof HtmlElementAst) {
        this._extractMessagesFromAttributes(n);
        this._recurseToExtractMessagesFromAttributes(n.children);
      }
    });
  }

  /**
   * For every `i18n-<name>` attribute of `p`, emits a message built from the value of the
   * sibling attribute `<name>`, or records an error when that attribute does not exist.
   */
  private _extractMessagesFromAttributes(p: HtmlElementAst): void {
    p.attrs.forEach(attr => {
      if (attr.name.startsWith(I18N_ATTR_PREFIX)) {
        // Derive the offset from the prefix itself instead of a hard-coded 5.
        let expectedName = attr.name.substring(I18N_ATTR_PREFIX.length);
        let matching = p.attrs.filter(a => a.name == expectedName);

        if (matching.length > 0) {
          let value = _removeInterpolation(matching[0].value, p.sourceSpan, this._parser);
          this.messages.push(new Message(value, _meaning(attr.value), _description(attr.value)));
        } else {
          this.errors.push(
              new I18nExtractionError(p.sourceSpan, `Missing attribute '${expectedName}'.`));
        }
      }
    });
  }

  /**
   * Splits sibling `nodes` into parts.
   *
   * - The nodes between an opening `i18n:` comment and the next `/i18n` comment form one i18n
   *   part without a root element.
   * - Every element outside such a block forms its own part, rooted at that element.
   * - Any other node outside a block (text, unrelated comments) is ignored.
   *
   * When an opening comment has no closing comment, every remaining sibling is collected into
   * the part and a "Missing closing 'i18n' comment." error is recorded.
   */
  private _partition(nodes: HtmlAst[]): _Part[] {
    let res: _Part[] = [];

    for (let i = 0; i < nodes.length; ++i) {
      let n = nodes[i];
      if (_isOpeningComment(n)) {
        let i18n = (<HtmlCommentAst>n).value.substring(I18N_COMMENT_PREFIX.length).trim();
        let temp: HtmlAst[] = [];
        i++;
        // The bound is checked before every access. Without it, an opening comment that is the
        // last sibling made `i` step past `nodes.length` and the loop never terminated.
        while (i < nodes.length && !_isClosingComment(nodes[i])) {
          temp.push(nodes[i++]);
        }
        if (i >= nodes.length) {
          this.errors.push(
              new I18nExtractionError(n.sourceSpan, "Missing closing 'i18n' comment."));
        }
        // On success `i` points at the closing comment; the loop's `++i` steps over it.
        res.push(new _Part(null, temp, i18n, true));

      } else if (n instanceof HtmlElementAst) {
        let i18n = _findI18nAttr(n);
        res.push(new _Part(n, n.children, isPresent(i18n) ? i18n.value : null, isPresent(i18n)));
      }
    }

    return res;
  }
}

/**
 * A unit of extraction produced by `MessageExtractor._partition`.
 *
 * `rootElement` is null for parts delimited by i18n comments. `i18n` holds the raw
 * `meaning|description` string and `hasI18n` tells whether the part is a message at all.
 */
class _Part {
  constructor(public rootElement: HtmlElementAst, public children: HtmlAst[], public i18n: string,
              public hasI18n: boolean) {}
}

/** True for a comment node whose value starts with `i18n:`. */
function _isOpeningComment(n: HtmlAst): boolean {
  return n instanceof HtmlCommentAst && isPresent(n.value) &&
         n.value.startsWith(I18N_COMMENT_PREFIX);
}

/** True for a comment node whose value is exactly `/i18n`. */
function _isClosingComment(n: HtmlAst): boolean {
  return n instanceof HtmlCommentAst && isPresent(n.value) && n.value == "/i18n";
}

/** Serializes `nodes` into the string used as message content. */
function _stringifyNodes(nodes: HtmlAst[], parser: Parser) {
  let visitor = new _StringifyVisitor(parser);
  return htmlVisitAll(visitor, nodes).join("");
}

/**
 * Serializes an html AST: elements are written as tags, `i18n-*` attributes and comments are
 * dropped, and interpolations in text are replaced by placeholders.
 */
class _StringifyVisitor implements HtmlAstVisitor {
  constructor(private _parser: Parser) {}

  visitElement(ast: HtmlElementAst, context: any): any {
    let attrs = this._join(htmlVisitAll(this, ast.attrs), " ");
    let children = this._join(htmlVisitAll(this, ast.children), "");
    // The closing tag keeps the serialization unambiguous: without it `<b>a</b>c` and
    // `<b>ac</b>` would yield the same content, and therefore the same message identity.
    // NOTE(review): the closing tag looks like it was stripped from this copy of the file
    // together with other markup - confirm against the upstream source.
    return `<${ast.name} ${attrs}>${children}</${ast.name}>`;
  }

  visitAttr(ast: HtmlAttrAst, context: any): any {
    if (ast.name.startsWith(I18N_ATTR_PREFIX)) {
      return "";
    } else {
      return `${ast.name}="${ast.value}"`;
    }
  }

  visitText(ast: HtmlTextAst, context: any): any {
    return _removeInterpolation(ast.value, ast.sourceSpan, this._parser);
  }

  visitComment(ast: HtmlCommentAst, context: any): any { return ""; }

  // Joins only the non-empty pieces, so dropped attributes leave no stray separators.
  private _join(strs: string[], str: string): string {
    return strs.filter(s => s.length > 0).join(str);
  }
}

/**
 * Replaces every interpolation in `value` with a positional placeholder `{{I0}}`, `{{I1}}`, ...
 *
 * `value` is returned unchanged when the parser finds no interpolation in it or when parsing
 * throws; interpolation errors are deliberately not reported by the extractor.
 */
function _removeInterpolation(value: string, source: ParseSourceSpan, parser: Parser): string {
  try {
    let parsed = parser.parseInterpolation(value, source.toString());
    if (isPresent(parsed)) {
      // `parsed.ast` is only read as an Interpolation here (its `strings` are the literal pieces
      // around the expressions), so narrow it explicitly.
      let ast: Interpolation = <Interpolation>parsed.ast;
      let res = "";
      for (let i = 0; i < ast.strings.length; ++i) {
        res += ast.strings[i];
        if (i != ast.strings.length - 1) {
          res += `{{I${i}}}`;
        }
      }
      return res;
    } else {
      return value;
    }
  } catch (e) {
    return value;
  }
}

/** Returns the plain `i18n` attribute of `p`, or null when the element has none. */
function _findI18nAttr(p: HtmlElementAst): HtmlAttrAst {
  let i18n = p.attrs.filter(a => a.name == I18N_ATTR);
  return i18n.length == 0 ? null : i18n[0];
}

/** Returns the part of a `meaning|description` string before the first `|`, or null if blank. */
function _meaning(i18n: string): string {
  if (isBlank(i18n) || i18n == "") return null;
  return i18n.split("|")[0];
}

/** Returns the second `|`-separated field of a `meaning|description` string, or null. */
function _description(i18n: string): string {
  if (isBlank(i18n) || i18n == "") return null;
  let parts = i18n.split("|");
  return parts.length > 1 ?
parts[1] : null; +} \ No newline at end of file diff --git a/modules/angular2/test/i18n/message_extractor_spec.ts b/modules/angular2/test/i18n/message_extractor_spec.ts new file mode 100644 index 0000000000..ab15741ce0 --- /dev/null +++ b/modules/angular2/test/i18n/message_extractor_spec.ts @@ -0,0 +1,162 @@ +import { + AsyncTestCompleter, + beforeEach, + ddescribe, + describe, + expect, + iit, + inject, + it, + xdescribe, + xit +} from 'angular2/testing_internal'; + +import {HtmlParser} from 'angular2/src/compiler/html_parser'; +import {MessageExtractor, Message, removeDuplicates} from 'angular2/src/i18n/message_extractor'; +import {Parser} from 'angular2/src/core/change_detection/parser/parser'; +import {Lexer} from 'angular2/src/core/change_detection/parser/lexer'; + +export function main() { + describe('MessageExtractor', () => { + let extractor: MessageExtractor; + + beforeEach(() => { + let htmlParser = new HtmlParser(); + var parser = new Parser(new Lexer()); + extractor = new MessageExtractor(htmlParser, parser); + }); + + it('should extract from elements with the i18n attr', () => { + let res = extractor.extract("
message
", "someurl"); + expect(res.messages).toEqual([new Message("message", 'meaning', 'desc')]); + }); + + it('should extract from elements with the i18n attr without a desc', () => { + let res = extractor.extract("
message
", "someurl"); + expect(res.messages).toEqual([new Message("message", 'meaning', null)]); + }); + + it('should extract from elements with the i18n attr without a meaning', () => { + let res = extractor.extract("
message
", "someurl"); + expect(res.messages).toEqual([new Message("message", null, null)]); + }); + + it('should extract from attributes', () => { + let res = extractor.extract(` +
+
+ `, + "someurl"); + + expect(res.messages) + .toEqual([ + new Message("message1", "meaning1", "desc1"), + new Message("message2", "meaning2", "desc2") + ]); + }); + + it('should error on i18n attributes without matching "real" attributes', () => { + let res = extractor.extract(` +
+
+ `, + "someurl"); + + expect(res.errors.length).toEqual(1); + expect(res.errors[0].msg).toEqual("Missing attribute 'title2'."); + }); + + it('should extract from partitions', () => { + let res = extractor.extract(` + message1 + message2`, + "someUrl"); + + expect(res.messages) + .toEqual([ + new Message("message1", "meaning1", "desc1"), + new Message("message2", "meaning2", "desc2") + ]); + }); + + it('should ignore other comments', () => { + let res = extractor.extract(` + message1`, + "someUrl"); + + expect(res.messages).toEqual([new Message("message1", "meaning1", "desc1")]); + }); + + it('should error when cannot find a matching desc', () => { + let res = extractor.extract(` + message1`, + "someUrl"); + + expect(res.errors.length).toEqual(1); + expect(res.errors[0].msg).toEqual("Missing closing 'i18n' comment."); + }); + + it('should replace interpolation with placeholders (text nodes)', () => { + let res = extractor.extract("
Hi {{one}} and {{two}}
", "someurl"); + expect(res.messages).toEqual([new Message("Hi {{I0}} and {{I1}}", null, null)]); + }); + + it('should replace interpolation with placeholders (attributes)', () => { + let res = + extractor.extract("
", "someurl"); + expect(res.messages).toEqual([new Message("Hi {{I0}} and {{I1}}", null, null)]); + }); + + it('should ignore errors in interpolation', () => { + let res = extractor.extract("
Hi {{on???.s}}
", "someurl"); + expect(res.messages).toEqual([new Message("Hi {{on???.s}}", null, null)]); + }); + + it("should return parse errors when the template is invalid", () => { + let res = extractor.extract(" { + let res = extractor.extract('
message
', "someurl"); + expect(res.messages).toEqual([new Message('
message
', null, null)]); + }); + + it("should extract from nested elements", () => { + let res = extractor.extract( + '
message2
', + "someurl"); + expect(res.messages) + .toEqual([ + new Message("message2", "meaning2", "desc2"), + new Message("message1", "meaning1", "desc1") + ]); + }); + + it("should extract messages from attributes in i18n blocks", () => { + let res = extractor.extract( + '
message
', "someurl"); + expect(res.messages) + .toEqual([ + new Message('
message
', null, null), + new Message('value', "meaning", "desc") + ]); + }); + + it("should remove duplicate messages", () => { + let res = extractor.extract(` + message + message`, + "someUrl"); + + expect(removeDuplicates(res.messages)) + .toEqual([ + new Message("message", "meaning", "desc1"), + ]); + }); + }); +}