From 095db673c50ba1457a6bfb63e8cc2b993106ce03 Mon Sep 17 00:00:00 2001
From: vsavkin <avix1000@gmail.com>
Date: Mon, 14 Mar 2016 10:51:23 -0700
Subject: [PATCH] feat(i18n): implement a simple version of message extractor

Closes #7454
---
 modules/angular2/src/compiler/html_parser.ts  |   2 +-
 .../angular2/src/i18n/message_extractor.ts    | 261 ++++++++++++++++++
 .../test/i18n/message_extractor_spec.ts       | 162 +++++++++++
 3 files changed, 424 insertions(+), 1 deletion(-)
 create mode 100644 modules/angular2/src/i18n/message_extractor.ts
 create mode 100644 modules/angular2/test/i18n/message_extractor_spec.ts

diff --git a/modules/angular2/src/compiler/html_parser.ts b/modules/angular2/src/compiler/html_parser.ts
index bdce900fb6..d1eb270c84 100644
--- a/modules/angular2/src/compiler/html_parser.ts
+++ b/modules/angular2/src/compiler/html_parser.ts
@@ -102,7 +102,7 @@ class TreeBuilder {
     var text = this._advanceIf(HtmlTokenType.RAW_TEXT);
     this._advanceIf(HtmlTokenType.COMMENT_END);
     var value = isPresent(text) ? text.parts[0].trim() : null;
-    this._addToParent(new HtmlCommentAst(value, token.sourceSpan))
+    this._addToParent(new HtmlCommentAst(value, token.sourceSpan));
   }
 
   private _consumeText(token: HtmlToken) {
diff --git a/modules/angular2/src/i18n/message_extractor.ts b/modules/angular2/src/i18n/message_extractor.ts
new file mode 100644
index 0000000000..9fdf0d2b7e
--- /dev/null
+++ b/modules/angular2/src/i18n/message_extractor.ts
@@ -0,0 +1,261 @@
+import {HtmlParser} from 'angular2/src/compiler/html_parser';
+import {ParseSourceSpan, ParseError} from 'angular2/src/compiler/parse_util';
+import {
+  HtmlAst,
+  HtmlAstVisitor,
+  HtmlElementAst,
+  HtmlAttrAst,
+  HtmlTextAst,
+  HtmlCommentAst,
+  htmlVisitAll
+} from 'angular2/src/compiler/html_ast';
+import {isPresent, isBlank} from 'angular2/src/facade/lang';
+import {StringMapWrapper} from 'angular2/src/facade/collection';
+import {Parser} from 'angular2/src/core/change_detection/parser/parser';
+import {Interpolation} from 'angular2/src/core/change_detection/parser/ast';
+
+const I18N_ATTR = "i18n";          // marks the content of an element as a message
+const I18N_ATTR_PREFIX = "i18n-";  // `i18n-<name>` marks the value of attribute `<name>`
+
+/**
+ * A message extracted from a template.
+ *
+ * The identity of a message consists of `content` and `meaning`; two messages that agree
+ * on both are treated as duplicates by `removeDuplicates`, whatever their descriptions.
+ * `description` is additional information provided to the translator.
+ */
+export class Message {
+  constructor(public content: string, public meaning: string, public description: string) {}
+}
+
+/**
+ * The outcome of an extraction: the messages found in a template and any errors reported.
+ */
+export class ExtractionResult {
+  constructor(public messages: Message[], public errors: ParseError[]) {}
+}
+
+/**
+ * An error found while extracting messages; `span` is the source span it is reported against.
+ */
+export class I18nExtractionError extends ParseError {
+  constructor(span: ParseSourceSpan, msg: string) { super(span, msg); }
+}
+
+/**
+ * Drops messages whose identity (`meaning` + `content`) has already been seen.
+ *
+ * The first occurrence wins, so its `description` is the one that is kept. E.g.
+ *
+ * ```
+ *  var m = [new Message("message", "meaning", "desc1"), new Message("message", "meaning",
+ * "desc2")];
+ *  expect(removeDuplicates(m)).toEqual([new Message("message", "meaning", "desc1")]);
+ * ```
+ */
+export function removeDuplicates(messages: Message[]): Message[] {
+  let seen: {[key: string]: Message} = {};
+  messages.forEach(message => {
+    let id = `$ng__${message.meaning}__|${message.content}`;
+    let alreadySeen = StringMapWrapper.contains(seen, id);
+    if (alreadySeen) return;
+    seen[id] = message;
+  });
+  return StringMapWrapper.values(seen);
+}
+
+/**
+ * Extracts all messages from a template.
+ *
+ * It works like this. First, the extractor uses the provided html parser to get
+ * the html AST of the template. Then it partitions the root nodes into parts.
+ * Everything between two i18n comments becomes a single part. Every other element becomes
+ * a part too; text and comments outside such a block are skipped.
+ *
+ * We process every part as follows. Say we have a part A.
+ *
+ * If the part has the i18n attribute, it gets converted into a message.
+ * And we do not recurse into that part, except to extract messages from the attributes.
+ *
+ * If the part doesn't have the i18n attribute, we recurse into that part and
+ * partition its children.
+ *
+ * While walking the AST we also remove i18n attributes from messages.
+ */
+export class MessageExtractor {
+  messages: Message[];
+  errors: ParseError[];
+
+  constructor(private _htmlParser: HtmlParser, private _parser: Parser) {}
+
+  /** Parses `template` and collects its messages; html parse errors abort the extraction. */
+  extract(template: string, sourceUrl: string): ExtractionResult {
+    this.messages = [];
+    this.errors = [];
+
+    let res = this._htmlParser.parse(template, sourceUrl);
+    if (res.errors.length > 0) {
+      return new ExtractionResult([], res.errors);
+    }
+
+    this._recurse(res.rootNodes);
+    return new ExtractionResult(this.messages, this.errors);
+  }
+
+  private _extractMessagesFromPart(p: _Part): void {
+    if (p.hasI18n) {
+      this.messages.push(new Message(_stringifyNodes(p.children, this._parser), _meaning(p.i18n),
+                                     _description(p.i18n)));
+      this._recurseToExtractMessagesFromAttributes(p.children);
+    } else {
+      this._recurse(p.children);
+    }
+    // Parts delimited by i18n comments have no root element.
+    if (isPresent(p.rootElement)) {
+      this._extractMessagesFromAttributes(p.rootElement);
+    }
+  }
+
+  /** Partitions `nodes` and extracts the messages of every resulting part. */
+  private _recurse(nodes: HtmlAst[]): void {
+    this._partition(nodes).forEach(p => this._extractMessagesFromPart(p));
+  }
+
+  private _recurseToExtractMessagesFromAttributes(nodes: HtmlAst[]): void {
+    nodes.forEach(n => {
+      if (n instanceof HtmlElementAst) {
+        this._extractMessagesFromAttributes(n);
+        this._recurseToExtractMessagesFromAttributes(n.children);
+      }
+    });
+  }
+
+  /** Turns each `i18n-<name>` attribute of `p` into a message taken from attribute `<name>`. */
+  private _extractMessagesFromAttributes(p: HtmlElementAst): void {
+    p.attrs.forEach(attr => {
+      if (attr.name.startsWith(I18N_ATTR_PREFIX)) {
+        let expectedName = attr.name.substring(I18N_ATTR_PREFIX.length);
+        let matching = p.attrs.filter(a => a.name == expectedName);
+        if (matching.length > 0) {
+          let value = _removeInterpolation(matching[0].value, p.sourceSpan, this._parser);
+          this.messages.push(new Message(value, _meaning(attr.value), _description(attr.value)));
+        } else {
+          this.errors.push(
+              new I18nExtractionError(p.sourceSpan, `Missing attribute '${expectedName}'.`));
+        }
+      }
+    });
+  }
+
+  /** Splits `nodes` into parts: one per i18n comment block and one per element. */
+  private _partition(nodes: HtmlAst[]): _Part[] {
+    let res = [];
+
+    for (let i = 0; i < nodes.length; ++i) {
+      let n = nodes[i];
+      let temp = [];
+      if (_isOpeningComment(n)) {
+        let i18n = (<HtmlCommentAst>n).value.substring("i18n:".length).trim();
+        i++;
+        while (i < nodes.length && !_isClosingComment(nodes[i])) {
+          temp.push(nodes[i++]);
+        }
+        // Running off the end (even right after the opening comment) means it was never closed.
+        if (i >= nodes.length) {
+          this.errors.push(
+              new I18nExtractionError(n.sourceSpan, "Missing closing 'i18n' comment."));
+        }
+        res.push(new _Part(null, temp, i18n, true));
+
+      } else if (n instanceof HtmlElementAst) {
+        let i18n = _findI18nAttr(n);
+        res.push(new _Part(n, n.children, isPresent(i18n) ? i18n.value : null, isPresent(i18n)));
+      }
+    }
+
+    return res;
+  }
+}
+
+class _Part {  // One unit of extraction; `rootElement` is null for comment-delimited parts.
+  constructor(public rootElement: HtmlElementAst, public children: HtmlAst[], public i18n: string,
+              public hasI18n: boolean) {}
+}
+
+function _isOpeningComment(n: HtmlAst): boolean {  // a comment whose value starts with "i18n:"
+  return n instanceof HtmlCommentAst && isPresent(n.value) && n.value.startsWith("i18n:");
+}
+
+function _isClosingComment(n: HtmlAst): boolean {  // a comment whose value is exactly "/i18n"
+  return n instanceof HtmlCommentAst && isPresent(n.value) && n.value == "/i18n";
+}
+
+function _stringifyNodes(nodes: HtmlAst[], parser: Parser) {
+  // Visits every node with a fresh _StringifyVisitor and concatenates the pieces it returns.
+  return htmlVisitAll(new _StringifyVisitor(parser), nodes).join("");
+}
+
+/** Serializes html nodes into the string that is used as message content. */
+class _StringifyVisitor implements HtmlAstVisitor {
+  constructor(private _parser: Parser) {}
+
+  visitElement(ast: HtmlElementAst, context: any): any {
+    let attrs = this._join(htmlVisitAll(this, ast.attrs), " ");
+    let children = this._join(htmlVisitAll(this, ast.children), "");
+    // Emit the separating space only when some attribute is actually printed.
+    let openTag = attrs.length > 0 ? `<${ast.name} ${attrs}>` : `<${ast.name}>`;
+    return `${openTag}${children}</${ast.name}>`;
+  }
+
+  visitAttr(ast: HtmlAttrAst, context: any): any {
+    // `i18n-*` attributes are extraction metadata, so they are left out of the message.
+    return ast.name.startsWith(I18N_ATTR_PREFIX) ? "" : `${ast.name}="${ast.value}"`;
+  }
+
+  visitText(ast: HtmlTextAst, context: any): any {
+    return _removeInterpolation(ast.value, ast.sourceSpan, this._parser);
+  }
+
+  visitComment(ast: HtmlCommentAst, context: any): any { return ""; }
+
+  private _join(strs: string[], str: string): string {
+    return strs.filter(s => s.length > 0).join(str);
+  }
+}
+
+function _removeInterpolation(value: string, source: ParseSourceSpan, parser: Parser): string {
+  // Swaps each interpolation for a positional placeholder, e.g. "Hi {{one}} and {{two}}"
+  // becomes "Hi {{I0}} and {{I1}}". When the parser yields nothing, or throws, the value
+  // is returned unchanged.
+  try {
+    let parsed = parser.parseInterpolation(value, source.toString());
+    if (!isPresent(parsed)) {
+      return value;
+    }
+
+    let interpolation: Interpolation = <any>parsed.ast;
+    let lastIndex = interpolation.strings.length - 1;
+    let pieces = interpolation.strings.map(
+        (text, index) => index == lastIndex ? text : text + `{{I${index}}}`);
+    return pieces.join("");
+  } catch (e) {
+    // Best effort: a broken expression is kept verbatim in the message.
+    return value;
+  }
+}
+
+function _findI18nAttr(p: HtmlElementAst): HtmlAttrAst {
+  let matches = p.attrs.filter(attr => attr.name == I18N_ATTR);  // exact name, not `i18n-*`
+  return matches.length > 0 ? matches[0] : null;
+}
+
+function _meaning(i18n: string): string {
+  // The meaning is whatever precedes the first "|"; null when nothing was specified.
+  return (isBlank(i18n) || i18n == "") ? null : i18n.split("|")[0];
+}
+
+function _description(i18n: string): string {
+  // Everything after the first "|" is the description, so it may itself contain "|".
+  let separator = isBlank(i18n) ? -1 : i18n.indexOf("|");
+  return separator >= 0 ? i18n.substring(separator + 1) : null;
+}
\ No newline at end of file
diff --git a/modules/angular2/test/i18n/message_extractor_spec.ts b/modules/angular2/test/i18n/message_extractor_spec.ts
new file mode 100644
index 0000000000..ab15741ce0
--- /dev/null
+++ b/modules/angular2/test/i18n/message_extractor_spec.ts
@@ -0,0 +1,162 @@
+import {
+  AsyncTestCompleter,
+  beforeEach,
+  ddescribe,
+  describe,
+  expect,
+  iit,
+  inject,
+  it,
+  xdescribe,
+  xit
+} from 'angular2/testing_internal';
+
+import {HtmlParser} from 'angular2/src/compiler/html_parser';
+import {MessageExtractor, Message, removeDuplicates} from 'angular2/src/i18n/message_extractor';
+import {Parser} from 'angular2/src/core/change_detection/parser/parser';
+import {Lexer} from 'angular2/src/core/change_detection/parser/lexer';
+
+export function main() {
+  describe('MessageExtractor', () => {
+    let extractor: MessageExtractor;
+
+    beforeEach(() => {
+      let htmlParser = new HtmlParser();
+      let parser = new Parser(new Lexer());
+      extractor = new MessageExtractor(htmlParser, parser);
+    });
+
+    it('should extract from elements with the i18n attr', () => {
+      let res = extractor.extract("<div i18n='meaning|desc'>message</div>", "someurl");
+      expect(res.messages).toEqual([new Message("message", 'meaning', 'desc')]);
+    });
+
+    it('should extract from elements with the i18n attr without a desc', () => {
+      let res = extractor.extract("<div i18n='meaning'>message</div>", "someurl");
+      expect(res.messages).toEqual([new Message("message", 'meaning', null)]);
+    });
+
+    it('should extract from elements with the i18n attr without a meaning', () => {
+      let res = extractor.extract("<div i18n>message</div>", "someurl");
+      expect(res.messages).toEqual([new Message("message", null, null)]);
+    });
+
+    it('should extract from attributes', () => {
+      let res = extractor.extract(`
+        <div
+          title1='message1' i18n-title1='meaning1|desc1'
+          title2='message2' i18n-title2='meaning2|desc2'>
+        </div>
+      `,
+                                  "someurl");
+
+      expect(res.messages)
+          .toEqual([
+            new Message("message1", "meaning1", "desc1"),
+            new Message("message2", "meaning2", "desc2")
+          ]);
+    });
+
+    it('should error on i18n attributes without matching "real" attributes', () => {
+      let res = extractor.extract(`
+        <div
+          title1='message1' i18n-title1='meaning1|desc1' i18n-title2='meaning2|desc2'>
+        </div>
+      `,
+                                  "someurl");
+
+      expect(res.errors.length).toEqual(1);
+      expect(res.errors[0].msg).toEqual("Missing attribute 'title2'.");
+    });
+
+    it('should extract from partitions', () => {
+      let res = extractor.extract(`
+         <!-- i18n: meaning1|desc1 -->message1<!-- /i18n -->
+         <!-- i18n: meaning2|desc2 -->message2<!-- /i18n -->`,
+                                  "someUrl");
+
+      expect(res.messages)
+          .toEqual([
+            new Message("message1", "meaning1", "desc1"),
+            new Message("message2", "meaning2", "desc2")
+          ]);
+    });
+
+    it('should ignore other comments', () => {
+      let res = extractor.extract(`
+         <!-- i18n: meaning1|desc1 --><!-- other -->message1<!-- /i18n -->`,
+                                  "someUrl");
+
+      expect(res.messages).toEqual([new Message("message1", "meaning1", "desc1")]);
+    });
+
+    it('should error when cannot find a matching closing comment', () => {
+      let res = extractor.extract(`
+         <!-- i18n: meaning1|desc1 -->message1`,
+                                  "someUrl");
+
+      expect(res.errors.length).toEqual(1);
+      expect(res.errors[0].msg).toEqual("Missing closing 'i18n' comment.");
+    });
+
+    it('should replace interpolation with placeholders (text nodes)', () => {
+      let res = extractor.extract("<div i18n>Hi {{one}} and {{two}}</div>", "someurl");
+      expect(res.messages).toEqual([new Message("Hi {{I0}} and {{I1}}", null, null)]);
+    });
+
+    it('should replace interpolation with placeholders (attributes)', () => {
+      let res =
+          extractor.extract("<div title='Hi {{one}} and {{two}}' i18n-title></div>", "someurl");
+      expect(res.messages).toEqual([new Message("Hi {{I0}} and {{I1}}", null, null)]);
+    });
+
+    it('should ignore errors in interpolation', () => {
+      let res = extractor.extract("<div i18n>Hi {{on???.s}}</div>", "someurl");
+      expect(res.messages).toEqual([new Message("Hi {{on???.s}}", null, null)]);  // kept verbatim
+    });
+
+    it("should return parse errors when the template is invalid", () => {
+      let res = extractor.extract("<input&#Besfs", "someurl");
+      expect(res.errors.length).toEqual(1);
+      expect(res.errors[0].msg).toEqual('Unexpected character "s"');
+    });
+
+    it("should handle html content", () => {
+      let res = extractor.extract('<div i18n><div attr="value">message</div></div>', "someurl");
+      expect(res.messages).toEqual([new Message('<div attr="value">message</div>', null, null)]);
+    });
+
+    it("should extract from nested elements", () => {
+      let res = extractor.extract(
+          '<div title="message1" i18n-title="meaning1|desc1"><div i18n="meaning2|desc2">message2</div></div>',
+          "someurl");
+      expect(res.messages)
+          .toEqual([
+            new Message("message2", "meaning2", "desc2"),  // children precede the root's attributes
+            new Message("message1", "meaning1", "desc1")
+          ]);
+    });
+
+    it("should extract messages from attributes in i18n blocks", () => {
+      let res = extractor.extract(
+          '<div i18n><div attr="value" i18n-attr="meaning|desc">message</div></div>', "someurl");
+      expect(res.messages)
+          .toEqual([
+            new Message('<div attr="value">message</div>', null, null),
+            new Message('value', "meaning", "desc")
+          ]);
+    });
+
+    it("should remove duplicate messages", () => {
+      let res = extractor.extract(`
+         <!-- i18n: meaning|desc1 -->message<!-- /i18n -->
+         <!-- i18n: meaning|desc2 -->message<!-- /i18n -->`,
+                                  "someUrl");
+
+      expect(removeDuplicates(res.messages))
+          .toEqual([
+            new Message("message", "meaning", "desc1"),  // the first occurrence wins
+          ]);
+    });
+  });
+}