refactor: extract checker functions

2018-03-03 08:13:52 +08:00 · 2018-03-03 08:13:52 +08:00 · 19f4ad46ae
commit 19f4ad46ae
parent c2f0045a5e
14 changed files with 107464 additions and 9612 deletions
--- a/aio/content/guide/architecture.md
+++ b/aio/content/guide/architecture.md
@ -184,6 +184,7 @@ JavaScript 中，每个_文件_是一个模块，文件中定义的所有对象


 <a href="http://exploringjs.com/es6/ch_modules.html" >Learn more about the JavaScript module system on the web.</a>
+
 <a href="http://exploringjs.com/es6/ch_modules.html" target="_blank">学习更多关于 JavaScript 模块的知识。</a>
 </div>

@ -245,6 +246,7 @@ Hang in there. The confusion yields to clarity with time and experience.


 Learn more from the [NgModules](guide/ngmodules) page.
+
 更多信息，见 [Angular 模块](guide/ngmodule)。

 </div>
--- a/aio/content/guide/npm-packages.md
+++ b/aio/content/guide/npm-packages.md
@ -19,6 +19,7 @@ The Angular CLI uses `yarn` by default to install npm packages when you create a
 <div class="l-sub-section">

 Node.js and npm are essential to Angular development. 
+
 Node.js和npm是做Angular开发的基础。

 [Get them now](https://docs.npmjs.com/getting-started/installing-node "Installing Node.js and updating npm")
--- a/aio/content/guide/reactive-forms.md
+++ b/aio/content/guide/reactive-forms.md
@ -1148,8 +1148,10 @@ such as one of the following:

      `true` if the control user has not yet entered the HTML control
       and triggered its blur event. Its opposite is `myControl.touched`.
-                         如果用户尚未进入这个HTML控件，也没有触发过它的`blur`（失去焦点）事件，则为`true`。
+       
+      如果用户尚未进入这个HTML控件，也没有触发过它的`blur`（失去焦点）事件，则为`true`。
      它是`myControl.touched`的反义词。
+      
    </td>

  </tr>
--- a/aio/tools/translator/checker.spec.ts
+++ b/aio/tools/translator/checker.spec.ts
@ -1,7 +1,16 @@
 import { expect } from 'chai';
-import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
-import { gatherFromMarkdownFiles, isTranslation } from './extractor';
+import { gatherFromMarkdownFiles } from './extractor';
+import {
+  isHead,
+  isNotCheatSheet,
+  isNotCnPages,
+  isNotImg,
+  isNotMarketingDocs,
+  originalIsNotChinese,
+  originalIsNotTag,
+  translationHasNotCodeExample,
+} from './utils';

 describe('自动检查翻译结果', function () {
  const entries = gatherFromMarkdownFiles(dirs.content)
@ -10,12 +19,12 @@ describe('自动检查翻译结果', function () {
    .filter(isNotCnPages);

  it('译文里不应该出现 <code-example>', function () {
-    const codeExamples = entries.filter(entry => entry.translation.indexOf('<code-example') !== -1);
+    const codeExamples = entries.filter(entry => !translationHasNotCodeExample(entry)); // keep offenders only
    expect(codeExamples).eql([]);
  });

  it('原文中不应该有汉语', function () {
-    const lines = entries.filter(entry => isTranslation(entry.original))
+    const lines = entries.filter(entry => !originalIsNotChinese(entry)) // keep offenders only
      .filter(isNotImg);
    expect(lines).eql([]);
  });
@ -33,27 +42,7 @@ describe('自动检查翻译结果', function () {
  });

  it('原文不应该是以 <div 开头的', function () {
-    const lines = entries.filter(entry => /^ *<div.*/.test(entry.original));
+    const lines = entries.filter(entry => !originalIsNotTag(entry)); // keep offenders only
    expect(lines).eql([]);
  });
 });
-
-function isNotImg(entry: DictEntry): boolean {
-  return !/^<(img|figure)/.test(entry.translation);
-}
-
-function isNotCheatSheet(entry: DictEntry): boolean {
-  return !/cheatsheet.md$/.test(entry.sourceFile);
-}
-
-function isNotMarketingDocs(entry: DictEntry): boolean {
-  return !/marketing\/docs.md$/.test(entry.sourceFile);
-}
-
-function isNotCnPages(entry: DictEntry): boolean {
-  return !/cn\/.*?.md$/.test(entry.sourceFile);
-}
-
-function isHead(line: string): boolean {
-  return /^#/.test(line);
-}
--- a/aio/tools/translator/dict-1.json
+++ b/aio/tools/translator/dict-1.json
--- a/aio/tools/translator/dict-2.json
+++ b/aio/tools/translator/dict-2.json
--- a/aio/tools/translator/dict-3.json
+++ b/aio/tools/translator/dict-3.json
--- a/aio/tools/translator/dict-entry.ts
+++ b/aio/tools/translator/dict-entry.ts
@ -1,5 +1,5 @@
 export class DictEntry {
  original: string;
  translation: string;
-  sourceFile: string;
+  sourceFile?: string;
 }
--- a/aio/tools/translator/dict-final.json
+++ b/aio/tools/translator/dict-final.json
--- a/aio/tools/translator/extractor.spec.ts
+++ b/aio/tools/translator/extractor.spec.ts
@ -1,4 +1,5 @@
 import { expect } from 'chai';
+import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
 import { gatherFromMarkdownFiles, gatherTranslations, listMarkdownFiles, splitAndTrim } from './extractor';

@ -42,12 +43,17 @@ describe('从对照翻译文件中采集生成字典', () => {
  });

  it('从对照文本的文件夹中采集生成字典（非测试）', () => {
-    const entries = gatherFromMarkdownFiles(dirs.content);
-    const dict = JSON.stringify(entries, null, 2);
-    const fs = require('fs');
-    fs.writeFileSync(dirs.here + 'dict-3.json', dict, 'utf-8');
-    expect(entries.length).greaterThan(100);
+    gatherFromDirectory(dirs.aio + '../../content-1/', dirs.here + 'dict-1.json');
+    gatherFromDirectory(dirs.aio + '../../content-2/', dirs.here + 'dict-2.json');
+    gatherFromDirectory(dirs.aio + '../../content-3/', dirs.here + 'dict-3.json');
  });

 });

+function gatherFromDirectory(directory: string, dictFile: string): DictEntry[] {
+  const entries = gatherFromMarkdownFiles(directory);
+  const dict = JSON.stringify(entries, null, 2);
+  const fs = require('fs');
+  fs.writeFileSync(dictFile, dict, 'utf-8');
+  return entries;
+}
--- a/aio/tools/translator/extractor.ts
+++ b/aio/tools/translator/extractor.ts
@ -1,27 +1,43 @@
 import * as globby from 'globby';
 import { DictEntry } from './dict-entry';
+import { normalizeLines } from './translate';
+import {
+  isNotCnPages,
+  originalIsNotChinese,
+  originalIsNotTag,
+  originalIsOnlyTag,
+  translationHasNotCodeExample,
+} from './utils';

 export function splitAndTrim(text = ''): string[] {
  return text.split(/\n+\s*\n+/).map(line => line.trim()).filter(line => !!line);
 }

+// tslint:disable:max-line-length
+const pattern = /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/;
+
 export function isTranslation(text) {
-  return text &&
-    /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/.test(text);
+  return text && pattern.test(text);
 }

 export function gatherTranslations(text: string): DictEntry[] {
-  const lines = splitAndTrim(text);
+  const lines = splitAndTrim(normalizeLines(text));

  const result = [];
  for (let i = 1; i < lines.length; ++i) {
-    const translation = lines[i];
-    const original = lines[i - 1];
+    const translation = purifyText(lines[i]);
+    const original = purifyText(lines[i - 1]);
    if (isTranslation(translation)) {
      result.push({original, translation});
    }
  }
-  return result;
+  return result
+    .filter(isNotCnPages)
+    .filter(translationHasNotCodeExample)
+    .filter(originalIsNotChinese)
+    .filter(originalIsNotTag)
+    .filter(originalIsOnlyTag)
+    .map(purifyEntry);
 }

 export function listMarkdownFiles(directory: string): string[] {
@ -42,6 +58,20 @@ export function gatherFromMarkdownFiles(directory: string): DictEntry[] {
  return entries.reduce((result, value) => result.concat(value), []);
 }

+export function purifyText(text): string {
+  return text
+    .replace(/^<(\w+)[\s\S]*?>([\s\S]*)<\/\1>$/, '$2')
+    .replace(/^(.*)<code-example .*$/, '$1')
+    .trim();
+}
+
+export function purifyEntry(entry: DictEntry): DictEntry {
+  return {
+    original: purifyText(entry.original),
+    translation: purifyText(entry.translation),
+  };
+}
+
 const contentDirectory = process.argv[2];

 gatherFromMarkdownFiles(contentDirectory);
--- a/aio/tools/translator/translate.spec.ts
+++ b/aio/tools/translator/translate.spec.ts
@ -1,13 +1,9 @@
 import { expect } from 'chai';
 import { dirs } from './dirs';
-import { dict, kernelText, lookup, normalizeLines, translate } from './translate';
+import { kernelText, lookup, normalizeLines, translate } from './translate';


 describe('根据字典进行翻译', () => {
-  it('忽略明显错误的条目', function () {
-    expect(dict.filter(entry => /^<div/.test(entry.original))).eql([]);
-  });
-
  it('抽取核心字符', function () {
    expect(kernelText(' # Forms   ABC ')).eql('# Forms ABC');
  });
@ -21,6 +17,22 @@ describe('根据字典进行翻译', () => {
    expect(lines).eql('1. abc\n\n11. def\n');
  });

+  it('把 html tag 拆解开', function () {
+    const lines = normalizeLines(`
+  <header>
+    Angular forms don't require a style library
+  </header>
+`);
+    expect(lines).eq(`
+
+  <header>
+
+    Angular forms don't require a style library
+
+  </header>
+
+`);
+  });
  it('自动根据字典翻译单个文件', function () {
    const fs = require('fs');
    const content = fs.readFileSync(__dirname + '/../../../../content-en/' + 'guide/forms.md', 'utf-8');
--- a/aio/tools/translator/translate.ts
+++ b/aio/tools/translator/translate.ts
@ -1,14 +1,10 @@
 import * as _ from 'lodash';
 import { DictEntry } from './dict-entry';

-const dict1 = require('./dict-1.json') as DictEntry[];
-const dict2 = require('./dict-2.json') as DictEntry[];
-const dict3 = require('./dict-3.json') as DictEntry[];
-export const dict = dict1.concat(dict2).concat(dict3)
-  .filter(entry => !/^\s*<div/.test(entry.original));
+export const dict = require('./dict-3.json') as DictEntry[];

 export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
-  let entries = dict3
+  let entries = dict
    .filter(entry => filename.test(entry.sourceFile))
    .filter(entry => kernelText(entry.original) === kernelText(english));
  return _.uniqBy(entries, 'translation');
@ -61,5 +57,9 @@ function repeat(indent: number): string {
 }

 export function normalizeLines(text: string): string {
-  return text.replace(/(?=\n *(\d+\.|-|\*|#|<) )\n/g, '\n\n');
+  // 列表、标题等自带换行含义的markdown
+  const blockElementPattern = /(?=\n *(\d+\.|-|\*|#|<) )\n/g;
+  const htmlTagPattern = /\n(\s*<.*?>\s*)\n/g;
+  return text.replace(blockElementPattern, '\n\n')
+    .replace(htmlTagPattern, '\n\n$1\n\n');
 }
--- a/aio/tools/translator/utils.ts
+++ b/aio/tools/translator/utils.ts
@ -0,0 +1,38 @@
+import { DictEntry } from './dict-entry';
+import { isTranslation } from './extractor';
+
+export function translationHasNotCodeExample(entry: DictEntry): boolean {
+  return entry.translation.indexOf('<code-example') === -1;
+}
+
+export function originalIsNotChinese(entry: DictEntry): boolean {
+  return !isTranslation(entry.original);
+}
+
+export function originalIsNotTag(entry: DictEntry): boolean {
+  return !/^\s*<div.*/.test(entry.original);
+}
+
+export function originalIsOnlyTag(entry: DictEntry): boolean {
+  return !/^\s*<\w+>\s*$/.test(entry.original); // NOTE: despite the name, true when original is NOT a lone attribute-less opening tag
+}
+
+export function isNotImg(entry: DictEntry): boolean {
+  return !/^<(img|figure)/.test(entry.translation);
+}
+
+export function isNotCheatSheet(entry: DictEntry): boolean {
+  return !/cheatsheet\.md$/.test(entry.sourceFile); // true unless sourceFile ends with "cheatsheet.md" (dot is literal)
+}
+
+export function isNotMarketingDocs(entry: DictEntry): boolean {
+  return !/marketing\/docs\.md$/.test(entry.sourceFile); // true unless sourceFile ends with "marketing/docs.md" (dot is literal)
+}
+
+export function isNotCnPages(entry: DictEntry): boolean {
+  return !/cn\/.*?\.md$/.test(entry.sourceFile); // true unless sourceFile is a ".md" file under a path containing "cn/"
+}
+
+export function isHead(line: string): boolean {
+  return /^#/.test(line);
+}