refactor: extract checker functions

2018-03-03 08:13:52 +08:00 · 2018-03-03 08:13:52 +08:00 · 19f4ad46ae
commit 19f4ad46ae
parent c2f0045a5e
14 changed files with 107464 additions and 9612 deletions
--- a/aio/content/guide/architecture.md
+++ b/aio/content/guide/architecture.md
@ -184,6 +184,7 @@ JavaScript 中，每个_文件_是一个模块，文件中定义的所有对象
 <a href="http://exploringjs.com/es6/ch_modules.html" >Learn more about the JavaScript module system on the web.</a>
 <a href="http://exploringjs.com/es6/ch_modules.html" target="_blank">学习更多关于 JavaScript 模块的知识。</a>
 </div>
@ -245,6 +246,7 @@ Hang in there. The confusion yields to clarity with time and experience.
 Learn more from the [NgModules](guide/ngmodules) page.
 更多信息，见 [Angular 模块](guide/ngmodules)。
 </div>
--- a/aio/content/guide/npm-packages.md
+++ b/aio/content/guide/npm-packages.md
@ -19,6 +19,7 @@ The Angular CLI uses `yarn` by default to install npm packages when you create a
 <div class="l-sub-section">
 Node.js and npm are essential to Angular development. 
 Node.js和npm是做Angular开发的基础。
 [Get them now](https://docs.npmjs.com/getting-started/installing-node "Installing Node.js and updating npm")
--- a/aio/content/guide/reactive-forms.md
+++ b/aio/content/guide/reactive-forms.md
@ -1148,8 +1148,10 @@ such as one of the following:
      `true` if the control user has not yet entered the HTML control
       and triggered its blur event. Its opposite is `myControl.touched`.
-                         如果用户尚未进入这个HTML控件，也没有触发过它的`blur`（失去焦点）事件，则为`true`。
+       
      如果用户尚未进入这个HTML控件，也没有触发过它的`blur`（失去焦点）事件，则为`true`。
      它是`myControl.touched`的反义词。
    </td>
  </tr>
--- a/aio/tools/translator/checker.spec.ts
+++ b/aio/tools/translator/checker.spec.ts
@ -1,7 +1,16 @@
 import { expect } from 'chai';
 import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
-import { gatherFromMarkdownFiles, isTranslation } from './extractor';
+import { gatherFromMarkdownFiles } from './extractor';
 import {
  isHead,
  isNotCheatSheet,
  isNotCnPages,
  isNotImg,
  isNotMarketingDocs,
  originalIsNotChinese,
  originalIsNotTag,
  translationHasNotCodeExample,
 } from './utils';
 describe('自动检查翻译结果', function () {
  const entries = gatherFromMarkdownFiles(dirs.content)
@ -10,12 +19,12 @@ describe('自动检查翻译结果', function () {
    .filter(isNotCnPages);
  it('译文里不应该出现 <code-example>', function () {
-    const codeExamples = entries.filter(entry => entry.translation.indexOf('<code-example') !== -1);
+    const codeExamples = entries.filter(translationHasNotCodeExample);
    expect(codeExamples).eql([]);
  });
  it('原文中不应该有汉语', function () {
-    const lines = entries.filter(entry => isTranslation(entry.original))
+    const lines = entries.filter(originalIsNotChinese)
      .filter(isNotImg);
    expect(lines).eql([]);
  });
@ -33,27 +42,7 @@ describe('自动检查翻译结果', function () {
  });
  it('原文不应该是以 <div 开头的', function () {
-    const lines = entries.filter(entry => /^ *<div.*/.test(entry.original));
+    const lines = entries.filter(originalIsNotTag);
    expect(lines).eql([]);
  });
 });
 /**
  * Keeps entries whose translation does not begin with an `<img` or `<figure` tag.
  *
  * @param entry dictionary entry whose `translation` is inspected
  * @returns `false` when the translation starts with `<img` or `<figure`, `true` otherwise
  */
 function isNotImg(entry: DictEntry): boolean {
  const startsWithImage = /^<(img|figure)/.test(entry.translation);
  return !startsWithImage;
 }
 /**
  * Keeps entries that do not come from the cheat sheet page.
  *
  * @param entry dictionary entry whose `sourceFile` is inspected
  * @returns `false` when `sourceFile` ends with `cheatsheet.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 function isNotCheatSheet(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/cheatsheet\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Keeps entries that do not come from the marketing docs page.
  *
  * @param entry dictionary entry whose `sourceFile` is inspected
  * @returns `false` when `sourceFile` ends with `marketing/docs.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 function isNotMarketingDocs(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/marketing\/docs\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Keeps entries that do not come from a markdown file below a `cn/` path segment.
  *
  * @param entry dictionary entry whose `sourceFile` is inspected
  * @returns `false` when `sourceFile` contains `cn/` and ends with `.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 function isNotCnPages(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/cn\/.*?\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Tells whether a line starts with `#`.
  *
  * @param line text to inspect
  * @returns `true` when the very first character is `#`
  */
 function isHead(line: string): boolean {
  const headingMarker = /^#/;
  return headingMarker.test(line);
 }
--- a/aio/tools/translator/dict-1.json
+++ b/aio/tools/translator/dict-1.json
--- a/aio/tools/translator/dict-2.json
+++ b/aio/tools/translator/dict-2.json
--- a/aio/tools/translator/dict-3.json
+++ b/aio/tools/translator/dict-3.json
--- a/aio/tools/translator/dict-entry.ts
+++ b/aio/tools/translator/dict-entry.ts
@ -1,5 +1,5 @@
 /** One dictionary record pairing a piece of original text with its translation. */
 export class DictEntry {
  /** The source-language text. */
  original: string;
  /** The translated text that belongs to `original`. */
  translation: string;
  // Presumably the path of the markdown file the entry was gathered from — TODO confirm.
-  sourceFile: string;
+  sourceFile?: string;
 }
--- a/aio/tools/translator/dict-final.json
+++ b/aio/tools/translator/dict-final.json
--- a/aio/tools/translator/extractor.spec.ts
+++ b/aio/tools/translator/extractor.spec.ts
@ -1,4 +1,5 @@
 import { expect } from 'chai';
 import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
 import { gatherFromMarkdownFiles, gatherTranslations, listMarkdownFiles, splitAndTrim } from './extractor';
@ -42,12 +43,17 @@ describe('从对照翻译文件中采集生成字典', () => {
  });
  it('从对照文本的文件夹中采集生成字典（非测试）', () => {
-    const entries = gatherFromMarkdownFiles(dirs.content);
+    gatherFromDirectory(dirs.aio + '../../content-1/', dirs.here + 'dict-1.json');
-    const dict = JSON.stringify(entries, null, 2);
+    gatherFromDirectory(dirs.aio + '../../content-2/', dirs.here + 'dict-2.json');
-    const fs = require('fs');
+    gatherFromDirectory(dirs.aio + '../../content-3/', dirs.here + 'dict-3.json');
    fs.writeFileSync(dirs.here + 'dict-3.json', dict, 'utf-8');
    expect(entries.length).greaterThan(100);
  });
 });
 /**
  * Gathers dictionary entries from the markdown files of a directory and writes them to a JSON file.
  *
  * @param directory directory handed to `gatherFromMarkdownFiles`
  * @param dictFile path of the JSON file that receives the entries
  * @returns the gathered entries
  */
 function gatherFromDirectory(directory: string, dictFile: string): DictEntry[] {
  const fs = require('fs');
  const collected = gatherFromMarkdownFiles(directory);
  // Serialized with a two-space indent.
  fs.writeFileSync(dictFile, JSON.stringify(collected, null, 2), 'utf-8');
  return collected;
 }
--- a/aio/tools/translator/extractor.ts
+++ b/aio/tools/translator/extractor.ts
@ -1,27 +1,43 @@
 import * as globby from 'globby';
 import { DictEntry } from './dict-entry';
 import { normalizeLines } from './translate';
 import {
  isNotCnPages,
  originalIsNotChinese,
  originalIsNotTag,
  originalIsOnlyTag,
  translationHasNotCodeExample,
 } from './utils';
 /**
  * Splits text into paragraphs separated by blank lines.
  *
  * @param text text to split; defaults to the empty string
  * @returns the trimmed paragraphs, with empty ones left out
  */
 export function splitAndTrim(text = ''): string[] {
  const paragraphs: string[] = [];
  // A separator is two or more newlines, optionally with whitespace between them.
  for (const chunk of text.split(/\n+\s*\n+/)) {
    const trimmed = chunk.trim();
    if (trimmed) {
      paragraphs.push(trimmed);
    }
  }
  return paragraphs;
 }
 // tslint:disable:max-line-length
 // Matches a single character from the listed Unicode ranges (CJK radicals, symbols and
 // punctuation, ideographs, compatibility forms, half-width and full-width forms).
 const pattern = /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/;
 /**
  * Tells whether `text` contains at least one character from the ranges above.
  *
  * @param text text to inspect
  * @returns a falsy `text` is returned as is; otherwise the boolean result of the match
  */
 export function isTranslation(text) {
-  return text &&
+  return text && pattern.test(text);
    /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/.test(text);
 }
 /**
  * Builds dictionary entries from text in which a translated paragraph directly follows its original.
  *
  * The text is normalized and split into paragraphs; each paragraph is then paired with the
  * paragraph before it, and the pair is kept when the later one passes `isTranslation`.
  *
  * NOTE(review): the entries pushed here carry no `sourceFile`, so `isNotCnPages` only ever
  * sees an undefined path at this stage — confirm that filter is meant to run here.
  *
  * @param text markdown text to scan
  * @returns the pairs that pass every filter below, each mapped through `purifyEntry`
  */
 export function gatherTranslations(text: string): DictEntry[] {
-  const lines = splitAndTrim(text);
+  const lines = splitAndTrim(normalizeLines(text));
  const result = [];
  // Start at 1 so every paragraph can be paired with its predecessor.
  for (let i = 1; i < lines.length; ++i) {
-    const translation = lines[i];
+    const translation = purifyText(lines[i]);
-    const original = lines[i - 1];
+    const original = purifyText(lines[i - 1]);
    // Keep the pair only when the later paragraph is recognized as a translation.
    if (isTranslation(translation)) {
      result.push({original, translation});
    }
  }
-  return result;
+  return result
    .filter(isNotCnPages)
    .filter(translationHasNotCodeExample)
    .filter(originalIsNotChinese)
    .filter(originalIsNotTag)
    .filter(originalIsOnlyTag)
    .map(purifyEntry);
 }
 export function listMarkdownFiles(directory: string): string[] {
@ -42,6 +58,20 @@ export function gatherFromMarkdownFiles(directory: string): DictEntry[] {
  return entries.reduce((result, value) => result.concat(value), []);
 }
 /**
  * Strips markup that surrounds or trails a paragraph.
  *
  * @param text paragraph to clean
  * @returns the cleaned, trimmed text
  */
 export function purifyText(text): string {
  // When the text opens with a tag and ends with the matching closing tag, keep only what is in between.
  const unwrapped = text.replace(/^<(\w+)[\s\S]*?>([\s\S]*)<\/\1>$/, '$2');
  // When the remaining text has no line break, cut it off at its last "<code-example " tag.
  const withoutExample = unwrapped.replace(/^(.*)<code-example .*$/, '$1');
  return withoutExample.trim();
 }
 /**
  * Produces a cleaned copy of a dictionary entry.
  *
  * Only `original` and `translation` are carried over, each passed through `purifyText`;
  * `sourceFile` is not copied to the result.
  *
  * @param entry entry to clean
  * @returns a new entry holding the cleaned texts
  */
 export function purifyEntry(entry: DictEntry): DictEntry {
  const original = purifyText(entry.original);
  const translation = purifyText(entry.translation);
  return {original, translation};
 }
 // Script entry: the first command-line argument is used as the content directory to scan.
 // NOTE(review): these appear to be top-level statements, so they would run every time this
 // module is loaded, and the value returned by gatherFromMarkdownFiles is discarded here —
 // confirm that this is intended.
 const contentDirectory = process.argv[2];
 gatherFromMarkdownFiles(contentDirectory);
--- a/aio/tools/translator/translate.spec.ts
+++ b/aio/tools/translator/translate.spec.ts
@ -1,13 +1,9 @@
 import { expect } from 'chai';
 import { dirs } from './dirs';
-import { dict, kernelText, lookup, normalizeLines, translate } from './translate';
+import { kernelText, lookup, normalizeLines, translate } from './translate';
 describe('根据字典进行翻译', () => {
  it('忽略明显错误的条目', function () {
    expect(dict.filter(entry => /^<div/.test(entry.original))).eql([]);
  });
  it('抽取核心字符', function () {
    expect(kernelText(' # Forms   ABC ')).eql('# Forms ABC');
  });
@ -21,6 +17,22 @@ describe('根据字典进行翻译', () => {
    expect(lines).eql('1. abc\n\n11. def\n');
  });
  it('把 html tag 拆解开', function () {
    const lines = normalizeLines(`
  <header>
    Angular forms don't require a style library
  </header>
 `);
    expect(lines).eq(`
  <header>
    Angular forms don't require a style library
  </header>
 `);
  });
  it('自动根据字典翻译单个文件', function () {
    const fs = require('fs');
    const content = fs.readFileSync(__dirname + '/../../../../content-en/' + 'guide/forms.md', 'utf-8');
--- a/aio/tools/translator/translate.ts
+++ b/aio/tools/translator/translate.ts
@ -1,14 +1,10 @@
 import * as _ from 'lodash';
 import { DictEntry } from './dict-entry';
-const dict1 = require('./dict-1.json') as DictEntry[];
+export const dict = require('./dict-3.json') as DictEntry[];
 const dict2 = require('./dict-2.json') as DictEntry[];
 const dict3 = require('./dict-3.json') as DictEntry[];
 export const dict = dict1.concat(dict2).concat(dict3)
  .filter(entry => !/^\s*<div/.test(entry.original));
 export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
-  let entries = dict3
+  let entries = dict
    .filter(entry => filename.test(entry.sourceFile))
    .filter(entry => kernelText(entry.original) === kernelText(english));
  return _.uniqBy(entries, 'translation');
@ -61,5 +57,9 @@ function repeat(indent: number): string {
 }
 /**
  * Adds extra newlines in front of block-level markdown and around stand-alone HTML tags.
  *
  * @param text markdown text
  * @returns the text with both newline replacements applied
  */
 export function normalizeLines(text: string): string {
-  return text.replace(/(?=\n *(\d+\.|-|\*|#|<) )\n/g, '\n\n');
+  // Markdown constructs such as lists and headings that imply a line break on their own
  // A newline followed by optional spaces, then `1.`-style numbering, `-`, `*`, `#` or `<`, then a space.
  const blockElementPattern = /(?=\n *(\d+\.|-|\*|#|<) )\n/g;
  // A newline, optional whitespace around a `<...>` tag, then another newline.
  const htmlTagPattern = /\n(\s*<.*?>\s*)\n/g;
  // Double the newline before block elements first, then surround tag-only text with blank lines.
  return text.replace(blockElementPattern, '\n\n')
    .replace(htmlTagPattern, '\n\n$1\n\n');
 }
--- a/aio/tools/translator/utils.ts
+++ b/aio/tools/translator/utils.ts
@ -0,0 +1,38 @@
 import { DictEntry } from './dict-entry';
 import { isTranslation } from './extractor';
 /**
  * Keeps entries whose translation contains no `<code-example` tag.
  *
  * @param entry dictionary entry whose `translation` is inspected
  * @returns `true` when `<code-example` does not occur in the translation
  */
 export function translationHasNotCodeExample(entry: DictEntry): boolean {
  const position = entry.translation.indexOf('<code-example');
  return position === -1;
 }
 /**
  * Keeps entries whose original text is not recognized by `isTranslation`.
  *
  * @param entry dictionary entry whose `original` is inspected
  * @returns `false` when `isTranslation` yields a truthy result for `original`, `true` otherwise
  */
 export function originalIsNotChinese(entry: DictEntry): boolean {
  if (isTranslation(entry.original)) {
    return false;
  }
  return true;
 }
 /**
  * Keeps entries whose original text does not open with a `<div` tag.
  *
  * Despite the general name, only `<div` (after optional leading whitespace) is checked.
  *
  * @param entry dictionary entry whose `original` is inspected
  * @returns `false` when `original` starts with `<div`, `true` otherwise
  */
 export function originalIsNotTag(entry: DictEntry): boolean {
  const startsWithDiv = /^\s*<div.*/.test(entry.original);
  return !startsWithDiv;
 }
 /**
  * Keeps entries whose original text is more than a single bare opening tag.
  *
  * Note that the result is the opposite of what the name suggests: an original consisting
  * solely of a tag such as `<header>` (surrounding whitespace allowed) yields `false`.
  *
  * @param entry dictionary entry whose `original` is inspected
  * @returns `false` when `original` is just one attribute-less opening tag, `true` otherwise
  */
 export function originalIsOnlyTag(entry: DictEntry): boolean {
  const isBareOpeningTag = /^\s*<\w+>\s*$/.test(entry.original);
  return !isBareOpeningTag;
 }
 /**
  * Keeps entries whose translation does not begin with an `<img` or `<figure` tag.
  *
  * @param entry dictionary entry whose `translation` is inspected
  * @returns `false` when the translation starts with `<img` or `<figure`, `true` otherwise
  */
 export function isNotImg(entry: DictEntry): boolean {
  const startsWithImage = /^<(img|figure)/.test(entry.translation);
  return !startsWithImage;
 }
 /**
  * Keeps entries that do not come from the cheat sheet page.
  *
  * @param entry dictionary entry whose optional `sourceFile` is inspected
  * @returns `false` when `sourceFile` ends with `cheatsheet.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 export function isNotCheatSheet(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/cheatsheet\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Keeps entries that do not come from the marketing docs page.
  *
  * @param entry dictionary entry whose optional `sourceFile` is inspected
  * @returns `false` when `sourceFile` ends with `marketing/docs.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 export function isNotMarketingDocs(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/marketing\/docs\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Keeps entries that do not come from a markdown file below a `cn/` path segment.
  *
  * @param entry dictionary entry whose optional `sourceFile` is inspected
  * @returns `false` when `sourceFile` contains `cn/` and ends with `.md`, `true` otherwise
  *          (including when `sourceFile` is missing)
  */
 export function isNotCnPages(entry: DictEntry): boolean {
  // The dot is escaped so only a literal ".md" extension matches, and a missing
  // sourceFile is treated as an empty path instead of being coerced to "undefined".
  return !/cn\/.*?\.md$/.test(entry.sourceFile || '');
 }
 /**
  * Tells whether a line starts with `#`.
  *
  * @param line text to inspect
  * @returns `true` when the very first character is `#`
  */
 export function isHead(line: string): boolean {
  const headingMarker = /^#/;
  return headingMarker.test(line);
 }