refactor: extract checker functions

This commit is contained in:
Zhicheng Wang 2018-03-03 08:13:52 +08:00
parent c2f0045a5e
commit 19f4ad46ae
14 changed files with 107464 additions and 9612 deletions

View File

@ -184,6 +184,7 @@ JavaScript 中每个_文件_是一个模块文件中定义的所有对象
<a href="http://exploringjs.com/es6/ch_modules.html" >Learn more about the JavaScript module system on the web.</a>
<a href="http://exploringjs.com/es6/ch_modules.html" target="_blank">学习更多关于 JavaScript 模块的知识。</a>
</div>
@ -245,6 +246,7 @@ Hang in there. The confusion yields to clarity with time and experience.
Learn more from the [NgModules](guide/ngmodules) page.
更多信息,见 [Angular 模块](guide/ngmodules)。
</div>

View File

@ -19,6 +19,7 @@ The Angular CLI uses `yarn` by default to install npm packages when you create a
<div class="l-sub-section">
Node.js and npm are essential to Angular development.
Node.js和npm是做Angular开发的基础。
[Get them now](https://docs.npmjs.com/getting-started/installing-node "Installing Node.js and updating npm")

View File

@ -1148,8 +1148,10 @@ such as one of the following:
`true` if the control user has not yet entered the HTML control
and triggered its blur event. Its opposite is `myControl.touched`.
如果用户尚未进入这个HTML控件也没有触发过它的`blur`(失去焦点)事件,则为`true`。
它是`myControl.touched`的反义词。
</td>
</tr>

View File

@ -1,7 +1,16 @@
import { expect } from 'chai';
import { DictEntry } from './dict-entry';
import { dirs } from './dirs';
import { gatherFromMarkdownFiles, isTranslation } from './extractor';
import { gatherFromMarkdownFiles } from './extractor';
import {
isHead,
isNotCheatSheet,
isNotCnPages,
isNotImg,
isNotMarketingDocs,
originalIsNotChinese,
originalIsNotTag,
translationHasNotCodeExample,
} from './utils';
describe('自动检查翻译结果', function () {
const entries = gatherFromMarkdownFiles(dirs.content)
@ -10,12 +19,12 @@ describe('自动检查翻译结果', function () {
.filter(isNotCnPages);
it('译文里不应该出现 <code-example>', function () {
  // Collect the OFFENDING entries, i.e. translations that do contain a `<code-example` tag.
  // `translationHasNotCodeExample` answers "is this entry clean?", so it has to be negated
  // here; filtering with it directly would select every clean entry and make the
  // "expect empty" assertion fail on healthy data.
  const codeExamples = entries.filter(entry => !translationHasNotCodeExample(entry));
  expect(codeExamples).eql([]);
});
it('原文中不应该有汉语', function () {
  // Collect the OFFENDING entries: originals that contain Chinese text.
  // `originalIsNotChinese` is the "entry is fine" predicate, so it is negated to keep the
  // meaning of the assertion (the list of violations must be empty).
  // Entries whose translation starts with <img or <figure are excluded via `isNotImg`.
  const lines = entries
    .filter(entry => !originalIsNotChinese(entry))
    .filter(isNotImg);
  expect(lines).eql([]);
});
@ -33,27 +42,7 @@ describe('自动检查翻译结果', function () {
});
it('原文不应该是以 <div 开头的', function () {
  // Collect the OFFENDING entries: originals that begin with `<div`.
  // `originalIsNotTag` returns true for acceptable entries, so it is negated here; using it
  // un-negated would assert that no acceptable entry exists.
  const lines = entries.filter(entry => !originalIsNotTag(entry));
  expect(lines).eql([]);
});
});
/**
 * Tells whether an entry's translation is something other than an image block.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when the translation starts with `<img` or `<figure`, `true` otherwise
 */
function isNotImg(entry: DictEntry): boolean {
  const mediaTagAtStart = /^<(img|figure)/;
  if (mediaTagAtStart.test(entry.translation)) {
    return false;
  }
  return true;
}
/**
 * Tells whether an entry comes from a file other than the cheat sheet.
 *
 * The dot before `md` is escaped so that only a literal `cheatsheet.md` suffix counts;
 * an unescaped dot would also accept names such as `cheatsheet-md`.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` ends with `cheatsheet.md`, `true` otherwise
 *          (including when `sourceFile` is missing)
 */
function isNotCheatSheet(entry: DictEntry): boolean {
  return !/cheatsheet\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether an entry comes from a file other than `marketing/docs.md`.
 *
 * The dot before `md` is escaped so that only a literal `.md` extension matches.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` ends with `marketing/docs.md`, `true` otherwise
 *          (including when `sourceFile` is missing)
 */
function isNotMarketingDocs(entry: DictEntry): boolean {
  return !/marketing\/docs\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether an entry comes from a markdown file outside a `cn/` path segment.
 *
 * The dot before `md` is escaped so that only a literal `.md` extension matches.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` contains `cn/` followed by a path ending in `.md`,
 *          `true` otherwise (including when `sourceFile` is missing)
 */
function isNotCnPages(entry: DictEntry): boolean {
  return !/cn\/.*?\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether a line is a markdown heading.
 *
 * @param line text to inspect
 * @returns `true` when the very first character of `line` is `#`
 */
function isHead(line: string): boolean {
  const headingMarker = /^#/;
  return headingMarker.test(line);
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
/** One dictionary record pairing a piece of original text with its translation. */
export class DictEntry {
  /** Text in the source language. */
  original: string;
  /** Translated text corresponding to `original`. */
  translation: string;
  /** File the pair was gathered from; optional because some entries are built without it. */
  sourceFile?: string;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
import { expect } from 'chai';
import { DictEntry } from './dict-entry';
import { dirs } from './dirs';
import { gatherFromMarkdownFiles, gatherTranslations, listMarkdownFiles, splitAndTrim } from './extractor';
@ -42,12 +43,17 @@ describe('从对照翻译文件中采集生成字典', () => {
});
it('从对照文本的文件夹中采集生成字典(非测试)', () => {
const entries = gatherFromMarkdownFiles(dirs.content);
gatherFromDirectory(dirs.aio + '../../content-1/', dirs.here + 'dict-1.json');
gatherFromDirectory(dirs.aio + '../../content-2/', dirs.here + 'dict-2.json');
gatherFromDirectory(dirs.aio + '../../content-3/', dirs.here + 'dict-3.json');
});
});
/**
 * Gathers dictionary entries from the markdown files under `directory` and writes them,
 * as 2-space indented JSON, to `dictFile`.
 *
 * The output path comes solely from `dictFile`; nothing is written to a hard-coded location.
 *
 * @param directory folder passed on to `gatherFromMarkdownFiles`
 * @param dictFile path of the JSON file to (over)write
 * @returns the gathered entries
 */
function gatherFromDirectory(directory: string, dictFile: string): DictEntry[] {
  const entries = gatherFromMarkdownFiles(directory);
  const dict = JSON.stringify(entries, null, 2);
  const fs = require('fs');
  fs.writeFileSync(dictFile, dict, 'utf-8');
  return entries;
}

View File

@ -1,27 +1,43 @@
import * as globby from 'globby';
import { DictEntry } from './dict-entry';
import { normalizeLines } from './translate';
import {
isNotCnPages,
originalIsNotChinese,
originalIsNotTag,
originalIsOnlyTag,
translationHasNotCodeExample,
} from './utils';
/**
 * Splits text into paragraphs separated by blank lines.
 *
 * @param text text to split; defaults to the empty string
 * @returns the trimmed paragraphs, with empty ones removed
 */
export function splitAndTrim(text = ''): string[] {
  const paragraphs: string[] = [];
  for (const chunk of text.split(/\n+\s*\n+/)) {
    const trimmed = chunk.trim();
    if (trimmed) {
      paragraphs.push(trimmed);
    }
  }
  return paragraphs;
}
// tslint:disable:max-line-length
/**
 * Matches a single character from the CJK-related Unicode ranges used to recognise
 * translated text (for example U+4E00–U+9FBF ideographs, U+3000–U+303F CJK punctuation
 * and U+FF00–U+FFEF full-width forms). Declared once so the expression is not duplicated.
 */
const pattern = /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/;

/**
 * Tells whether a piece of text looks like a translation, i.e. contains at least one
 * character matched by `pattern`.
 *
 * @param text text to inspect; empty, `null` and `undefined` are treated as "not a translation"
 * @returns a strict boolean (never the falsy input itself)
 */
export function isTranslation(text?: string | null): boolean {
  return !!text && pattern.test(text);
}
/**
 * Extracts original/translation pairs from bilingual text in which each translated
 * paragraph directly follows the paragraph it translates.
 *
 * Steps:
 * 1. the text is normalized and split into blank-line separated paragraphs;
 * 2. every paragraph (after purification) that `isTranslation` accepts is paired with the
 *    purified paragraph immediately before it;
 * 3. pairs rejected by any of the checker predicates are dropped and the survivors are
 *    passed through `purifyEntry`.
 *
 * @param text bilingual text to scan
 * @returns the accepted pairs; entries built here carry only `original` and `translation`
 */
export function gatherTranslations(text: string): DictEntry[] {
  const lines = splitAndTrim(normalizeLines(text));
  const result: DictEntry[] = [];
  // Start at 1 so that every candidate translation has a preceding paragraph.
  for (let i = 1; i < lines.length; ++i) {
    const translation = purifyText(lines[i]);
    const original = purifyText(lines[i - 1]);
    if (isTranslation(translation)) {
      result.push({original, translation});
    }
  }
  return result
    .filter(isNotCnPages)
    .filter(translationHasNotCodeExample)
    .filter(originalIsNotChinese)
    .filter(originalIsNotTag)
    // Despite its name this predicate keeps entries whose original is NOT a lone opening tag.
    .filter(originalIsOnlyTag)
    .map(purifyEntry);
}
export function listMarkdownFiles(directory: string): string[] {
@ -42,6 +58,20 @@ export function gatherFromMarkdownFiles(directory: string): DictEntry[] {
return entries.reduce((result, value) => result.concat(value), []);
}
/**
 * Strips markup noise from a paragraph.
 *
 * - When the whole text is a single element (`<tag ...>content</tag>`), only the content is kept.
 * - When the text has a `<code-example ` tag on its only line, everything from that tag on is dropped.
 * - Surrounding whitespace is removed.
 *
 * @param text paragraph to clean
 * @returns the cleaned text
 */
export function purifyText(text): string {
  const withoutWrapper = text.replace(/^<(\w+)[\s\S]*?>([\s\S]*)<\/\1>$/, '$2');
  const withoutCodeExample = withoutWrapper.replace(/^(.*)<code-example .*$/, '$1');
  return withoutCodeExample.trim();
}
/**
 * Builds a new entry whose texts have been cleaned with `purifyText`.
 *
 * @param entry entry to clean; it is not modified
 * @returns a fresh object holding only the purified `original` and `translation`
 */
export function purifyEntry(entry: DictEntry): DictEntry {
  const original = purifyText(entry.original);
  const translation = purifyText(entry.translation);
  return {original, translation};
}
// Top-level statements: they execute whenever this module is loaded, gathering entries from
// the directory named by the third process argument (process.argv[2]). The result is discarded.
// NOTE(review): other modules import from this file, so this also runs on import — confirm that is intended.
const contentDirectory = process.argv[2];
gatherFromMarkdownFiles(contentDirectory);

View File

@ -1,13 +1,9 @@
import { expect } from 'chai';
import { dirs } from './dirs';
import { dict, kernelText, lookup, normalizeLines, translate } from './translate';
import { kernelText, lookup, normalizeLines, translate } from './translate';
describe('根据字典进行翻译', () => {
it('忽略明显错误的条目', function () {
expect(dict.filter(entry => /^<div/.test(entry.original))).eql([]);
});
it('抽取核心字符', function () {
expect(kernelText(' # Forms ABC ')).eql('# Forms ABC');
});
@ -21,6 +17,22 @@ describe('根据字典进行翻译', () => {
expect(lines).eql('1. abc\n\n11. def\n');
});
it('把 html tag 拆解开', function () {
const lines = normalizeLines(`
<header>
Angular forms don't require a style library
</header>
`);
expect(lines).eq(`
<header>
Angular forms don't require a style library
</header>
`);
});
it('自动根据字典翻译单个文件', function () {
const fs = require('fs');
const content = fs.readFileSync(__dirname + '/../../../../content-en/' + 'guide/forms.md', 'utf-8');

View File

@ -1,14 +1,10 @@
import * as _ from 'lodash';
import { DictEntry } from './dict-entry';
/** Translation dictionary, loaded from `dict-3.json` when this module is loaded. */
export const dict = require('./dict-3.json') as DictEntry[];

/**
 * Finds the dictionary entries whose original text matches `english`.
 *
 * Two texts match when `kernelText` yields the same value for both.
 *
 * @param english text to look up
 * @param filename only entries whose `sourceFile` matches this expression are considered;
 *                 defaults to matching everything. A missing `sourceFile` is tested as ''.
 * @returns the matching entries, de-duplicated by `translation`
 */
export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
  // The kernel of the search text does not depend on the entry, so compute it once.
  const kernel = kernelText(english);
  const entries = dict
    .filter(entry => filename.test(entry.sourceFile || ''))
    .filter(entry => kernelText(entry.original) === kernel);
  return _.uniqBy(entries, 'translation');
}
}
/**
 * Inserts blank lines so that block-level markdown and stand-alone HTML tags become
 * separate paragraphs.
 *
 * Both passes always run; there is no early return after the first one.
 *
 * @param text markdown text to normalize
 * @returns the text with the extra line breaks inserted
 */
export function normalizeLines(text: string): string {
  // Lists, headings and similar markdown that implies a line break on its own:
  // a newline followed by optional spaces, a marker (`1.`, `-`, `*`, `#` or `<`) and a space.
  const blockElementPattern = /(?=\n *(\d+\.|-|\*|#|<) )\n/g;
  // A line consisting solely of an HTML tag (optionally surrounded by whitespace).
  const htmlTagPattern = /\n(\s*<.*?>\s*)\n/g;
  return text.replace(blockElementPattern, '\n\n')
    .replace(htmlTagPattern, '\n\n$1\n\n');
}

View File

@ -0,0 +1,38 @@
import { DictEntry } from './dict-entry';
import { isTranslation } from './extractor';
/**
 * Tells whether an entry's translation is free of `<code-example` tags.
 *
 * @param entry dictionary entry to inspect
 * @returns `true` when the translation does not contain the text `<code-example`
 */
export function translationHasNotCodeExample(entry: DictEntry): boolean {
  const position = entry.translation.indexOf('<code-example');
  return position === -1;
}
/**
 * Tells whether an entry's original text is free of translated (Chinese) content,
 * as judged by `isTranslation`.
 *
 * @param entry dictionary entry to inspect
 * @returns `true` when `isTranslation` gives a falsy result for `entry.original`
 */
export function originalIsNotChinese(entry: DictEntry): boolean {
  const looksTranslated = isTranslation(entry.original);
  return !looksTranslated;
}
/**
 * Tells whether an entry's original text does not begin with a `<div` tag.
 * Only `<div` is checked, optionally preceded by whitespace.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when the original starts with `<div`, `true` otherwise
 */
export function originalIsNotTag(entry: DictEntry): boolean {
  const startsWithDiv = /^\s*<div.*/.test(entry.original);
  return !startsWithDiv;
}
/**
 * Filter predicate that rejects entries whose original text is nothing but a single
 * attribute-less opening tag such as `<header>`.
 *
 * Note: the result is the opposite of what the name suggests — it is `true` when the
 * original is NOT a lone tag, so that `filter(originalIsOnlyTag)` keeps the useful entries.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when the original is only `<tag>` (surrounding whitespace allowed), `true` otherwise
 */
export function originalIsOnlyTag(entry: DictEntry): boolean {
  const loneTag = /^\s*<\w+>\s*$/;
  if (loneTag.test(entry.original)) {
    return false;
  }
  return true;
}
/**
 * Tells whether an entry's translation is something other than an image block.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when the translation starts with `<img` or `<figure`, `true` otherwise
 */
export function isNotImg(entry: DictEntry): boolean {
  const mediaTagAtStart = /^<(img|figure)/;
  if (mediaTagAtStart.test(entry.translation)) {
    return false;
  }
  return true;
}
/**
 * Tells whether an entry comes from a file other than the cheat sheet.
 *
 * The dot before `md` is escaped so that only a literal `cheatsheet.md` suffix counts;
 * an unescaped dot would also accept names such as `cheatsheet-md`.
 * `sourceFile` is optional on `DictEntry`, so a missing value is tested as ''.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` ends with `cheatsheet.md`, `true` otherwise
 */
export function isNotCheatSheet(entry: DictEntry): boolean {
  return !/cheatsheet\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether an entry comes from a file other than `marketing/docs.md`.
 *
 * The dot before `md` is escaped so that only a literal `.md` extension matches.
 * `sourceFile` is optional on `DictEntry`, so a missing value is tested as ''.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` ends with `marketing/docs.md`, `true` otherwise
 */
export function isNotMarketingDocs(entry: DictEntry): boolean {
  return !/marketing\/docs\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether an entry comes from a markdown file outside a `cn/` path segment.
 *
 * The dot before `md` is escaped so that only a literal `.md` extension matches.
 * `sourceFile` is optional on `DictEntry`, so a missing value is tested as ''.
 *
 * @param entry dictionary entry to inspect
 * @returns `false` when `sourceFile` contains `cn/` followed by a path ending in `.md`,
 *          `true` otherwise
 */
export function isNotCnPages(entry: DictEntry): boolean {
  return !/cn\/.*?\.md$/.test(entry.sourceFile || '');
}
/**
 * Tells whether a line is a markdown heading.
 *
 * @param line text to inspect
 * @returns `true` when the very first character of `line` is `#`
 */
export function isHead(line: string): boolean {
  const headingMarker = /^#/;
  return headingMarker.test(line);
}