angular-cn/aio/tools/translator/extractor.ts

99 lines
3.0 KiB
TypeScript
Raw Normal View History

2018-03-01 01:07:51 -05:00
import * as globby from 'globby';
import { DictEntry } from './dict-entry';
2018-03-02 19:13:52 -05:00
import {
isNotCnPages,
isOnlyBeginTag,
normalizeLines,
2018-03-02 19:13:52 -05:00
originalIsNotChinese,
originalIsNotCodeExampleTag,
originalIsNotOnlyBeginTag,
originalIsNotPureCloseTag,
originalIsNotSpecialDivTag,
2018-03-02 19:13:52 -05:00
translationHasNotCodeExample,
} from './utils';
2018-03-01 01:07:51 -05:00
/**
 * Breaks a text into paragraphs.
 *
 * A paragraph boundary is one or more newlines, optional whitespace, then one or more newlines
 * (i.e. at least one blank or whitespace-only line). Every paragraph is trimmed and paragraphs
 * that end up empty are discarded.
 *
 * @param text The text to split; defaults to the empty string.
 * @returns The non-empty, trimmed paragraphs in their original order.
 */
export function splitAndTrim(text = ''): string[] {
  const paragraphs: string[] = [];
  for (const chunk of text.split(/\n+\s*\n+/)) {
    const trimmed = chunk.trim();
    if (trimmed) {
      paragraphs.push(trimmed);
    }
  }
  return paragraphs;
}
2018-03-02 19:13:52 -05:00
// tslint:disable:max-line-length
/**
 * Matches a single character from any of the listed Unicode ranges: CJK radicals, CJK symbols and
 * punctuation, CJK strokes, enclosed CJK letters, CJK compatibility characters, CJK unified
 * ideographs (U+3400-U+4DBF and U+4E00-U+9FBF), compatibility ideographs/forms, and halfwidth and
 * fullwidth forms.
 *
 * The expression carries no `g`/`y` flag, so `test()` keeps no state between calls.
 */
const pattern = /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/;

/**
 * Tells whether a piece of text counts as a translation, i.e. contains at least one character
 * matched by `pattern`.
 *
 * @param text The text to inspect; empty, `null` and `undefined` are never translations.
 * @returns `true` when the text contains a matching character, otherwise `false`
 *          (always a real boolean, never the falsy input itself).
 */
export function isTranslation(text: string | null | undefined): boolean {
  return !!text && pattern.test(text);
}
/**
 * Extracts (original, translation) pairs from bilingual markdown text.
 *
 * The text is passed through `normalizeLines`, split into paragraphs with `splitAndTrim`, and every
 * paragraph that `isTranslation` accepts is paired with the paragraph directly before it.
 * The collected pairs are then run through the filters imported from `./utils` and purified.
 *
 * @param text Raw markdown content.
 * @returns The dictionary entries that survive all filters.
 */
export function gatherTranslations(text: string): DictEntry[] {
  const lines = splitAndTrim(normalizeLines(text));
  const result: DictEntry[] = [];
  for (let i = 1; i < lines.length; ++i) {
    const translation = purifyText(lines[i]);
    if (!isTranslation(translation)) {
      continue;
    }
    const original = purifyText(lines[i - 1]);
    if (!isOnlyBeginTag(original)) {
      result.push({original, translation});
      continue;
    }
    // Special handling for translated text wrapped in an html tag. The pair is accepted only if
    //   lines[i - 4] equals the begin tag at lines[i - 1], and
    //   lines[i - 2] equals lines[i + 1] (the two closing lines),
    // in which case lines[i - 3] is the original for the translation at lines[i].
    // Without all of those neighbours the layout cannot be verified, so skip the paragraph
    // rather than calling `.trim()` on `undefined`.
    if (i < 4 || i + 1 >= lines.length) {
      continue;
    }
    const prevBeginTag = lines[i - 4].trim();
    const prevEndTag = lines[i - 2].trim();
    const thisEndTag = lines[i + 1].trim();
    if (original === prevBeginTag && prevEndTag === thisEndTag) {
      result.push({
        original: lines[i - 3],
        translation: lines[i],
      });
    }
  }
  return result
    .filter(isNotCnPages)
    .filter(translationHasNotCodeExample)
    .filter(originalIsNotChinese)
    .filter(originalIsNotSpecialDivTag)
    .filter(originalIsNotCodeExampleTag)
    .filter(originalIsNotPureCloseTag)
    .filter(originalIsNotOnlyBeginTag)
    .map(purifyEntry);
}
2018-03-01 07:11:12 -05:00
/**
 * Lists the markdown files found under a directory, at any depth.
 *
 * @param directory Directory to search. A trailing `/` is optional; an empty string yields the
 *                  relative pattern `**\/*.md`, exactly as before.
 * @returns The paths returned by `globby.sync` for `<directory>/**\/*.md`.
 */
export function listMarkdownFiles(directory: string): string[] {
  // Without the separator, 'docs' + '**/*.md' becomes 'docs**/*.md', which no longer means
  // "everything below docs/". Only append it when missing so existing callers are unaffected.
  const base = directory && !/\/$/.test(directory) ? directory + '/' : directory;
  return globby.sync(base + '**/*.md');
}
/**
 * Reads one markdown file as UTF-8 and gathers its translation entries.
 *
 * @param fileName Path of the markdown file to read.
 * @returns The entries produced by `gatherTranslations`, each with `sourceFile` set to `fileName`.
 */
export function gatherFromMarkdownFile(fileName: string): DictEntry[] {
  const {readFileSync} = require('fs');
  const markdown = readFileSync(fileName, 'utf-8');
  const dictEntries = gatherTranslations(markdown);
  for (const dictEntry of dictEntries) {
    // Record where each entry came from.
    dictEntry.sourceFile = fileName;
  }
  return dictEntries;
}
2018-03-01 07:11:12 -05:00
/**
 * Gathers the translation entries of every markdown file under a directory.
 *
 * @param directory Directory handed to `listMarkdownFiles`.
 * @returns All entries of all files as one flat array, in file order.
 */
export function gatherFromMarkdownFiles(directory: string): DictEntry[] {
  // Append into one typed accumulator instead of `reduce` + `concat`, which re-copies the
  // growing array for every file and starts from an untyped `[]`.
  const result: DictEntry[] = [];
  for (const file of listMarkdownFiles(directory)) {
    for (const entry of gatherFromMarkdownFile(file)) {
      result.push(entry);
    }
  }
  return result;
}
2018-03-02 19:13:52 -05:00
/**
 * Cleans up a piece of text for use in a dictionary entry.
 *
 * When the text is a single line containing `<code-example ` (with the trailing space), everything
 * from the last such occurrence to the end is removed. The result is trimmed in every case.
 *
 * @param text The text to clean.
 * @returns The cleaned, trimmed text.
 */
export function purifyText(text): string {
  // `.` does not cross newlines and the anchors span the whole string, so multi-line text
  // is left untouched by the replacement.
  const withoutCodeExample = text.replace(/^(.*)<code-example .*$/, '$1');
  const cleaned = withoutCodeExample.trim();
  return cleaned;
}
/**
 * Builds a purified copy of a dictionary entry.
 *
 * @param entry The entry whose `original` and `translation` should be cleaned with `purifyText`.
 * @returns A new entry carrying only the purified `original` and `translation`.
 */
export function purifyEntry(entry: DictEntry): DictEntry {
  const {original, translation} = entry;
  const purified: DictEntry = {
    original: purifyText(original),
    translation: purifyText(translation),
  };
  return purified;
}
/**
 * Gathers every translation entry under a directory and saves them to a dictionary file.
 *
 * @param directory Directory handed to `gatherFromMarkdownFiles`.
 * @param dictFile Path of the file to write; it receives the entries as JSON indented by 2 spaces.
 * @returns The gathered entries.
 */
export function gatherFromDirectory(directory: string, dictFile: string): DictEntry[] {
  const {writeFileSync} = require('fs');
  const collected = gatherFromMarkdownFiles(directory);
  writeFileSync(dictFile, JSON.stringify(collected, null, 2), 'utf-8');
  return collected;
}