feat: 中英字典收集工具

This commit is contained in:
Zhicheng Wang 2018-03-01 14:07:51 +08:00
parent 89060a39df
commit 6120300d4a
7 changed files with 229462 additions and 0 deletions

67322
aio/content/dict/dict-1.json Normal file

File diff suppressed because it is too large Load Diff

72752
aio/content/dict/dict-2.json Normal file

File diff suppressed because it is too large Load Diff

89242
aio/content/dict/dict-3.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
import { expect } from 'chai';
import { gatherFromMarkdownFiles, gatherTranslations, listMarkdownFiles, splitAndTrim } from './extractor';
/**
 * Specs for the Chinese/English dictionary extractor.
 *
 * Fixture strings use explicit `\n` escapes rather than multi-line template
 * literals so the paragraph separators (blank lines) cannot be lost or altered
 * by editors, formatters or indentation changes.
 */
describe('gather to dictionary', () => {
  // Loaded once for the specs that touch the real documentation tree.
  const fs = require('fs');

  it('should split empty string to empty array', () => {
    expect(splitAndTrim()).eql([]);
  });

  it('should break by double line break', () => {
    // Three paragraphs: "a", "b\nc" and "d". A single line break must NOT split.
    const result = splitAndTrim('a\n\nb\nc\n\nd');
    expect(result[1]).eql('b\nc');
  });

  it('build map of original and translation', () => {
    // An English paragraph immediately followed by its Chinese translation.
    const result = gatherTranslations('\na\n\n一\n');
    expect(result).eql([{original: 'a', translation: '一'}]);
  });

  it('should gather from real file', () => {
    const content = fs.readFileSync(__dirname + '/../../content/guide/forms.md', 'utf-8');
    const result = gatherTranslations(content);
    expect(result[0]).eql({original: '# Forms', translation: '# 表单'});
  });

  it('should list files recursive', () => {
    expect(listMarkdownFiles().length).greaterThan(10);
  });

  it('should gather from directory', () => {
    const entries = gatherFromMarkdownFiles();
    const dict = JSON.stringify(entries, null, 2);
    // Side effect kept from the original spec: the gathered dictionary is written to disk.
    fs.writeFileSync(__dirname + '/../../content/dict.json', dict, 'utf-8');
    expect(entries.length).greaterThan(100);
  });
});

View File

@ -0,0 +1,53 @@
import * as globby from 'globby';
/**
 * Splits text into paragraphs.
 *
 * A paragraph boundary is a run of line breaks that contains at least two
 * `\n` characters, optionally with whitespace in between. Each paragraph is
 * trimmed, and paragraphs that are empty after trimming are dropped.
 *
 * @param text The text to split; defaults to the empty string.
 * @returns The non-empty, trimmed paragraphs in their original order.
 */
export function splitAndTrim(text = ''): string[] {
  const paragraphs: string[] = [];
  for (const chunk of text.split(/\n+\s*\n+/)) {
    const trimmed = chunk.trim();
    if (trimmed) {
      paragraphs.push(trimmed);
    }
  }
  return paragraphs;
}
/**
 * Tells whether a piece of text should be treated as a translation, i.e.
 * whether it contains at least one character from the CJK-related ranges
 * listed in the pattern below (radicals, CJK symbols and punctuation,
 * ideographs, compatibility forms, half-width and full-width forms).
 *
 * @param text The text to inspect; empty, `null` and `undefined` are accepted.
 * @returns `true` when a matching character is present, otherwise `false`.
 *   The result is always a real boolean, never the falsy input itself.
 */
export function isTranslation(text: string | null | undefined): boolean {
  // `!!text` coerces the guard so that '' / null / undefined yield `false`
  // instead of leaking the original falsy value to the caller.
  return !!text &&
    /[\u2E80-\u2EFF\u2F00-\u2FDF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\u3400-\u4DBF\u4DC0-\u4DFF\u4E00-\u9FBF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]/.test(text);
}
/**
 * One dictionary record: a paragraph of original text paired with the
 * translated paragraph that follows it.
 */
export class DictEntry {
/** The paragraph that immediately precedes the translation. */
original: string;
/** The paragraph recognised as a translation by `isTranslation`. */
translation: string;
/**
 * The markdown file the pair was read from. Assigned by
 * `gatherFromMarkdownFile`; entries produced directly by
 * `gatherTranslations` do not carry this property.
 */
sourceFile: string;
}
/**
 * Extracts original/translation pairs from a document.
 *
 * The text is split into paragraphs with `splitAndTrim`. Every paragraph,
 * from the second one on, that `isTranslation` accepts is paired with the
 * paragraph directly before it.
 *
 * @param text The document text.
 * @returns The pairs in document order. The entries carry `original` and
 *   `translation` only; `sourceFile` is not set here.
 */
export function gatherTranslations(text: string): DictEntry[] {
  const paragraphs = splitAndTrim(text);
  const pairs = [];
  // Walk the paragraphs while remembering the one just before the current.
  let previous = paragraphs[0];
  for (const current of paragraphs.slice(1)) {
    if (isTranslation(current)) {
      pairs.push({original: previous, translation: current});
    }
    previous = current;
  }
  return pairs;
}
/**
 * Lists the markdown files found under the parent of this module's
 * directory, at any depth.
 *
 * @returns The paths reported by `globby.sync` for the `*.md` pattern.
 */
export function listMarkdownFiles(): string[] {
  const pattern = `${__dirname}/../**/*.md`;
  return globby.sync(pattern);
}
/**
 * Reads one markdown file as UTF-8 and extracts its original/translation
 * pairs, tagging every pair with the file it came from.
 *
 * @param fileName Path of the markdown file to read.
 * @returns The entries from `gatherTranslations`, each with `sourceFile`
 *   set to `fileName`.
 */
export function gatherFromMarkdownFile(fileName: string): DictEntry[] {
  const fs = require('fs');
  const markdown = fs.readFileSync(fileName, 'utf-8');
  const pairs = gatherTranslations(markdown);
  for (const pair of pairs) {
    pair.sourceFile = fileName;
  }
  return pairs;
}
/**
 * Collects the dictionary entries of every markdown file returned by
 * `listMarkdownFiles`.
 *
 * @returns A single flat list of entries, ordered by file and then by
 *   position within each file.
 */
export function gatherFromMarkdownFiles(): DictEntry[] {
  const collected: DictEntry[] = [];
  for (const file of listMarkdownFiles()) {
    for (const entry of gatherFromMarkdownFile(file)) {
      collected.push(entry);
    }
  }
  return collected;
}
// Module body: gather every entry and write it, pretty-printed with a
// two-space indent, to `dict-current.json` next to this file.
// NOTE(review): this runs whenever the module is loaded, so importing it
// (as the spec file does) also rewrites the file. Confirm that is intended.
const fs = require('fs');
const outputPath = __dirname + '/dict-current.json';
fs.writeFileSync(outputPath, JSON.stringify(gatherFromMarkdownFiles(), null, 2), 'utf-8');

View File

@ -95,11 +95,13 @@
"devDependencies": { "devDependencies": {
"@angular/cli": "^1.6.3", "@angular/cli": "^1.6.3",
"@angular/compiler-cli": "5.2", "@angular/compiler-cli": "5.2",
"@types/chai": "^4.1.2",
"@types/jasmine": "^2.5.52", "@types/jasmine": "^2.5.52",
"@types/jasminewd2": "^2.0.3", "@types/jasminewd2": "^2.0.3",
"@types/node": "~6.0.60", "@types/node": "~6.0.60",
"archiver": "^1.3.0", "archiver": "^1.3.0",
"canonical-path": "^0.0.2", "canonical-path": "^0.0.2",
"chai": "^4.1.2",
"chalk": "^2.1.0", "chalk": "^2.1.0",
"cjson": "^0.5.0", "cjson": "^0.5.0",
"codelyzer": "~2.0.0", "codelyzer": "~2.0.0",

View File

@ -279,6 +279,10 @@
version "0.0.11" version "0.0.11"
resolved "https://registry.yarnpkg.com/@schematics/schematics/-/schematics-0.0.11.tgz#c8f70f270ed38f29b2873248126fd59abd635862" resolved "https://registry.yarnpkg.com/@schematics/schematics/-/schematics-0.0.11.tgz#c8f70f270ed38f29b2873248126fd59abd635862"
"@types/chai@^4.1.2":
version "4.1.2"
resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.1.2.tgz#f1af664769cfb50af805431c407425ed619daa21"
"@types/core-js@^0.9.41": "@types/core-js@^0.9.41":
version "0.9.43" version "0.9.43"
resolved "https://registry.yarnpkg.com/@types/core-js/-/core-js-0.9.43.tgz#65d646c5e8c0cd1bdee37065799f9d3d48748253" resolved "https://registry.yarnpkg.com/@types/core-js/-/core-js-0.9.43.tgz#65d646c5e8c0cd1bdee37065799f9d3d48748253"
@ -707,6 +711,10 @@ assert@^1.1.1:
dependencies: dependencies:
util "0.10.3" util "0.10.3"
assertion-error@^1.0.1:
version "1.1.0"
resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b"
async-each@^1.0.0: async-each@^1.0.0:
version "1.0.1" version "1.0.1"
resolved "https://registry.yarnpkg.com/async-each/-/async-each-1.0.1.tgz#19d386a1d9edc6e7c1c85d388aedbcc56d33602d" resolved "https://registry.yarnpkg.com/async-each/-/async-each-1.0.1.tgz#19d386a1d9edc6e7c1c85d388aedbcc56d33602d"
@ -1282,6 +1290,17 @@ center-align@^0.1.1:
align-text "^0.1.3" align-text "^0.1.3"
lazy-cache "^1.0.3" lazy-cache "^1.0.3"
chai@^4.1.2:
version "4.1.2"
resolved "https://registry.yarnpkg.com/chai/-/chai-4.1.2.tgz#0f64584ba642f0f2ace2806279f4f06ca23ad73c"
dependencies:
assertion-error "^1.0.1"
check-error "^1.0.1"
deep-eql "^3.0.0"
get-func-name "^2.0.0"
pathval "^1.0.0"
type-detect "^4.0.0"
chalk@0.5.1: chalk@0.5.1:
version "0.5.1" version "0.5.1"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-0.5.1.tgz#663b3a648b68b55d04690d49167aa837858f2174" resolved "https://registry.yarnpkg.com/chalk/-/chalk-0.5.1.tgz#663b3a648b68b55d04690d49167aa837858f2174"
@ -1365,6 +1384,10 @@ character-reference-invalid@^1.0.0:
version "0.0.2" version "0.0.2"
resolved "https://registry.yarnpkg.com/charenc/-/charenc-0.0.2.tgz#c0a1d2f3a7092e03774bfa83f14c0fc5790a8667" resolved "https://registry.yarnpkg.com/charenc/-/charenc-0.0.2.tgz#c0a1d2f3a7092e03774bfa83f14c0fc5790a8667"
check-error@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82"
chokidar@^1.4.1, chokidar@^1.4.2, chokidar@^1.6.0, chokidar@^1.7.0: chokidar@^1.4.1, chokidar@^1.4.2, chokidar@^1.6.0, chokidar@^1.7.0:
version "1.7.0" version "1.7.0"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-1.7.0.tgz#798e689778151c8076b4b360e5edd28cda2bb468" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-1.7.0.tgz#798e689778151c8076b4b360e5edd28cda2bb468"
@ -2202,6 +2225,12 @@ decompress-response@^3.2.0:
dependencies: dependencies:
mimic-response "^1.0.0" mimic-response "^1.0.0"
deep-eql@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/deep-eql/-/deep-eql-3.0.1.tgz#dfc9404400ad1c8fe023e7da1df1c147c4b444df"
dependencies:
type-detect "^4.0.0"
deep-equal@^1.0.1: deep-equal@^1.0.1:
version "1.0.1" version "1.0.1"
resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-1.0.1.tgz#f5d260292b660e084eff4cdbc9f08ad3247448b5" resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-1.0.1.tgz#f5d260292b660e084eff4cdbc9f08ad3247448b5"
@ -3443,6 +3472,10 @@ get-caller-file@^1.0.1:
version "1.0.2" version "1.0.2"
resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-1.0.2.tgz#f702e63127e7e231c160a80c1554acb70d5047e5" resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-1.0.2.tgz#f702e63127e7e231c160a80c1554acb70d5047e5"
get-func-name@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41"
get-stdin@^4.0.1: get-stdin@^4.0.1:
version "4.0.1" version "4.0.1"
resolved "https://registry.yarnpkg.com/get-stdin/-/get-stdin-4.0.1.tgz#b968c6b0a04384324902e8bf1a5df32579a450fe" resolved "https://registry.yarnpkg.com/get-stdin/-/get-stdin-4.0.1.tgz#b968c6b0a04384324902e8bf1a5df32579a450fe"
@ -6222,6 +6255,10 @@ path-type@^2.0.0:
dependencies: dependencies:
pify "^2.0.0" pify "^2.0.0"
pathval@^1.0.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.0.tgz#b942e6d4bde653005ef6b71361def8727d0645e0"
pbkdf2@^3.0.3: pbkdf2@^3.0.3:
version "3.0.14" version "3.0.14"
resolved "https://registry.yarnpkg.com/pbkdf2/-/pbkdf2-3.0.14.tgz#a35e13c64799b06ce15320f459c230e68e73bade" resolved "https://registry.yarnpkg.com/pbkdf2/-/pbkdf2-3.0.14.tgz#a35e13c64799b06ce15320f459c230e68e73bade"
@ -8440,6 +8477,10 @@ type-check@~0.3.2:
dependencies: dependencies:
prelude-ls "~1.1.2" prelude-ls "~1.1.2"
type-detect@^4.0.0:
version "4.0.8"
resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
type-is@~1.6.15: type-is@~1.6.15:
version "1.6.15" version "1.6.15"
resolved "https://registry.yarnpkg.com/type-is/-/type-is-1.6.15.tgz#cab10fb4909e441c82842eafe1ad646c81804410" resolved "https://registry.yarnpkg.com/type-is/-/type-is-1.6.15.tgz#cab10fb4909e441c82842eafe1ad646c81804410"