refactor: 提供模糊匹配的功能（未完成）

2018-03-15 16:40:11 +08:00 · 2018-03-15 16:40:11 +08:00 · 8a711cba59
commit 8a711cba59
parent 18ac616385
6 changed files with 4440 additions and 84 deletions
--- a/aio/tools/translator/bin/translate-one.ts
+++ b/aio/tools/translator/bin/translate-one.ts
@ -3,5 +3,5 @@
 import { dirs } from '../dirs';
 import { translateFile } from '../translate';
 // Document handled by this run; the same relative path is appended to both the English content directory and dirs.content.
-const filename = 'guide/router.md';
+const filename = 'guide/testing.md';
 // NOTE(review): presumably the first argument is the English source and the second the output location — confirm against translateFile.
 translateFile(__dirname + '/../../../../../content-en/' + filename, dirs.content + filename);
--- a/aio/tools/translator/dict-latest.json
+++ b/aio/tools/translator/dict-latest.json
--- a/aio/tools/translator/translate.spec.ts
+++ b/aio/tools/translator/translate.spec.ts
@ -1,5 +1,6 @@
 import { expect } from 'chai';
-import { kernelText, lookup } from './translate';
+import { lookup } from './translate';
 import { kernelText } from './utils';
 describe('根据字典进行翻译', () => {
--- a/aio/tools/translator/translate.ts
+++ b/aio/tools/translator/translate.ts
@ -3,24 +3,19 @@ import * as _ from 'lodash';
 import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
 import { listMarkdownFiles } from './extractor';
-import { indentOf, normalizeLines, repeat } from './utils';
+import { exactlyTest, indentOf, normalizeLines, repeat } from './utils';
 // TODO: 改用 markdown 解析器实现
 export const dict = require('./dict-latest.json') as DictEntry[];
 /**
  * Finds the dictionary entries whose original text corresponds to the given English text.
  *
  * Entries are first restricted to those whose `sourceFile` passes `filename`, then to those
  * whose `original` equals `english` under the normalized comparison, and the survivors are
  * de-duplicated by their `translation`.
  *
  * @param english English text to look up
  * @param filename pattern tested against each entry's `sourceFile`; the default accepts every entry
  * @returns the matching entries, at most one per distinct translation
  */
 export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
  const entries = dict
    .filter(entry => filename.test(entry.sourceFile))
-    .filter(entry => kernelText(entry.original) === kernelText(english));
+    .filter(entry => exactlyTest(entry.original, english));
  return _.uniqBy(entries, 'translation');
 }
 /**
  * Produces the comparison key of a text: all whitespace is removed and a single
  * trailing period is dropped.
  *
  * @param text text to normalize
  * @returns the normalized text
  */
 export function kernelText(text: string): string {
  // Whitespace goes first so that a period followed only by spaces still counts as trailing.
  const withoutSpaces = text.replace(/[\s\n]+/g, '');
  const withoutFinalDot = withoutSpaces.replace(/\.$/g, '');
  return withoutFinalDot.trim();
 }
 export function translate(content: string): string[] {
  const lines = normalizeLines(content)
    .split(/\n+\s*\n+/);
--- a/aio/tools/translator/utils.spec.ts
+++ b/aio/tools/translator/utils.spec.ts
@ -1,5 +1,5 @@
 import { expect } from 'chai';
-import { normalizeLines } from './utils';
+import { fuzzyTest, normalizeLines, tokenize } from './utils';
 describe(' 工具函数', () => {
  it('把“1. ”列表处理成空行分隔的格式，以便处理', function () {
@ -338,4 +338,16 @@ a <b> c
 `);
  });
  // Tokenizing: spaces, commas and periods all act as separators, and repeated words are kept.
  it('拆分', function () {
    expect(tokenize('abc def,abc.')).eql(['abc', 'def', 'abc']);
  });
  // Fuzzy matching: needs more than five shared tokens and a shared ratio of at least 0.8.
  it('模糊匹配', function () {
    // Identical but short: only five shared tokens, so the "more than five" requirement fails.
    expect(fuzzyTest(`a b c d e`, `a b c d e`)).is.false;
    // Still only five shared tokens, even though one side is longer.
    expect(fuzzyTest(`a b c d e f g`, `a b c d e`)).is.false;
    // Long sentences that differ by a few inserted words, a line break and punctuation should match.
    expect(fuzzyTest(`Make that easy by encapsulating the _click-triggering_ process in a helper such as the \`click\` function below:`,
      `Make that consistent and easy by encapsulating the _click-triggering_ process 
 in a helper such as the \`click()\` function below:
 `)).is.true;
  });
 });
--- a/aio/tools/translator/utils.ts
+++ b/aio/tools/translator/utils.ts
@ -1,5 +1,6 @@
 import { DictEntry } from './dict-entry';
 import { isTranslation } from './extractor';
 import * as _ from 'lodash';
 export function translationHasNotCodeExample(entry: DictEntry): boolean {
  return entry.translation.indexOf('<code-example') === -1;
@ -51,7 +52,7 @@ export function isHead(line: string): boolean {
 export function normalizeLines(text: string): string {
  text = '\n' + text + '\n';
-  // 列表、标题等自带换行含义的markdown
+  // 为列表、标题等自带换行含义的markdown多加一个空行
  const blockElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g;
  text = text.replace(blockElementPattern, '\n\n');
  const hxPattern = /(\n *#+ .*)(?=\n)/g;
@ -120,3 +121,30 @@ export function repeat(indent: number): string {
  }
  return result;
 }
 /**
  * Experimental fuzzy comparison of two texts. Per the original author's note this
  * feature is unfinished and is not yet considered reliable.
  *
  * Both texts are split into tokens with `tokenize`. They are treated as similar when
  * more than five distinct tokens are shared and the shared tokens make up at least
  * 80% of the larger of the two distinct-token sets.
  *
  * @param text1 first text to compare
  * @param text2 second text to compare
  * @returns `true` when the two texts are considered similar
  */
 export function fuzzyTest(text1: string, text2: string): boolean {
  // `_.intersection` only yields unique values, so both token lists are de-duplicated first;
  // otherwise repeated words inflate the denominator and even identical texts can fail.
  const tokens1 = _.uniq(tokenize(text1));
  const tokens2 = _.uniq(tokenize(text2));
  const sameTokens = _.intersection(tokens1, tokens2);
  const maxTokens = Math.max(tokens1.length, tokens2.length);
  // The count check runs first, so an empty token list never reaches the division.
  return sameTokens.length > 5 && sameTokens.length / maxTokens >= 0.8;
 }
 /**
  * Strict comparison of two texts: they match when their `kernelText` forms are identical.
  *
  * @param text1 first text to compare
  * @param text2 second text to compare
  * @returns `true` when both texts normalize to the same string
  */
 export function exactlyTest(text1: string, text2: string): boolean {
  const left = kernelText(text1);
  const right = kernelText(text2);
  return left === right;
 }
 /**
  * Produces the comparison key of a text: all whitespace is removed, a single
  * trailing period is dropped, and the result is upper-cased.
  *
  * @param text text to normalize
  * @returns the normalized, upper-cased text
  */
 export function kernelText(text: string): string {
  // Whitespace goes first so that a period followed only by spaces still counts as trailing.
  const withoutSpaces = text.replace(/[\s\n]+/g, '');
  const withoutFinalDot = withoutSpaces.replace(/\.$/g, '');
  return withoutFinalDot.toUpperCase().trim();
 }
 export function tokenize(text: string): string[] {
  return text.split(/\W/)
    .map(token => token.trim())
    .filter(token => !!token);
 }