refactor: 提供模糊匹配的功能(未完成)

This commit is contained in:
Zhicheng Wang 2018-03-15 16:40:11 +08:00 committed by Zhicheng Wang
parent 18ac616385
commit 8a711cba59
6 changed files with 4440 additions and 84 deletions

View File

@ -3,5 +3,5 @@
import { dirs } from '../dirs'; import { dirs } from '../dirs';
import { translateFile } from '../translate'; import { translateFile } from '../translate';
const filename = 'guide/router.md'; const filename = 'guide/testing.md';
translateFile(__dirname + '/../../../../../content-en/' + filename, dirs.content + filename); translateFile(__dirname + '/../../../../../content-en/' + filename, dirs.content + filename);

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
import { expect } from 'chai'; import { expect } from 'chai';
import { kernelText, lookup } from './translate'; import { lookup } from './translate';
import { kernelText } from './utils';
describe('根据字典进行翻译', () => { describe('根据字典进行翻译', () => {

View File

@ -3,24 +3,19 @@ import * as _ from 'lodash';
import { DictEntry } from './dict-entry'; import { DictEntry } from './dict-entry';
import { dirs } from './dirs'; import { dirs } from './dirs';
import { listMarkdownFiles } from './extractor'; import { listMarkdownFiles } from './extractor';
import { indentOf, normalizeLines, repeat } from './utils'; import { exactlyTest, indentOf, normalizeLines, repeat } from './utils';
// TODO: 改用 markdown 解析器实现
export const dict = require('./dict-latest.json') as DictEntry[]; export const dict = require('./dict-latest.json') as DictEntry[];
export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] { export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
const entries = dict const entries = dict
.filter(entry => filename.test(entry.sourceFile)) .filter(entry => filename.test(entry.sourceFile))
.filter(entry => kernelText(entry.original) === kernelText(english)); .filter(entry => exactlyTest(entry.original, english));
return _.uniqBy(entries, 'translation'); return _.uniqBy(entries, 'translation');
} }
/**
 * Builds a comparison key for a piece of text.
 *
 * All whitespace (including line breaks) is removed, then a single trailing
 * period is dropped, so two texts that differ only in spacing or a final
 * full stop produce the same key.
 *
 * @param text the text to normalize
 * @returns the normalized key
 */
export function kernelText(text: string): string {
  const compact = text.replace(/[\s\n]+/g, '');
  const withoutFinalPeriod = compact.replace(/\.$/g, '');
  return withoutFinalPeriod.trim();
}
export function translate(content: string): string[] { export function translate(content: string): string[] {
const lines = normalizeLines(content) const lines = normalizeLines(content)
.split(/\n+\s*\n+/); .split(/\n+\s*\n+/);

View File

@ -1,5 +1,5 @@
import { expect } from 'chai'; import { expect } from 'chai';
import { normalizeLines } from './utils'; import { fuzzyTest, normalizeLines, tokenize } from './utils';
describe(' 工具函数', () => { describe(' 工具函数', () => {
it('把“1. ”列表处理成空行分隔的格式,以便处理', function () { it('把“1. ”列表处理成空行分隔的格式,以便处理', function () {
@ -338,4 +338,16 @@ a <b> c
`); `);
}); });
it('拆分', function () {
expect(tokenize('abc def,abc.')).eql(['abc', 'def', 'abc']);
});
it('模糊匹配', function () {
expect(fuzzyTest(`a b c d e`, `a b c d e`)).is.false;
expect(fuzzyTest(`a b c d e f g`, `a b c d e`)).is.false;
expect(fuzzyTest(`Make that easy by encapsulating the _click-triggering_ process in a helper such as the \`click\` function below:`,
`Make that consistent and easy by encapsulating the _click-triggering_ process
in a helper such as the \`click()\` function below:
`)).is.true;
});
}); });

View File

@ -1,5 +1,6 @@
import { DictEntry } from './dict-entry'; import { DictEntry } from './dict-entry';
import { isTranslation } from './extractor'; import { isTranslation } from './extractor';
import * as _ from 'lodash';
export function translationHasNotCodeExample(entry: DictEntry): boolean { export function translationHasNotCodeExample(entry: DictEntry): boolean {
return entry.translation.indexOf('<code-example') === -1; return entry.translation.indexOf('<code-example') === -1;
@ -51,7 +52,7 @@ export function isHead(line: string): boolean {
export function normalizeLines(text: string): string { export function normalizeLines(text: string): string {
text = '\n' + text + '\n'; text = '\n' + text + '\n';
// 列表、标题等自带换行含义的markdown // 列表、标题等自带换行含义的markdown多加一个空行
const blockElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g; const blockElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g;
text = text.replace(blockElementPattern, '\n\n'); text = text.replace(blockElementPattern, '\n\n');
const hxPattern = /(\n *#+ .*)(?=\n)/g; const hxPattern = /(\n *#+ .*)(?=\n)/g;
@ -120,3 +121,30 @@ export function repeat(indent: number): string {
} }
return result; return result;
} }
/**
 * Heuristically decides whether two texts are "almost the same" by comparing
 * their word tokens.
 *
 * NOTE: the original author marked this function as not working properly yet,
 * so treat the result as experimental.
 *
 * @param text1 the first text
 * @param text2 the second text
 * @returns `true` when the texts share more than 5 distinct tokens and the
 *   shared tokens make up at least 80% of the larger distinct-token set
 */
export function fuzzyTest(text1: string, text2: string): boolean {
  // `_.intersection` yields unique values only, so both sides are de-duplicated
  // before counting. Otherwise repeated words inflate the denominator and even
  // two identical texts could score below the threshold.
  const tokens1 = _.uniq(tokenize(text1));
  const tokens2 = _.uniq(tokenize(text2));
  const sameTokens = _.intersection(tokens1, tokens2);
  const maxTokens = Math.max(tokens1.length, tokens2.length);
  // Require more than 5 shared tokens so very short texts never match fuzzily.
  return sameTokens.length > 5 && sameTokens.length / maxTokens >= 0.8;
}
/**
 * Checks whether two texts are equal once each has been reduced by
 * `kernelText`.
 *
 * @param text1 the first text
 * @param text2 the second text
 * @returns `true` when both texts reduce to the same kernel
 */
export function exactlyTest(text1: string, text2: string): boolean {
  const kernel1 = kernelText(text1);
  const kernel2 = kernelText(text2);
  return kernel1 === kernel2;
}
/**
 * Builds a comparison key for a piece of text.
 *
 * Steps, in order: remove all whitespace (including line breaks), drop a
 * single trailing period, then upper-case the result so the comparison is
 * case-insensitive.
 *
 * @param text the text to normalize
 * @returns the normalized, upper-cased key
 */
export function kernelText(text: string): string {
  const withoutWhitespace = text.replace(/[\s\n]+/g, '');
  const withoutFinalPeriod = withoutWhitespace.replace(/\.$/g, '');
  const upperCased = withoutFinalPeriod.toUpperCase();
  return upperCased.trim();
}
/**
 * Splits text into word tokens.
 *
 * Every character that is not a regex "word" character (`\W`) acts as a
 * separator; empty pieces produced by consecutive separators are discarded.
 * Underscores count as word characters, so `_foo` stays one token.
 *
 * @param text the text to split
 * @returns the non-empty tokens in their original order (duplicates kept)
 */
export function tokenize(text: string): string[] {
  const tokens: string[] = [];
  for (const piece of text.split(/\W/)) {
    const trimmed = piece.trim();
    if (trimmed) {
      tokens.push(trimmed);
    }
  }
  return tokens;
}