refactor: provide fuzzy matching (unfinished)
parent 18ac616385
commit 8a711cba59
@@ -3,5 +3,5 @@
 import { dirs } from '../dirs';
 import { translateFile } from '../translate';

-const filename = 'guide/router.md';
+const filename = 'guide/testing.md';
 translateFile(__dirname + '/../../../../../content-en/' + filename, dirs.content + filename);

File diff suppressed because it is too large

@@ -1,5 +1,6 @@
 import { expect } from 'chai';
-import { kernelText, lookup } from './translate';
+import { lookup } from './translate';
+import { kernelText } from './utils';


 describe('dictionary-based translation', () => {

@@ -3,24 +3,19 @@ import * as _ from 'lodash';
 import { DictEntry } from './dict-entry';
 import { dirs } from './dirs';
 import { listMarkdownFiles } from './extractor';
-import { indentOf, normalizeLines, repeat } from './utils';
+import { exactlyTest, indentOf, normalizeLines, repeat } from './utils';

 // TODO: reimplement this with a markdown parser

 export const dict = require('./dict-latest.json') as DictEntry[];

 export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
   const entries = dict
     .filter(entry => filename.test(entry.sourceFile))
-    .filter(entry => kernelText(entry.original) === kernelText(english));
+    .filter(entry => exactlyTest(entry.original, english));
   return _.uniqBy(entries, 'translation');
 }

-export function kernelText(text: string): string {
-  return text
-    .replace(/[\s\n]+/g, '')
-    .replace(/\.$/g, '')
-    .trim();
-}
-
 export function translate(content: string): string[] {
   const lines = normalizeLines(content)
     .split(/\n+\s*\n+/);
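
For reference, a minimal sketch of how the reworked lookup might be called after this change; the query string and the filename pattern below are made-up examples, not entries from the real dict-latest.json:

import { lookup } from './translate';

// exactlyTest() normalizes both sides with kernelText(), so differences in
// whitespace, letter case and a trailing period do not affect the match.
const entries = lookup('Testing is as important as coding', /guide\/testing\.md$/);
entries.forEach(entry => console.log(entry.translation));
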
@@ -1,5 +1,5 @@
 import { expect } from 'chai';
-import { normalizeLines } from './utils';
+import { fuzzyTest, normalizeLines, tokenize } from './utils';

 describe('utility functions', () => {
   it('normalizes "1. " lists into blank-line separated blocks for further processing', function () {
@@ -338,4 +338,16 @@ a <b> c
 `);
   });

+  it('tokenize', function () {
+    expect(tokenize('abc def,abc.')).eql(['abc', 'def', 'abc']);
+  });
+
+  it('fuzzy matching', function () {
+    expect(fuzzyTest(`a b c d e`, `a b c d e`)).is.false;
+    expect(fuzzyTest(`a b c d e f g`, `a b c d e`)).is.false;
+    expect(fuzzyTest(`Make that easy by encapsulating the _click-triggering_ process in a helper such as the \`click\` function below:`,
+      `Make that consistent and easy by encapsulating the _click-triggering_ process
+in a helper such as the \`click()\` function below:
+`)).is.true;
+  });
 });
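
A note on the two is.false expectations above: as implemented later in this commit, fuzzyTest requires strictly more than five shared tokens as well as an overlap ratio of at least 0.8, so even two identical five-token strings do not match. A rough sketch of that arithmetic, using lodash's intersection the same way utils.ts does:

import * as _ from 'lodash';

const tokens1 = 'a b c d e'.split(/\W/).filter(t => !!t);      // ['a', 'b', 'c', 'd', 'e']
const tokens2 = 'a b c d e f g'.split(/\W/).filter(t => !!t);  // 7 tokens

const same = _.intersection(tokens1, tokens2);                 // 5 shared tokens
const ratio = same.length / Math.max(tokens1.length, tokens2.length);  // 5 / 7, about 0.71

// fuzzyTest checks same.length > 5 && ratio >= 0.8, so both pairs tested above fail
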
@@ -1,5 +1,6 @@
 import { DictEntry } from './dict-entry';
 import { isTranslation } from './extractor';
+import * as _ from 'lodash';

 export function translationHasNotCodeExample(entry: DictEntry): boolean {
   return entry.translation.indexOf('<code-example') === -1;
@@ -51,7 +52,7 @@ export function isHead(line: string): boolean {

 export function normalizeLines(text: string): string {
   text = '\n' + text + '\n';
-  // markdown constructs such as lists and headings imply their own line breaks
+  // add an extra blank line before markdown constructs such as lists and headings, which imply their own line breaks
   const blockElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g;
   text = text.replace(blockElementPattern, '\n\n');
   const hxPattern = /(\n *#+ .*)(?=\n)/g;
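
As a rough illustration of the blockElementPattern shown above (only the regex from this hunk, not the full normalizeLines function): the lookahead matches a newline that is immediately followed by a list marker ("1. ", "- ", "* ") and doubles it, so list items end up separated by blank lines. The sample string is invented for this sketch:

const blockElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g;

const normalized = 'intro\n1. first\n2. second'.replace(blockElementPattern, '\n\n');
// normalized === 'intro\n\n1. first\n\n2. second'
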
@@ -120,3 +121,30 @@ export function repeat(indent: number): string {
   }
   return result;
 }
+
+// not working correctly yet
+export function fuzzyTest(text1: string, text2: string): boolean {
+  const tokens1 = tokenize(text1);
+  const tokens2 = tokenize(text2);
+  const sameTokens = _.intersection(tokens1, tokens2);
+  const maxTokens = Math.max(tokens1.length, tokens2.length);
+  return sameTokens.length > 5 && sameTokens.length / maxTokens >= 0.8;
+}
+
+export function exactlyTest(text1: string, text2: string): boolean {
+  return kernelText(text1) === kernelText(text2);
+}
+
+export function kernelText(text: string): string {
+  return text
+    .replace(/[\s\n]+/g, '')
+    .replace(/\.$/g, '')
+    .toUpperCase()
+    .trim();
+}
+
+export function tokenize(text: string): string[] {
+  return text.split(/\W/)
+    .map(token => token.trim())
+    .filter(token => !!token);
+}
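
A minimal usage sketch of the helpers added above, assuming they are imported from './utils' the same way the spec file does; the return values noted in the comments follow directly from the implementations in this diff:

import { exactlyTest, fuzzyTest, kernelText, tokenize } from './utils';

// kernelText strips all whitespace and a trailing period, then upper-cases,
// so trivially reformatted sentences normalize to the same key:
kernelText('Hello  world.');                   // 'HELLOWORLD'
exactlyTest('Hello world', 'hello   world.');  // true

// tokenize splits on non-word characters and drops empty tokens:
tokenize('abc def,abc.');                      // ['abc', 'def', 'abc']

// fuzzyTest demands more than five shared tokens plus an 80% overlap,
// which is why two identical five-token strings still come back false:
fuzzyTest('a b c d e', 'a b c d e');           // false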