feat: 只根据核心字符比较

feat: 处理列表和标题等不必紧跟换行符的格式
This commit is contained in:
Zhicheng Wang 2018-03-02 14:25:07 +08:00
parent ee97046824
commit c2f0045a5e
2 changed files with 27 additions and 17 deletions

View File

@ -1,21 +1,30 @@
import { expect } from 'chai';
import { dirs } from './dirs';
import { dict, lookup, translate } from './translate';
import { dict, kernelText, lookup, normalizeLines, translate } from './translate';
// Spec suite for the dictionary-driven translation helpers
// (suite title in English: "translate according to the dictionary").
describe('根据字典进行翻译', () => {
// No dictionary entry may have an `original` that starts with `<div`.
it('忽略明显错误的条目', function () {
expect(dict.filter(entry => /^<div/.test(entry.original))).eql([]);
});
// kernelText drops the leading/trailing whitespace of its input.
it('抽取核心字符', function () {
expect(kernelText(' # Forms ABC ')).eql('# Forms ABC');
});
// Dictionary lookup: the first hit for '# Forms' carries the expected translation.
it('查字典', () => {
expect(lookup('# Forms')[0].translation).eql('# 表单');
});
// normalizeLines puts a blank line in front of a list item that directly follows another line.
it('把“- * 1. #”等处理成空行分隔的格式,以便处理', function () {
const lines = normalizeLines('1. abc\n11. def\n');
expect(lines).eql('1. abc\n\n11. def\n');
});
// Translates guide/forms.md through `translate` and writes the result under dirs.content.
it('自动根据字典翻译单个文件', function () {
const fs = require('fs');
// NOTE(review): the next two `const content` lines look like the pre-change and post-change
// versions of one statement (this text appears to be a diff with its +/- markers stripped);
// only one of them can exist in the real file — confirm against the repository.
const content = fs.readFileSync(dirs.content + 'guide/forms.md', 'utf-8');
const content = fs.readFileSync(__dirname + '/../../../../content-en/' + 'guide/forms.md', 'utf-8');
const result = translate(content);
// Paragraphs returned by `translate` are joined back together with blank lines.
fs.writeFileSync(dirs.content + 'guide/forms.md', result.join('\n\n'), 'utf-8');
});
});

View File

@ -4,22 +4,29 @@ import { DictEntry } from './dict-entry';
const dict1 = require('./dict-1.json') as DictEntry[];
const dict2 = require('./dict-2.json') as DictEntry[];
const dict3 = require('./dict-3.json') as DictEntry[];
// Combined dictionary: dict1, dict2 and dict3 concatenated, minus every entry whose
// `original` starts (after optional whitespace) with `<div`.
// NOTE(review): the declaration appears twice — a one-line form and a wrapped form of the
// same expression — presumably the old and new sides of a diff whose markers were stripped.
export const dict = dict1.concat(dict2).concat(dict3).filter(entry => !/^\s*<div/.test(entry.original));
export const dict = dict1.concat(dict2).concat(dict3)
.filter(entry => !/^\s*<div/.test(entry.original));
/**
 * Finds the dictionary entries whose original text matches an English string.
 *
 * NOTE(review): this body holds two implementations back to back — presumably the old and
 * new sides of a diff whose +/- markers were stripped. Read literally, the first `return`
 * makes everything after it unreachable; confirm which version the real file contains.
 *
 * @param english - English text to look up.
 * @param filename - Pattern tested against each entry's `sourceFile`; matches everything by default.
 * @returns The matching entries, de-duplicated by their `translation` field.
 */
export function lookup(english: string, filename: RegExp = /.*/): DictEntry[] {
// First version: searches the combined `dict` and requires `original` to equal `english` exactly.
return _.uniqBy(dict.filter(entry => filename.test(entry.sourceFile)).filter(entry => entry.original === english), 'translation');
// Second version: searches `dict3` only and compares both sides after kernelText normalization.
let entries = dict3
.filter(entry => filename.test(entry.sourceFile))
.filter(entry => kernelText(entry.original) === kernelText(english));
return _.uniqBy(entries, 'translation');
}
/**
 * Reduces a piece of text to its "kernel" form for comparison purposes.
 *
 * Every run of whitespace (spaces, tabs, line breaks, ...) is collapsed into a single
 * space, and whitespace at either end of the result is removed.
 *
 * @param text - The text to normalize.
 * @returns The normalized text.
 */
export function kernelText(text: string): string {
  const collapsed = text.replace(/[\s\n]+/g, ' ');
  return collapsed.trim();
}
export function translate(content: string): string[] {
const lines = content.split(/\n+\s*\n+/);
const lines = normalizeLines(content)
.split(/\n+\s*\n+/);
return lines
.map(splitList)
.map(flatten)
.map(line => {
if (!line.trim()) {
return line;
}
const translations = lookup(line.trim(), /forms.md$/);
const translations = lookup(line, /forms.md$/);
const indent = indentOf(line);
const padding = repeat(indent);
if (translations.length === 0) {
@ -53,12 +60,6 @@ function repeat(indent: number): string {
return result;
}
/**
 * Placeholder for splitting one paragraph into its individual markdown elements.
 *
 * No splitting is implemented: the input is handed back unchanged as the only
 * element of a new array.
 *
 * @param line - The paragraph text.
 * @returns A single-element array containing `line`.
 */
function splitList(line: string): string[] {
  // TODO: split apart the markdown elements that do not need a blank line between them.
  return [line];
}
export function flatten<T>(arr: T[]): T {
return Array.prototype.concat.apply([], arr);
/**
 * Puts an extra line break in front of lines that begin a markdown-style element, so
 * that such lines end up separated from the preceding line by a blank line.
 *
 * A line break is doubled when the following line consists of optional leading spaces,
 * then one of: digits followed by a dot, `-`, `*`, `#` or `<`, and then a space.
 * All other line breaks are left untouched.
 *
 * @param text - The text to normalize.
 * @returns The text with the additional line breaks inserted.
 */
export function normalizeLines(text: string): string {
  // The lookahead inspects what follows the line break; only the break itself is consumed.
  const breakBeforeMarker = /(?=\n *(\d+\.|-|\*|#|<) )\n/g;
  const blankLine = '\n\n';
  return text.replace(breakBeforeMarker, blankLine);
}