import { DictEntry } from './dict-entry'; import { isTranslation } from './extractor'; import * as _ from 'lodash'; export function translationHasNotCodeExample(entry: DictEntry): boolean { return entry.translation.indexOf('Back to top\s+<\/a>/g; text = text.replace(specialBackToTopPattern, 'Back to top'); // 原文中有'); // 原文中有的换行会干扰生成 html 的格式，替换一下 // tslint:disable:max-line-length text = text.replace(` Get them now if they're not already installed on your machine. `, `Get them now if they're not already installed on your machine. `); // 为各种列表多加一个空行 const listElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g; text = text.replace(listElementPattern, '\n\n'); // 为标题增加空行 const hxPattern = /^( *#+ .*)$/gm; text = text.replace(hxPattern, '\n$1\n'); // 把多行的 HTML 标题或 p 元素变成单行 const hxMultilinePattern = /^( *)<(h\d|p|header)([^>]*)>\s*(.*)\s*<\/\2>$/gm; text = text.replace(hxMultilinePattern, '\n$1<$2$3>$4\n'); // 为单行的成对标签前后添加空行 const oneLinePairedTagPattern = /^( *)<(p|div|h\d+|code-example|section)\b([^>]*)>([^\n]*?)<\/\2>( *)$/gm; text = text.replace(oneLinePairedTagPattern, '\n$1<$2$3>$4$5\n'); // 为单行的注释前后添加空行 const oneLineCommentPattern = /^( * *)$/gm; text = text.replace(oneLineCommentPattern, '\n$1\n'); // 为单行的 back to top 前后添加空行 const backToTopPattern = /^( *Back to top<\/a> *)$/gm; text = text.replace(backToTopPattern, '\n$1\n'); // 为单行的 {@ 语句前后添加空行 const atTagCommentPattern = /^( *{@a.*} *)$/gm; text = text.replace(atTagCommentPattern, '\n$1\n'); // 为单行的自封闭标签前后添加空行 const oneLineClosedTagPattern = /^( *

) *$/gm; text = text.replace(oneLineClosedTagPattern, '\n$1\n'); // 为单行的
前后添加空行 const oneLineBrTagPattern = /^( *
// NOTE(review): The commented-out statements below are the tail of a text-normalizing
// function whose header and earlier statements sit on the preceding lines of this file.
// That function reached this review already damaged: markup inside several of its regex
// and string literals appears to have been stripped (for example `preBeginTagPattern` is
// declared but `preEndTagPattern` is the name that is used), so it cannot be repaired from
// what is visible here. The statements are kept token-for-token for reference, with their
// comments translated to English. Restore the real function from version control.
//
//   *)$/gm;
//   text = text.replace(oneLineBrTagPattern, '\n$1\n');
//   // Add blank lines around a standalone div
//   const oneLineDivTagPattern = /^( *<\/?(div|li|ul|ol)\b([^>]*)> *)$/gm;
//   text = text.replace(oneLineDivTagPattern, '\n$1\n');
//   // Add blank lines around pre
//   const preBeginTagPattern = /(^ * *)$/gm;
//   text = text.replace(preEndTagPattern, '$1\n');
//   // Add blank lines around ```
//   const multiLineCodePattern = /^( *```\w* *)$/gm;
//   text = text.replace(multiLineCodePattern, '\n$1\n');
//   // Split a single-line tr into multiple lines so it is easier to translate
//   const trTagPattern = /^( *)(]*>)(.*)(<\/tr>)$/gm;
//   text = text.replace(trTagPattern, '\n$1$2\n\n$1 $3\n\n$1$4\n');
//   // Split single-line th/td/li etc. into multiple lines so they are easier to translate
//   const oneLineThTdTagPattern = /^( *)<(th|td|li)\b([^>]*)>(.*?)<\/\2>$/gm;
//   text = text.replace(oneLineThTdTagPattern, '\n$1<$2$3>\n\n$1 $4\n\n$1\n');
//   // Add blank lines inside th/td elements that already span multiple lines
//   const thTdTagPattern = /^( *)<(th|td)\b( *[^>]*)>([\s\S]*?)<\/\2>$/gm;
//   text = text.replace(thTdTagPattern, '\n\n$1<$2$3>\n\n$1 $4\n\n$1\n\n');
//   // Add a blank line before every opening tag
//   const blockBeginTagPattern = /^( *)<(code-example|code-tabs|pre|p)\b( *[^>]*)>( *)$/gm;
//   text = text.replace(blockBeginTagPattern, '\n$1<$2$3>$4');
//   // Add a blank line before every closing tag
//   const blockEndTagPattern = /^( *)<\/(code-example|code-tabs|pre|p)>( *)$/gm;
//   text = text.replace(blockEndTagPattern, '$1$3\n');
//   // Remove every line that consists only of whitespace
//   const blankLinePattern = /^[ \t]+$/gm;
//   text = text.replace(blankLinePattern, '');
//   // Collapse runs of consecutive newlines into exactly two newlines
//   const multipleBlankLinePattern = /\n{2,}/g;
//   text = text.replace(multipleBlankLinePattern, '\n\n');
//   // Trim whitespace at the start and end of the whole text
//   text = text.trim();
//   return text;
// }

/**
 * Computes the indentation width of a line.
 *
 * Counts the leading space characters (tabs are not counted). If the line begins,
 * after those spaces, with a list marker (`1.`, `-` or `*`) followed by a space,
 * 3 is added to the count.
 * NOTE(review): the extra 3 presumably aligns continuation text with the list item's
 * content; confirm against the callers.
 *
 * @param line The text to inspect; only the spaces at its very start are counted.
 * @returns The number of leading spaces, plus 3 when the line is a list item.
 */
export function indentOf(line: string): number {
  // A line made only of spaces (or an empty line) has no non-space character at all.
  const firstNonSpace = line.search(/[^ ]/);
  const leadSpaces = firstNonSpace === -1 ? line.length : firstNonSpace;
  const isListItem = /^ *(\d+\.|-|\*) /.test(line);
  return isListItem ? leadSpaces + 3 : leadSpaces;
}

/**
 * Builds a string consisting of `indent` space characters.
 *
 * @param indent How many spaces to produce. Zero, negative and NaN values yield an
 *   empty string; fractional values are rounded up.
 * @returns A string of spaces.
 */
export function repeat(indent: number): string {
  // Math.ceil matches a `for (i = 0; i < indent; ++i)` loop for fractional counts;
  // Math.max avoids the RangeError String.prototype.repeat throws for negative counts.
  return ' '.repeat(Math.max(0, Math.ceil(indent)));
}

/**
 * Heuristically decides whether two texts are "almost the same" by comparing their
 * distinct word tokens (see `tokenize`).
 *
 * The texts match when they share more than 5 distinct tokens and the shared tokens
 * make up at least 80% of the larger of the two distinct-token sets. Both sides of the
 * ratio count distinct tokens, so a text that repeats words still matches itself.
 *
 * NOTE(review): the original author marked this function as "not working properly yet";
 * treat its result as approximate.
 *
 * @param text1 First text.
 * @param text2 Second text.
 * @returns `true` when the token overlap passes both thresholds.
 */
export function fuzzyTest(text1: string, text2: string): boolean {
  const distinct1 = new Set(tokenize(text1));
  const distinct2 = new Set(tokenize(text2));

  let sharedCount = 0;
  distinct1.forEach(token => {
    if (distinct2.has(token)) {
      sharedCount++;
    }
  });

  const maxTokens = Math.max(distinct1.size, distinct2.size);
  // The `> 5` check runs first, so the division is never evaluated with maxTokens === 0.
  return sharedCount > 5 && sharedCount / maxTokens >= 0.8;
}

/**
 * Tests whether `key` is non-empty and strictly equal to `text`.
 *
 * @param key The lookup key; any falsy key (such as the empty string) never matches.
 * @param text The text to compare against.
 * @returns `true` only for a truthy `key` that is identical to `text`.
 */
export function exactlyTest(key: string, text: string): boolean {
  return Boolean(key) && key === text;
}

/**
 * Reduces a text to its upper-cased "kernel": every character other than ASCII letters,
 * digits, `#` and `:` is removed (this includes all whitespace and punctuation).
 *
 * @param text The text to reduce.
 * @returns The remaining characters, upper-cased.
 */
export function kernelText(text: string): string {
  return text.replace(/[^a-zA-Z0-9#:]/g, '').toUpperCase();
}

/**
 * Splits a text into word tokens.
 *
 * A token is a maximal run of `\w` characters (ASCII letters, digits and `_`); every
 * other character, including non-ASCII text, acts as a separator.
 *
 * @param text The text to split.
 * @returns The non-empty tokens in order of appearance.
 */
export function tokenize(text: string): string[] {
  return text.split(/\W+/).filter(token => token.length > 0);
}

/**
 * Tests whether a text contains an inline pair: two adjacent `<t>…</t>` elements on the
 * same line, separated by nothing but optional spaces.
 *
 * The opening `<t>` tags are part of the pattern so that the pair is matched as a whole.
 *
 * @param text The text to inspect.
 * @returns `true` when at least one such pair is present.
 */
export function hasInlineText(text: string): boolean {
  return /<t>(.*?)<\/t> *<t>.*?<\/t>/.test(text);
}

/**
 * Replaces every inline `<t>first</t> <t>second</t>` pair with the content of its first
 * element, then collapses runs of spaces into a single space.
 *
 * Texts without any inline pair are returned unchanged (no space collapsing).
 * NOTE(review): detection via `hasInlineText` is case-sensitive while the replacement
 * itself ignores case, as in the original code; confirm whether that is intended.
 *
 * @param text The text to process.
 * @returns The text with each pair reduced to the content of its first element.
 */
export function extractOriginalContent(text: string): string {
  if (!hasInlineText(text)) {
    return text;
  }
  const withoutPairs = text.replace(/<t>(.*?)<\/t> *<t>.*?<\/t>/gi, '$1');
  return withoutPairs.replace(/ +/g, ' ');
}