| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | import { DictEntry } from './dict-entry'; | 
					
						
							|  |  |  |  | import { isTranslation } from './extractor'; | 
					
						
							| 
									
										
										
										
											2018-03-15 16:40:11 +08:00
										 |  |  |  | import * as _ from 'lodash'; | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | export function translationHasNotCodeExample(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return entry.translation.indexOf('<code-example') === -1; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function originalIsNotChinese(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return !isTranslation(entry.original); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-07 14:19:47 +08:00
										 |  |  |  | export function originalIsNotSpecialDivTag(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return !/^<div class="\w+\b[^>\n]*>$/.test(entry.original); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function originalIsNotCodeExampleTag(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return !/^<\/?code-example\b[^>\n]*>$/.test(entry.original); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function originalIsNotPureCloseTag(entry: DictEntry): boolean { | 
					
						
							| 
									
										
										
										
											2018-03-07 15:48:58 +08:00
										 |  |  |  |   return !/^<\/(td|a|div|header|p|figure)>$/.test(entry.original); | 
					
						
							| 
									
										
										
										
											2018-03-03 11:45:03 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-07 09:08:44 +08:00
										 |  |  |  | export function isOnlyBeginTag(text: string) { | 
					
						
							| 
									
										
										
										
											2018-03-03 11:45:03 +08:00
										 |  |  |  |   return /^\s*<\w+>\s*$/.test(text); | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-07 09:08:44 +08:00
										 |  |  |  | export function originalIsNotOnlyBeginTag(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return !isOnlyBeginTag(entry.original); | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function isNotImg(entry: DictEntry): boolean { | 
					
						
							|  |  |  |  |   return !/^<(img|figure)/.test(entry.translation); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function isNotCheatSheet(entry: DictEntry): boolean { | 
					
						
							| 
									
										
										
										
											2018-07-27 16:34:19 +08:00
										 |  |  |  |   return !/cheatsheet.md$/.test(entry.sourceFile!!); | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function isNotMarketingDocs(entry: DictEntry): boolean { | 
					
						
							| 
									
										
										
										
											2018-07-27 16:34:19 +08:00
										 |  |  |  |   return !/marketing\/docs.md$/.test(entry.sourceFile!!); | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function isNotCnPages(entry: DictEntry): boolean { | 
					
						
							| 
									
										
										
										
											2018-07-27 16:34:19 +08:00
										 |  |  |  |   return !/cn\/.*?.md$/.test(entry.sourceFile!!); | 
					
						
							| 
									
										
										
										
											2018-03-03 08:13:52 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function isHead(line: string): boolean { | 
					
						
							|  |  |  |  |   return /^#/.test(line); | 
					
						
							|  |  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-03-03 11:01:57 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | export function normalizeLines(text: string): string { | 
					
						
							| 
									
										
										
										
											2018-03-07 13:28:58 +08:00
										 |  |  |  |   // 原文中有 back to top 被分成两行的情况,这里把它标准化一下
 | 
					
						
							|  |  |  |  |   const specialBackToTopPattern = /<a href="#toc">Back to top\s+<\/a>/g; | 
					
						
							|  |  |  |  |   text = text.replace(specialBackToTopPattern, '<a href="#toc">Back to top</a>'); | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |   // 原文中有</table,为它补齐
 | 
					
						
							|  |  |  |  |   text = text.replace(/^ *<\/table$/gm, '</table>'); | 
					
						
							|  |  |  |  |   // 原文中有的换行会干扰生成 html 的格式,替换一下
 | 
					
						
							|  |  |  |  |   // tslint:disable:max-line-length
 | 
					
						
							|  |  |  |  |   text = text.replace(`<a href="https://docs.npmjs.com/getting-started/installing-node" target="_blank" title="Installing Node.js and updating npm">
 | 
					
						
							|  |  |  |  | Get them now</a> if they're not already installed on your machine. | 
					
						
							|  |  |  |  | `, `<a href="https://docs.npmjs.com/getting-started/installing-node" target="_blank" title="Installing Node.js and updating npm">Get them now</a> if they're not already installed on your machine. | 
					
						
							|  |  |  |  | `);
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |   // 为各种列表多加一个空行
 | 
					
						
							|  |  |  |  |   const listElementPattern = /(?=\n *(\d+\.|-|\*) )\n/g; | 
					
						
							|  |  |  |  |   text = text.replace(listElementPattern, '\n\n'); | 
					
						
							|  |  |  |  |   // 为标题增加空行
 | 
					
						
							|  |  |  |  |   const hxPattern = /^( *#+ .*)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(hxPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 把多行的 HTML 标题或 p 元素变成单行
 | 
					
						
							|  |  |  |  |   const hxMultilinePattern = /^( *)<(h\d|p|header)([^>]*)>\s*(.*)\s*<\/\2>$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(hxMultilinePattern, '\n$1<$2$3>$4</$2>\n'); | 
					
						
							|  |  |  |  |   // 为单行的成对标签前后添加空行
 | 
					
						
							|  |  |  |  |   const oneLinePairedTagPattern = /^( *)<(p|div|h\d+|code-example|section)\b([^>]*)>([^\n]*?)<\/\2>( *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLinePairedTagPattern, '\n$1<$2$3>$4</$2>$5\n'); | 
					
						
							|  |  |  |  |   // 为单行的注释前后添加空行
 | 
					
						
							|  |  |  |  |   const oneLineCommentPattern = /^( *<!--[\s\S]*?--> *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLineCommentPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 为单行的 back to top 前后添加空行
 | 
					
						
							|  |  |  |  |   const backToTopPattern = /^( *<a href="#toc">Back to top<\/a> *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(backToTopPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 为单行的 {@ 语句前后添加空行
 | 
					
						
							|  |  |  |  |   const atTagCommentPattern = /^( *{@a.*} *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(atTagCommentPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 为单行的自封闭标签前后添加空行
 | 
					
						
							|  |  |  |  |   const oneLineClosedTagPattern = /^( *<hr *\/?>) *$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLineClosedTagPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 为单行的 <br class="clear"> 前后添加空行
 | 
					
						
							|  |  |  |  |   const oneLineBrTagPattern = /^( *<br class="clear"> *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLineBrTagPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 为单独的 div 前后添加空行
 | 
					
						
							|  |  |  |  |   const oneLineDivTagPattern = /^( *<\/?(div|li|ul|ol)\b([^>]*)> *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLineDivTagPattern, '\n$1\n'); | 
					
						
							|  |  |  |  |   // 在 pre 前后添加空行
 | 
					
						
							|  |  |  |  |   const preBeginTagPattern = /(^ *<pre)/gm; | 
					
						
							|  |  |  |  |   text = text.replace(preBeginTagPattern, '\n$1'); | 
					
						
							|  |  |  |  |   const preEndTagPattern = /(<\/pre> *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(preEndTagPattern, '$1\n'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |   // 为 ``` 前后添加空行
 | 
					
						
							|  |  |  |  |   const multiLineCodePattern = /^( *```\w* *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(multiLineCodePattern, '\n$1\n'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |   // 把单行的 tr 拆成多行,以便翻译
 | 
					
						
							|  |  |  |  |   const trTagPattern = /^( *)(<tr\b[^>]*>)(.*)(<\/tr>)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(trTagPattern, '\n$1$2\n\n$1    $3\n\n$1$4\n'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |   // 把单行的 th/td/li 等拆成多行,以便翻译,
 | 
					
						
							|  |  |  |  |   const oneLineThTdTagPattern = /^( *)<(th|td|li)\b([^>]*)>(.*?)<\/\2>$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(oneLineThTdTagPattern, '\n$1<$2$3>\n\n$1    $4\n\n$1</$2>\n'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |   // 把原本就是多行的 th/td 中间添加空行
 | 
					
						
							|  |  |  |  |   const thTdTagPattern = /^( *)<(th|td)\b( *[^>]*)>([\s\S]*?)<\/\2>$/gm; | 
					
						
							| 
									
										
										
										
											2018-03-07 13:09:36 +08:00
										 |  |  |  |   text = text.replace(thTdTagPattern, '\n\n$1<$2$3>\n\n$1    $4\n\n$1</$2>\n\n'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |   // 在所有的起始标签前面加空行
 | 
					
						
							|  |  |  |  |   const blockBeginTagPattern = /^( *)<(code-example|code-tabs|pre|p)\b( *[^>]*)>( *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(blockBeginTagPattern, '\n$1<$2$3>$4'); | 
					
						
							| 
									
										
										
										
											2018-03-07 10:43:12 +08:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |   // 在所有的结束标签前面加空行
 | 
					
						
							|  |  |  |  |   const blockEndTagPattern = /^( *)<\/(code-example|code-tabs|pre|p)>( *)$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(blockEndTagPattern, '$1</$2>$3\n'); | 
					
						
							| 
									
										
										
										
											2018-03-06 17:32:21 +08:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |   // 把所有由空格组成的空行都去掉
 | 
					
						
							|  |  |  |  |   const blankLinePattern = /^[ \t]+$/gm; | 
					
						
							|  |  |  |  |   text = text.replace(blankLinePattern, ''); | 
					
						
							|  |  |  |  |   // 把中间的多个回车都变成两个回车
 | 
					
						
							|  |  |  |  |   const multipleBlankLinePattern = /\n{2,}/g; | 
					
						
							| 
									
										
										
										
											2018-03-06 17:08:10 +08:00
										 |  |  |  |   text = text.replace(multipleBlankLinePattern, '\n\n'); | 
					
						
							| 
									
										
										
										
											2018-03-07 10:43:12 +08:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |   // 去掉全文头尾的空白
 | 
					
						
							|  |  |  |  |   text = text.trim(); | 
					
						
							| 
									
										
										
										
											2018-03-07 10:43:12 +08:00
										 |  |  |  |   return text; | 
					
						
							| 
									
										
										
										
											2018-03-03 11:01:57 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function indentOf(line): number { | 
					
						
							|  |  |  |  |   let pattern = /^( *)[\s\S]*/; | 
					
						
							|  |  |  |  |   if (!pattern.test(line)) { | 
					
						
							|  |  |  |  |     return 0; | 
					
						
							|  |  |  |  |   } | 
					
						
							|  |  |  |  |   const leadSpaces = line.replace(pattern, '$1').length; | 
					
						
							|  |  |  |  |   if (/^ *(\d+\.|-|\*) /.test(line)) { | 
					
						
							|  |  |  |  |     return leadSpaces + 3; | 
					
						
							|  |  |  |  |   } else { | 
					
						
							|  |  |  |  |     return leadSpaces; | 
					
						
							|  |  |  |  |   } | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function repeat(indent: number): string { | 
					
						
							|  |  |  |  |   let result = ''; | 
					
						
							|  |  |  |  |   for (let i = 0; i < indent; ++i) { | 
					
						
							|  |  |  |  |     result = result + ' '; | 
					
						
							|  |  |  |  |   } | 
					
						
							|  |  |  |  |   return result; | 
					
						
							|  |  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-03-15 16:40:11 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | // 目前还不能正常工作
 | 
					
						
							|  |  |  |  | export function fuzzyTest(text1: string, text2: string): boolean { | 
					
						
							|  |  |  |  |   const tokens1 = tokenize(text1); | 
					
						
							|  |  |  |  |   const tokens2 = tokenize(text2); | 
					
						
							|  |  |  |  |   const sameTokens = _.intersection(tokens1, tokens2); | 
					
						
							|  |  |  |  |   const maxTokens = Math.max(tokens1.length, tokens2.length); | 
					
						
							|  |  |  |  |   return sameTokens.length > 5 && sameTokens.length / maxTokens >= 0.8; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  | export function exactlyTest(key: string, text: string): boolean { | 
					
						
							| 
									
										
										
										
											2018-07-27 16:34:19 +08:00
										 |  |  |  |   return !!key && key === text; | 
					
						
							| 
									
										
										
										
											2018-03-15 16:40:11 +08:00
										 |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function kernelText(text: string): string { | 
					
						
							|  |  |  |  |   return text | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  |     .replace(/([^a-zA-Z0-9#:]|\s|\.$)/g, '') | 
					
						
							| 
									
										
										
										
											2018-03-15 16:40:11 +08:00
										 |  |  |  |     .toUpperCase() | 
					
						
							|  |  |  |  |     .trim(); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function tokenize(text: string): string[] { | 
					
						
							|  |  |  |  |   return text.split(/\W/) | 
					
						
							|  |  |  |  |     .map(token => token.trim()) | 
					
						
							|  |  |  |  |     .filter(token => !!token); | 
					
						
							|  |  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-03-24 16:33:17 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | export function hasInlineText(text: string): boolean { | 
					
						
							|  |  |  |  |   return /<t>(.*?)<\/t> *<t>.*?<\/t>/g.test(text); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | export function extractOriginalContent(text: string): string { | 
					
						
							|  |  |  |  |   if (!hasInlineText(text)) { | 
					
						
							|  |  |  |  |     return text; | 
					
						
							|  |  |  |  |   } | 
					
						
							|  |  |  |  |   return text.replace(/<t>(.*?)<\/t> *<t>.*?<\/t>/gi, '$1') | 
					
						
							|  |  |  |  |     .replace(/ +/g, ' '); | 
					
						
							|  |  |  |  | } |