2017-01-26 09:03:53 -05:00
|
|
|
'use strict';
|
|
|
|
|
2021-03-28 15:34:09 -04:00
|
|
|
const stem = require('stemmer');
|
2017-01-26 09:03:53 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @dgProcessor generateKeywordsProcessor
|
|
|
|
* @description
|
|
|
|
* This processor extracts all the keywords from each document and creates
|
|
|
|
* a new document that will be rendered as a JavaScript file containing all
|
|
|
|
* this data.
|
|
|
|
*/
|
2021-03-28 15:34:09 -04:00
|
|
|
/**
 * Dgeni processor factory.
 *
 * @param {Object} log - Dgeni logger (only `log.debug` is used here).
 * @returns {Object} a processor that runs after HTML post-processing and
 *   before file writing, building a `search-data.json` document.
 */
module.exports = function generateKeywordsProcessor(log) {
  return {
    // Words excluded from the index (e.g. stop words). Compared lowercased.
    ignoreWords: [],
    // Top-level string properties of a doc that should not be tokenized.
    propertiesToIgnore: [],
    // docTypes that are skipped entirely.
    docTypesToIgnore: [],
    // Folder (relative path) where search-data.json is emitted. Required.
    outputFolder: '',
    $validate: {
      ignoreWords: {},
      docTypesToIgnore: {},
      propertiesToIgnore: {},
      outputFolder: {presence: true}
    },
    $runAfter: ['postProcessHtml'],
    $runBefore: ['writing-files'],
    $process(docs) {
      // Maps each stemmed token to its index in the shared dictionary.
      // Docs store dictionary indices rather than raw words to keep the
      // generated JSON compact.
      const dictionary = new Map();

      // Keywords to ignore
      const ignoreWords = new Set(this.ignoreWords);
      log.debug('Words to ignore', ignoreWords);
      const propertiesToIgnore = new Set(this.propertiesToIgnore);
      log.debug('Properties to ignore', propertiesToIgnore);
      const docTypesToIgnore = new Set(this.docTypesToIgnore);
      log.debug('Doc types to ignore', docTypesToIgnore);

      const filteredDocs = docs
          // We are not interested in some docTypes
          .filter(doc => !docTypesToIgnore.has(doc.docType))
          // Ignore internals and private exports (indicated by the ɵ prefix)
          .filter(doc => !doc.internal && !doc.privateExport);

      for(const doc of filteredDocs) {
        // Search each top level property of the document for search terms
        let mainTokens = [];
        for(const key of Object.keys(doc)) {
          const value = doc[key];
          if (isString(value) && !propertiesToIgnore.has(key)) {
            mainTokens.push(...tokenize(value, ignoreWords, dictionary));
          }
        }

        // Tokens from member/static names, including inherited ones.
        const memberTokens = extractMemberTokens(doc, ignoreWords, dictionary);

        // Extract all the keywords from the headings
        let headingTokens = [];
        if (doc.vFile && doc.vFile.headings) {
          for(const headingTag of Object.keys(doc.vFile.headings)) {
            for(const headingText of doc.vFile.headings[headingTag]) {
              headingTokens.push(...tokenize(headingText, ignoreWords, dictionary));
            }
          }
        }

        // Extract the title to use in searches
        doc.searchTitle = doc.searchTitle || doc.title || doc.vFile && doc.vFile.title || doc.name || '';

        // Attach all this search data to the document
        // (empty token lists are omitted to keep the JSON small)
        doc.searchTerms = {};
        if (headingTokens.length > 0) {
          doc.searchTerms.headings = headingTokens;
        }
        if (mainTokens.length > 0) {
          doc.searchTerms.keywords = mainTokens;
        }
        if (memberTokens.length > 0) {
          doc.searchTerms.members = memberTokens;
        }
        if (doc.searchKeywords) {
          doc.searchTerms.topics = doc.searchKeywords.trim();
        }
      }

      // Now process all the search data and collect it up to be used in creating a new document
      const searchData = {
        // The dictionary keys are in insertion order, so a token's array
        // index here equals the index stored in each page's token lists.
        dictionary: Array.from(dictionary.keys()),
        pages: filteredDocs.map(page => {
          // Copy the properties from the searchTerms object onto the search data object
          const searchObj = {
            path: page.path,
            title: page.searchTitle,
            type: page.docType,
          };
          if (page.deprecated) {
            searchObj.deprecated = true;
          }
          return Object.assign(searchObj, page.searchTerms);
        }),
      };

      // Emit the collected data as a new JSON doc to be written to disk.
      docs.push({
        docType: 'json-doc',
        id: 'search-data-json',
        path: this.outputFolder + '/search-data.json',
        outputPath: this.outputFolder + '/search-data.json',
        data: searchData,
        renderedContent: JSON.stringify(searchData)
      });
    }
  };
};
|
|
|
|
|
|
|
|
/**
 * Whether `value` is a primitive string.
 *
 * Uses strict equality (`===`) — the original loose `==` worked here but
 * violates the codebase-wide strict-comparison convention.
 *
 * @param {*} value - value to check.
 * @returns {boolean} true only for primitive strings (not `String` objects).
 */
function isString(value) {
  return typeof value === 'string';
}
|
|
|
|
|
2021-03-28 15:34:09 -04:00
|
|
|
/**
 * Split `text` into indexable tokens and return their dictionary indices.
 *
 * @param {string} text - raw text to tokenize (may contain HTML).
 * @param {Set<string>} ignoreWords - lowercased words to exclude.
 * @param {Map<string, number>} dictionary - shared token→index map; grown as
 *   new stemmed tokens are encountered.
 * @returns {number[]} dictionary indices for each accepted token.
 */
function tokenize(text, ignoreWords, dictionary) {
  // Split on whitespace and things that are likely to be HTML tags (this is not exhaustive but reduces the unwanted tokens that are indexed).
  const rawTokens = text.split(/[\s/]+|<\/?[a-z]+(?:\s+\w+(?:="[^"]+")?)*>/img);
  const tokens = [];
  for(let token of rawTokens) {
    token = token.trim();

    // Strip off unwanted trivial characters
    token = token.replace(/^[_\-"'`({[<$*)}\]>.]+/, '').replace(/[_\-"'`({[<$*)}\]>.]+$/, '');

    // Skip if in the ignored words list
    if (ignoreWords.has(token.toLowerCase())) {
      continue;
    }

    // Skip tokens that contain weird characters
    // (also rejects the empty string, so blank split results are dropped)
    if (!/^[\w._-]+$/.test(token)) {
      continue;
    }

    storeToken(token, tokens, dictionary);
    // For Angular-prefixed tokens also index the bare name (e.g. "ngIf" → "If")
    // so searches can match either form. `slice` replaces deprecated `substr`.
    if (token.startsWith('ng')) {
      storeToken(token.slice(2), tokens, dictionary);
    }
  }

  return tokens;
}
|
|
|
|
|
2021-03-28 15:34:09 -04:00
|
|
|
/**
 * Record a token in the shared dictionary and append its index to `tokens`.
 *
 * The token is stemmed first; an unseen stem is assigned the next free
 * index (the dictionary's current size), so indices match insertion order.
 *
 * @param {string} token - raw token to store.
 * @param {number[]} tokens - output list receiving the dictionary index.
 * @param {Map<string, number>} dictionary - shared stem→index map.
 */
function storeToken(token, tokens, dictionary) {
  const stemmed = stem(token);
  if (!dictionary.has(stemmed)) {
    dictionary.set(stemmed, dictionary.size);
  }
  tokens.push(dictionary.get(stemmed));
}
|
|
|
|
|
2021-03-28 15:34:09 -04:00
|
|
|
/**
 * Collect tokens from a doc's member and static-member names, recursing
 * into the docs of `extends`/`implements` clauses.
 *
 * Fix: the falsy-`doc` guard previously returned `''` (a string) while all
 * other paths return an array. Callers spread the result and test `.length`,
 * so `''` happened to behave like an empty list, but the inconsistent type
 * was fragile. It now returns `[]`.
 *
 * @param {Object|null|undefined} doc - doc object (may be absent on clauses).
 * @param {Set<string>} ignoreWords - lowercased words to exclude.
 * @param {Map<string, number>} dictionary - shared token→index map.
 * @returns {number[]} dictionary indices for all member-name tokens.
 */
function extractMemberTokens(doc, ignoreWords, dictionary) {
  if (!doc) return [];

  let memberContent = [];

  if (doc.members) {
    doc.members.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
  }
  if (doc.statics) {
    doc.statics.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
  }
  if (doc.extendsClauses) {
    doc.extendsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
  }
  if (doc.implementsClauses) {
    doc.implementsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
  }

  return memberContent;
}
|