'use strict';

var fs = require('fs');
var path = require('canonical-path');

/**
 * @dgProcessor generateKeywordsProcessor
 * @description
 * This processor extracts all the keywords from each document and creates
 * a new document that will be rendered as a JSON file containing all
 * this data.
 */
module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
  return {
    ignoreWordsFile: undefined,
    propertiesToIgnore: [],
    docTypesToIgnore: [],
    outputFolder: '',
    $validate: {
      ignoreWordsFile: {},
      docTypesToIgnore: {},
      propertiesToIgnore: {},
      outputFolder: {presence: true}
    },
    $runAfter: ['postProcessHtml'],
    $runBefore: ['writing-files'],
    $process: function(docs) {

      // Keywords to ignore
      var wordsToIgnore = [];
      var propertiesToIgnore;
      var docTypesToIgnore;

      // Load up the keywords to ignore, if specified in the config
      if (this.ignoreWordsFile) {
        var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);
        wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);

        log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');
        log.silly(wordsToIgnore);
      }
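      // Note: the split regex above means the ignore-words file is treated as a
      // plain-text word list, where any mix of commas, spaces and newlines acts
      // as a separator. An illustrative (hypothetical) file:
      //
      //   a,an,and
      //   the of to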

      propertiesToIgnore = convertToMap(this.propertiesToIgnore);
      log.debug('Properties to ignore', propertiesToIgnore);
      docTypesToIgnore = convertToMap(this.docTypesToIgnore);
      log.debug('Doc types to ignore', docTypesToIgnore);

      var ignoreWordsMap = convertToMap(wordsToIgnore);

      const filteredDocs = docs
          // We are not interested in some docTypes
          .filter(function(doc) { return !docTypesToIgnore[doc.docType]; })
          // Ignore internals and private exports (indicated by the ɵ prefix)
          .filter(function(doc) { return !doc.internal && !doc.privateExport; });

      filteredDocs.forEach(function(doc) {

        var words = [];
        var keywordMap = Object.assign({}, ignoreWordsMap);
        var members = [];
        var membersMap = Object.assign({}, ignoreWordsMap);
        const headingWords = [];
        const headingWordMap = Object.assign({}, ignoreWordsMap);

        // Search each top level property of the document for search terms
        Object.keys(doc).forEach(function(key) {
          const value = doc[key];

          if (isString(value) && !propertiesToIgnore[key]) {
            extractWords(value, words, keywordMap);
          }
        });

        // Extract the keywords from the document's members and statics
        extractMemberWords(doc, members, membersMap);

        // Extract all the keywords from the headings
        if (doc.vFile && doc.vFile.headings) {
          Object.keys(doc.vFile.headings).forEach(function(headingTag) {
            doc.vFile.headings[headingTag].forEach(function(headingText) {
              extractWords(headingText, headingWords, headingWordMap);
            });
          });
        }

        // Extract the title to use in searches
        doc.searchTitle = doc.searchTitle || doc.title || doc.vFile && doc.vFile.title || doc.name || '';

        // Attach all this search data to the document
        doc.searchTerms = {
          titleWords: tokenize(doc.searchTitle).join(' '),
          headingWords: headingWords.sort().join(' '),
          keywords: words.sort().join(' '),
          members: members.sort().join(' ')
        };

      });

      // Now process all the search data and collect it up to be used in creating a new document
      var searchData = filteredDocs.map(function(page) {
        // Copy the properties from the searchTerms object onto the search data object
        return Object.assign({
          path: page.path,
          title: page.searchTitle,
          type: page.docType,
          deprecated: !!page.deprecated,
        }, page.searchTerms);
      });
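
      // For illustration, each entry of `searchData` now has a shape along
      // these lines (values are made up):
      //
      //   {
      //     path: 'api/common/NgIf',   // from page.path
      //     title: 'NgIf',             // from page.searchTitle
      //     type: 'directive',         // from page.docType
      //     deprecated: false,
      //     titleWords: 'ngif if',
      //     headingWords: '...',       // space-separated, sorted
      //     keywords: '...',           // space-separated, sorted
      //     members: '...'             // space-separated, sorted
      //   }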

      docs.push({
        docType: 'json-doc',
        id: 'search-data-json',
        path: this.outputFolder + '/search-data.json',
        outputPath: this.outputFolder + '/search-data.json',
        data: searchData,
        renderedContent: JSON.stringify(searchData)
      });
    }
  };
};
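
// A minimal sketch of how this processor might be wired into a dgeni `Package`
// and configured. The package and file names here are hypothetical; only the
// option names come from the processor definition above.
//
//   var Package = require('dgeni').Package;
//
//   module.exports = new Package('my-docs', [/* base packages */])
//       .processor(require('./generate-keywords'))
//       .config(function(generateKeywordsProcessor) {
//         generateKeywordsProcessor.ignoreWordsFile = 'ignore.words';
//         generateKeywordsProcessor.docTypesToIgnore = ['example-region'];
//         generateKeywordsProcessor.outputFolder = 'app';
//       });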

function isString(value) {
  return typeof value === 'string';
}

function convertToMap(collection) {
  const obj = {};
  collection.forEach(key => { obj[key] = true; });
  return obj;
}
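
// `convertToMap` turns an array of strings into a lookup object so that
// membership tests are simple property accesses. For example (illustrative):
//   convertToMap(['alpha', 'beta']) // => {alpha: true, beta: true}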

// If a token contains a name starting with "ng", e.g. "ngController", then add the
// name without the "ng" prefix to the tokens as well, e.g. "controller".
function tokenize(text) {
  const rawTokens = text.split(/[\s\/]+/mg);
  const tokens = [];
  rawTokens.forEach(token => {
    // Strip off unwanted trivial characters
    token = token
                .trim()
                .replace(/^[_\-"'`({[<$*)}\]>.]+/, '')
                .replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
    // Ignore tokens that contain weird characters
    if (/^[\w.\-]+$/.test(token)) {
      tokens.push(token.toLowerCase());
      const ngTokenMatch = /^[nN]g([A-Z]\w*)/.exec(token);
      if (ngTokenMatch) {
        tokens.push(ngTokenMatch[1].toLowerCase());
      }
    }
  });
  return tokens;
}
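
// For example (illustrative):
//   tokenize('The ngModel directive') // => ['the', 'ngmodel', 'model', 'directive']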

function extractWords(text, words, keywordMap) {
  var tokens = tokenize(text);
  tokens.forEach(function(token) {
    if (!keywordMap[token]) {
      words.push(token);
      keywordMap[token] = true;
    }
  });
}
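
// Because `keywordMap` persists across calls, repeated words are only recorded
// once per document. For example (illustrative):
//   var words = [], seen = {};
//   extractWords('pipe', words, seen);
//   extractWords('pipe async', words, seen); // words is now ['pipe', 'async']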

function extractMemberWords(doc, members, membersMap) {
  if (doc.members) {
    doc.members.forEach(function(member) { extractWords(member.name, members, membersMap); });
  }
  if (doc.statics) {
    doc.statics.forEach(function(member) { extractWords(member.name, members, membersMap); });
  }
}