angular-cn/aio/tools/transforms/angular-base-package/processors/generateKeywords.js

'use strict';

var fs = require('fs');
var path = require('canonical-path');

/**
 * @dgProcessor generateKeywordsProcessor
 * @description
 * This processor extracts all the keywords from each document and creates
 * a new document that will be rendered as a JavaScript file containing all
 * this data.
 */
module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
  return {
    ignoreWordsFile: undefined,
    propertiesToIgnore: [],
    docTypesToIgnore: [],
    outputFolder: '',
    $validate: {
      ignoreWordsFile: {},
      docTypesToIgnore: {},
      propertiesToIgnore: {},
      outputFolder: {presence: true}
    },
    $runAfter: ['postProcessHtml'],
    $runBefore: ['writing-files'],
    $process: function(docs) {

      // Keywords to ignore
      var wordsToIgnore = [];
      var propertiesToIgnore;
      var docTypesToIgnore;

      // Load up the keywords to ignore, if specified in the config
      if (this.ignoreWordsFile) {
        var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);
        wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);

        log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');
        log.silly(wordsToIgnore);
      }

      propertiesToIgnore = convertToMap(this.propertiesToIgnore);
      log.debug('Properties to ignore', propertiesToIgnore);
      docTypesToIgnore = convertToMap(this.docTypesToIgnore);
      log.debug('Doc types to ignore', docTypesToIgnore);

      var ignoreWordsMap = convertToMap(wordsToIgnore);

      // If the heading contains a name starting with ng, e.g. "ngController", then add the
      // name without the ng to the text, e.g. "controller".
      function tokenize(text) {
        const rawTokens = text.split(/[\s\/]+/mg);
        const tokens = [];
        rawTokens.forEach(token => {
          // Strip off unwanted trivial characters
          token = token
              .trim()
              .replace(/^[_\-"'`({[<$*)}\]>.]+/, '')
              .replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
          // Ignore tokens that contain weird characters
          if (/^[\w.\-]+$/.test(token)) {
            tokens.push(token.toLowerCase());
            const ngTokenMatch = /^[nN]g([A-Z]\w*)/.exec(token);
            if (ngTokenMatch) {
              tokens.push(ngTokenMatch[1].toLowerCase());
            }
          }
        });
        return tokens;
      }

      function extractWords(text, words, keywordMap) {
        var tokens = tokenize(text);
        tokens.forEach(function(token) {
          if (!keywordMap[token]) {
            words.push(token);
            keywordMap[token] = true;
          }
        });
      }


      const filteredDocs = docs
          // We are not interested in some docTypes
          .filter(function(doc) { return !docTypesToIgnore[doc.docType]; })
          // Ignore internals and private exports (indicated by the ɵ prefix)
          .filter(function(doc) { return !doc.internal && !doc.privateExport; });


      filteredDocs.forEach(function(doc) {

        var words = [];
        var keywordMap = Object.assign({}, ignoreWordsMap);
        var members = [];
        var membersMap = Object.assign({}, ignoreWordsMap);
        const headingWords = [];
        const headingWordMap = Object.assign({}, ignoreWordsMap);

        // Search each top level property of the document for search terms
        Object.keys(doc).forEach(function(key) {
          const value = doc[key];

          if (isString(value) && !propertiesToIgnore[key]) {
            extractWords(value, words, keywordMap);
          }

          // Special case properties that contain content relating to "members"
          // of a doc that represents, say, a class or interface
          if (key === 'members' || key === 'statics') {
            value.forEach(function(member) { extractWords(member.name, members, membersMap); });
          }
        });

        // Extract all the keywords from the headings
        if (doc.vFile && doc.vFile.headings) {
          Object.keys(doc.vFile.headings).forEach(function(headingTag) {
            doc.vFile.headings[headingTag].forEach(function(headingText) {
              extractWords(headingText, headingWords, headingWordMap);
            });
          });
        }

        // Extract the title to use in searches
        doc.searchTitle = doc.searchTitle || doc.title || doc.vFile && doc.vFile.title || doc.name || '';

        // Attach all this search data to the document
        doc.searchTerms = {
          titleWords: tokenize(doc.searchTitle).join(' '),
          headingWords: headingWords.sort().join(' '),
          keywords: words.sort().join(' '),
          members: members.sort().join(' ')
        };

      });

      // Now process all the search data and collect it up to be used in creating a new document
      var searchData = filteredDocs.map(function(page) {
        // Copy the properties from the searchTerms object onto the search data object
        return Object.assign({
          path: page.path,
          title: page.searchTitle,
          type: page.docType,
          deprecated: !!page.deprecated,
        }, page.searchTerms);
      });

      docs.push({
        docType: 'json-doc',
        id: 'search-data-json',
        path: this.outputFolder + '/search-data.json',
        outputPath: this.outputFolder + '/search-data.json',
        data: searchData,
        renderedContent: JSON.stringify(searchData)
      });
    }
  };
};


/**
 * Tells whether `value` is a primitive string.
 * `String` wrapper objects are not considered strings.
 *
 * @param {*} value the value to inspect
 * @returns {boolean} `true` only if `typeof value` is `'string'`
 */
function isString(value) {
  const kind = typeof value;
  return kind === 'string';
}

/**
 * Builds a lookup object whose keys are the items of `collection`, each mapped to `true`.
 *
 * The result is a plain object, so it still inherits from `Object.prototype`;
 * callers that look up arbitrary keys should test for own properties.
 *
 * @param {Array|null|undefined} collection the keys to register; anything exposing a
 *     `forEach` method works. `null`/`undefined` is treated as an empty collection.
 * @returns {Object<string, boolean>} the lookup object (empty if there are no keys)
 */
function convertToMap(collection) {
  const obj = {};
  // An unset (optional) configuration list simply yields an empty map.
  if (collection == null) {
    return obj;
  }
  collection.forEach(key => { obj[key] = true; });
  return obj;
}
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`'use strict';`

			`var fs = require('fs');`
			`var path = require('canonical-path');`

			`/**`
			`* @dgProcessor generateKeywordsProcessor`
			`* @description`
			`* This processor extracts all the keywords from each document and creates`
			`* a new document that will be rendered as a JavaScript file containing all`
			`* this data.`
			`*/`
			`module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {`
			`return {`
			`ignoreWordsFile: undefined,`
			`propertiesToIgnore: [],`
			`docTypesToIgnore: [],`
			`outputFolder: '',`
			`$validate: {`
			`ignoreWordsFile: {},`
			`docTypesToIgnore: {},`
			`propertiesToIgnore: {},`
			`outputFolder: {presence: true}`
			`},`
build(aio): the the captured h1 as the title for the search index If there is no title already provided, use the one captured from the renderedContent. 2017-05-30 15:24:54 -04:00			`$runAfter: ['postProcessHtml'],`
			`$runBefore: ['writing-files'],`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`$process: function(docs) {`

			`// Keywords to ignore`
			`var wordsToIgnore = [];`
			`var propertiesToIgnore;`
			`var docTypesToIgnore;`

			`// Load up the keywords to ignore, if specified in the config`
			`if (this.ignoreWordsFile) {`
			`var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);`
			`wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);`

			`log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');`
			`log.silly(wordsToIgnore);`
			`}`

			`propertiesToIgnore = convertToMap(this.propertiesToIgnore);`
			`log.debug('Properties to ignore', propertiesToIgnore);`
			`docTypesToIgnore = convertToMap(this.docTypesToIgnore);`
			`log.debug('Doc types to ignore', docTypesToIgnore);`

			`var ignoreWordsMap = convertToMap(wordsToIgnore);`

build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`// If the heading contains a name starting with ng, e.g. "ngController", then add the`
			`// name without the ng to the text, e.g. "controller".`
build(docs-infra): improve search quality (#25750) PR Close #25750 2018-09-14 09:08:48 -04:00			`function tokenize(text) {`
			`const rawTokens = text.split(/[\s\/]+/mg);`
			`const tokens = [];`
			`rawTokens.forEach(token => {`
			`// Strip off unwanted trivial characters`
			`token = token`
			`.trim()`
			.replace(/^[_\-"'`({[<$*)}\]>.]+/, '')
			.replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
			`// Ignore tokens that contain weird characters`
			`if (/^[\w.\-]+$/.test(token)) {`
			`tokens.push(token.toLowerCase());`
			`const ngTokenMatch = /^[nN]g([A-Z]\w*)/.exec(token);`
			`if (ngTokenMatch) {`
			`tokens.push(ngTokenMatch[1].toLowerCase());`
			`}`
			`}`
			`});`
			`return tokens;`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`}`

			`function extractWords(text, words, keywordMap) {`
build(docs-infra): improve search quality (#25750) PR Close #25750 2018-09-14 09:08:48 -04:00			`var tokens = tokenize(text);`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`tokens.forEach(function(token) {`
build(docs-infra): improve search quality (#25750) PR Close #25750 2018-09-14 09:08:48 -04:00			`if (!keywordMap[token]) {`
			`words.push(token);`
			`keywordMap[token] = true;`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`}`
			`});`
			`}`


fix(aio): ignore private exports Closes #14992 2017-03-12 09:27:47 -04:00			`const filteredDocs = docs`
			`// We are not interested in some docTypes`
			`.filter(function(doc) { return !docTypesToIgnore[doc.docType]; })`
			`// Ignore internals and private exports (indicated by the ɵ prefix)`
			`.filter(function(doc) { return !doc.internal && !doc.privateExport; });`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00

build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`filteredDocs.forEach(function(doc) {`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00
			`var words = [];`
			`var keywordMap = Object.assign({}, ignoreWordsMap);`
			`var members = [];`
build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`var membersMap = Object.assign({}, ignoreWordsMap);`
			`const headingWords = [];`
			`const headingWordMap = Object.assign({}, ignoreWordsMap);`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00
			`// Search each top level property of the document for search terms`
			`Object.keys(doc).forEach(function(key) {`
			`const value = doc[key];`

			`if (isString(value) && !propertiesToIgnore[key]) {`
			`extractWords(value, words, keywordMap);`
			`}`

build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`// Special case properties that contain content relating to "members"`
			`// of a doc that represents, say, a class or interface`
build(aio): add API static members to search index (#21988) Previously searching for `compose` did not include `Validators` in the search results because we were not including all the `static` members of API docs in the index. PR Close #21988 2018-02-02 08:02:18 -05:00			`if (key === 'members' \|\| key === 'statics') {`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`value.forEach(function(member) { extractWords(member.name, members, membersMap); });`
			`}`
			`});`

build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`// Extract all the keywords from the headings`
			`if (doc.vFile && doc.vFile.headings) {`
			`Object.keys(doc.vFile.headings).forEach(function(headingTag) {`
			`doc.vFile.headings[headingTag].forEach(function(headingText) {`
			`extractWords(headingText, headingWords, headingWordMap);`
			`});`
			`});`
			`}`

			`// Extract the title to use in searches`
			`doc.searchTitle = doc.searchTitle \|\| doc.title \|\| doc.vFile && doc.vFile.title \|\| doc.name \|\| '';`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00
build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`// Attach all this search data to the document`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`doc.searchTerms = {`
build(docs-infra): improve search quality (#25750) PR Close #25750 2018-09-14 09:08:48 -04:00			`titleWords: tokenize(doc.searchTitle).join(' '),`
build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`headingWords: headingWords.sort().join(' '),`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`keywords: words.sort().join(' '),`
			`members: members.sort().join(' ')`
			`};`

			`});`

build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`// Now process all the search data and collect it up to be used in creating a new document`
			`var searchData = filteredDocs.map(function(page) {`
			`// Copy the properties from the searchTerms object onto the search data object`
			`return Object.assign({`
			`path: page.path,`
			`title: page.searchTitle,`
build(docs-infra): expose deprecated status on items more clearly (#25750) PR Close #25750 2018-09-17 12:37:18 -04:00			`type: page.docType,`
			`deprecated: !!page.deprecated,`
build(aio): add terms from heading to the search index 2017-07-04 12:59:08 -04:00			`}, page.searchTerms);`
			`});`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00
			`docs.push({`
			`docType: 'json-doc',`
			`id: 'search-data-json',`
			`path: this.outputFolder + '/search-data.json',`
			`outputPath: this.outputFolder + '/search-data.json',`
build(aio): the the captured h1 as the title for the search index If there is no title already provided, use the one captured from the renderedContent. 2017-05-30 15:24:54 -04:00			`data: searchData,`
			`renderedContent: JSON.stringify(searchData)`
build(aio): move doc-gen stuff from angular.io (#14097) 2017-01-26 09:03:53 -05:00			`});`
			`}`
			`};`
			`};`


			`function isString(value) {`
			`return typeof value == 'string';`
			`}`

			`function convertToMap(collection) {`
			`const obj = {};`
			`collection.forEach(key => { obj[key] = true; });`
			`return obj;`
			`}`