diff --git a/aio/package.json b/aio/package.json
index 3922a8354a..2157b78ab4 100644
--- a/aio/package.json
+++ b/aio/package.json
@@ -116,6 +116,7 @@
"@types/jasmine": "~3.6.0",
"@types/lunr": "^2.3.2",
"@types/node": "^12.7.9",
+ "@types/stemmer": "^1.0.2",
"@types/xregexp": "^3.0.30",
"@yarnpkg/lockfile": "^1.1.0",
"archiver": "^1.3.0",
@@ -166,6 +167,7 @@
"rimraf": "^2.6.1",
"semver": "^5.3.0",
"shelljs": "^0.8.4",
+ "stemmer": "^1.0.5",
"timezone-mock": "^1.1.3",
"tree-kill": "^1.1.0",
"ts-node": "^8.4.1",
diff --git a/aio/src/app/search/search.worker.ts b/aio/src/app/search/search.worker.ts
index 5c15cd1c6b..d31a5ce935 100644
--- a/aio/src/app/search/search.worker.ts
+++ b/aio/src/app/search/search.worker.ts
@@ -1,10 +1,11 @@
-import { WebWorkerMessage } from '../shared/web-worker-message';
import * as lunr from 'lunr';
+import {WebWorkerMessage} from '../shared/web-worker-message';
const SEARCH_TERMS_URL = '/generated/docs/app/search-data.json';
let index: lunr.Index;
-const pages: SearchInfo = {};
+const pageMap: SearchInfo = {};
interface SearchInfo {
[key: string]: PageInfo;
@@ -13,8 +14,25 @@ interface SearchInfo {
interface PageInfo {
path: string;
type: string;
- titleWords: string;
- keyWords: string;
+ title: string;
+ headings: string;
+ keywords: string;
+ members: string;
+ topics: string;
+interface EncodedPages {
+ dictionary: string[];
+ pages: EncodedPage[];
+interface EncodedPage {
+ path: string;
+ type: string;
+ title: string;
+ headings?: number[];  // dictionary indexes; absent when the page has no heading tokens
+ keywords?: number[];  // dictionary indexes; absent when the page has no keyword tokens
+ members?: number[];  // dictionary indexes; absent when the page has no member tokens
topics: string;
@@ -24,42 +42,42 @@ addEventListener('message', handleMessage);
// the path and search terms for a page
function createIndex(loadIndexFn: IndexLoader): lunr.Index {
// The lunr typings are missing QueryLexer so we have to add them here manually.
- const queryLexer = (lunr as any as { QueryLexer: { termSeparator: RegExp } }).QueryLexer;
+ const queryLexer = (lunr as any as {QueryLexer: {termSeparator: RegExp}}).QueryLexer;
queryLexer.termSeparator = lunr.tokenizer.separator = /\s+/;
return lunr(function() {
+ this.pipeline.remove(lunr.stemmer);
- this.field('topics', { boost: 15 });
- this.field('titleWords', { boost: 10 });
- this.field('headingWords', { boost: 5 });
- this.field('members', { boost: 4 });
- this.field('keywords', { boost: 2 });
+ this.field('topics', {boost: 15});
+ this.field('title', {boost: 10});
+ this.field('headings', {boost: 5});
+ this.field('members', {boost: 4});
+ this.field('keywords', {boost: 2});
// The worker receives a message to load the index and to query the index
-function handleMessage(message: { data: WebWorkerMessage }): void {
+function handleMessage(message: {data: WebWorkerMessage}): void {
const type = message.data.type;
const id = message.data.id;
const payload = message.data.payload;
switch (type) {
case 'load-index':
- makeRequest(SEARCH_TERMS_URL, (searchInfo: PageInfo[]) => {
- index = createIndex(loadIndex(searchInfo));
- postMessage({ type, id, payload: true });
+ makeRequest(SEARCH_TERMS_URL, (encodedPages: EncodedPages) => {
+ index = createIndex(loadIndex(encodedPages));
+ postMessage({type, id, payload: true});
case 'query-index':
- postMessage({ type, id, payload: { query: payload, results: queryIndex(payload) } });
+ postMessage({type, id, payload: {query: payload, results: queryIndex(payload)}});
- postMessage({ type, id, payload: { error: 'invalid message type' } });
+ postMessage({type, id, payload: {error: 'invalid message type'}});
// Use XHR to make a request to the server
function makeRequest(url: string, callback: (response: any) => void): void {
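- // The JSON file that is loaded should be an array of PageInfo:
+ // The JSON file that is loaded should be an `EncodedPages` object (a token dictionary plus the encoded pages):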
const searchDataRequest = new XMLHttpRequest();
searchDataRequest.onload = function() {
@@ -70,18 +88,29 @@ function makeRequest(url: string, callback: (response: any) => void): void {
-// Create the search index from the searchInfo which contains the information about each page to be indexed
-function loadIndex(pagesData: PageInfo[]): IndexLoader {
+// Create the search index from the searchInfo which contains the information about each page to be
+// indexed
+function loadIndex({dictionary, pages}: EncodedPages): IndexLoader {
return (indexBuilder: lunr.Builder) => {
// Store the pages data to be used in mapping query results back to pages
// Add search terms from each page to the search index
- pagesData.forEach(page => {
+ pages.forEach(encodedPage => {
+ const page = decodePage(encodedPage, dictionary);
- pages[page.path] = page;
+ pageMap[page.path] = page;
+function decodePage(encodedPage: EncodedPage, dictionary: string[]): PageInfo {
+ return {
+ ...encodedPage,
+ headings: encodedPage.headings?.map(i => dictionary[i]).join(' ') ?? '',
+ keywords: encodedPage.keywords?.map(i => dictionary[i]).join(' ') ?? '',
+ members: encodedPage.members?.map(i => dictionary[i]).join(' ') ?? '',
+ };
// Query the index and return the processed results
function queryIndex(query: string): PageInfo[] {
// Strip off quotes
@@ -105,7 +134,7 @@ function queryIndex(query: string): PageInfo[] {
// Map the hits into info about each page to be returned as results
- return results.map(hit => pages[hit.ref]);
+ return results.map(hit => pageMap[hit.ref]);
} catch (e) {
// If the search query cannot be parsed the index throws an error
diff --git a/aio/tools/transforms/angular-base-package/ignore-words.json b/aio/tools/transforms/angular-base-package/ignore-words.json
new file mode 100644
index 0000000000..0f56284ddb
--- /dev/null
+++ b/aio/tools/transforms/angular-base-package/ignore-words.json
@@ -0,0 +1,705 @@
+ "en": [
+ "a",
+ "able",
+ "about",
+ "above",
+ "abst",
+ "accordance",
+ "according",
+ "accordingly",
+ "across",
+ "act",
+ "actually",
+ "added",
+ "adj",
+ "adopted",
+ "affected",
+ "affecting",
+ "affects",
+ "after",
+ "afterwards",
+ "again",
+ "against",
+ "ah",
+ "all",
+ "almost",
+ "alone",
+ "along",
+ "already",
+ "also",
+ "although",
+ "always",
+ "am",
+ "among",
+ "amongst",
+ "an",
+ "and",
+ "announce",
+ "another",
+ "any",
+ "anybody",
+ "anyhow",
+ "anymore",
+ "anyone",
+ "anything",
+ "anyway",
+ "anyways",
+ "anywhere",
+ "apparently",
+ "approximately",
+ "are",
+ "aren",
+ "arent",
+ "arise",
+ "around",
+ "as",
+ "aside",
+ "ask",
+ "asking",
+ "at",
+ "auth",
+ "available",
+ "away",
+ "awfully",
+ "b",
+ "back",
+ "be",
+ "became",
+ "because",
+ "become",
+ "becomes",
+ "becoming",
+ "been",
+ "before",
+ "beforehand",
+ "begin",
+ "beginning",
+ "beginnings",
+ "begins",
+ "behind",
+ "being",
+ "believe",
+ "below",
+ "beside",
+ "besides",
+ "between",
+ "beyond",
+ "biol",
+ "both",
+ "brief",
+ "briefly",
+ "but",
+ "by",
+ "c",
+ "ca",
+ "came",
+ "can",
+ "cannot",
+ "can't",
+ "cant",
+ "cause",
+ "causes",
+ "certain",
+ "certainly",
+ "co",
+ "com",
+ "come",
+ "comes",
+ "contain",
+ "containing",
+ "contains",
+ "could",
+ "couldnt",
+ "d",
+ "date",
+ "did",
+ "didn't",
+ "didnt",
+ "different",
+ "do",
+ "does",
+ "doesn't",
+ "doesnt",
+ "doing",
+ "done",
+ "don't",
+ "dont",
+ "down",
+ "downwards",
+ "due",
+ "during",
+ "e",
+ "each",
+ "ed",
+ "edu",
+ "effect",
+ "eg",
+ "eight",
+ "eighty",
+ "either",
+ "else",
+ "elsewhere",
+ "end",
+ "ending",
+ "enough",
+ "especially",
+ "et",
+ "et-al",
+ "etc",
+ "even",
+ "ever",
+ "every",
+ "everybody",
+ "everyone",
+ "everything",
+ "everywhere",
+ "ex",
+ "except",
+ "f",
+ "far",
+ "few",
+ "ff",
+ "fifth",
+ "first",
+ "five",
+ "fix",
+ "followed",
+ "following",
+ "follows",
+ "for",
+ "former",
+ "formerly",
+ "forth",
+ "found",
+ "four",
+ "from",
+ "further",
+ "furthermore",
+ "g",
+ "gave",
+ "get",
+ "gets",
+ "getting",
+ "give",
+ "given",
+ "gives",
+ "giving",
+ "go",
+ "goes",
+ "gone",
+ "got",
+ "gotten",
+ "h",
+ "had",
+ "happens",
+ "hardly",
+ "has",
+ "hasn't",
+ "hasnt",
+ "have",
+ "haven't",
+ "havent",
+ "having",
+ "he",
+ "hed",
+ "hence",
+ "her",
+ "here",
+ "hereafter",
+ "hereby",
+ "herein",
+ "heres",
+ "hereupon",
+ "hers",
+ "herself",
+ "hes",
+ "hi",
+ "hid",
+ "him",
+ "himself",
+ "his",
+ "hither",
+ "home",
+ "how",
+ "howbeit",
+ "however",
+ "hundred",
+ "i",
+ "id",
+ "ie",
+ "if",
+ "i'll",
+ "ill",
+ "im",
+ "immediate",
+ "immediately",
+ "importance",
+ "important",
+ "in",
+ "inc",
+ "indeed",
+ "index",
+ "information",
+ "instead",
+ "into",
+ "invention",
+ "inward",
+ "is",
+ "isn't",
+ "isnt",
+ "it",
+ "itd",
+ "it'll",
+ "itll",
+ "its",
+ "itself",
+ "i've",
+ "ive",
+ "j",
+ "just",
+ "k",
+ "keep",
+ "keeps",
+ "kept",
+ "keys",
+ "kg",
+ "km",
+ "know",
+ "known",
+ "knows",
+ "l",
+ "largely",
+ "last",
+ "lately",
+ "later",
+ "latter",
+ "latterly",
+ "least",
+ "less",
+ "lest",
+ "let",
+ "lets",
+ "like",
+ "liked",
+ "likely",
+ "line",
+ "little",
+ "'ll",
+ "'ll",
+ "look",
+ "looking",
+ "looks",
+ "ltd",
+ "m",
+ "made",
+ "mainly",
+ "make",
+ "makes",
+ "many",
+ "may",
+ "maybe",
+ "me",
+ "mean",
+ "means",
+ "meantime",
+ "meanwhile",
+ "merely",
+ "mg",
+ "might",
+ "million",
+ "miss",
+ "ml",
+ "more",
+ "moreover",
+ "most",
+ "mostly",
+ "mr",
+ "mrs",
+ "much",
+ "mug",
+ "must",
+ "my",
+ "myself",
+ "n",
+ "na",
+ "name",
+ "namely",
+ "nay",
+ "nd",
+ "near",
+ "nearly",
+ "necessarily",
+ "necessary",
+ "need",
+ "needs",
+ "neither",
+ "never",
+ "nevertheless",
+ "new",
+ "next",
+ "nine",
+ "ninety",
+ "no",
+ "nobody",
+ "non",
+ "none",
+ "nonetheless",
+ "noone",
+ "nor",
+ "normally",
+ "nos",
+ "not",
+ "noted",
+ "nothing",
+ "now",
+ "nowhere",
+ "o",
+ "obtain",
+ "obtained",
+ "obviously",
+ "of",
+ "off",
+ "often",
+ "oh",
+ "ok",
+ "okay",
+ "old",
+ "omitted",
+ "on",
+ "once",
+ "one",
+ "ones",
+ "only",
+ "onto",
+ "or",
+ "ord",
+ "other",
+ "others",
+ "otherwise",
+ "ought",
+ "our",
+ "ours",
+ "ourselves",
+ "out",
+ "outside",
+ "over",
+ "overall",
+ "owing",
+ "own",
+ "p",
+ "page",
+ "pages",
+ "part",
+ "particular",
+ "particularly",
+ "past",
+ "per",
+ "perhaps",
+ "placed",
+ "please",
+ "plus",
+ "poorly",
+ "possible",
+ "possibly",
+ "potentially",
+ "pp",
+ "predominantly",
+ "present",
+ "previously",
+ "primarily",
+ "probably",
+ "promptly",
+ "proud",
+ "provides",
+ "put",
+ "q",
+ "que",
+ "quickly",
+ "quite",
+ "qv",
+ "r",
+ "ran",
+ "rather",
+ "rd",
+ "re",
+ "readily",
+ "really",
+ "recent",
+ "recently",
+ "ref",
+ "refs",
+ "regarding",
+ "regardless",
+ "regards",
+ "related",
+ "relatively",
+ "research",
+ "respectively",
+ "resulted",
+ "resulting",
+ "results",
+ "right",
+ "run",
+ "s",
+ "said",
+ "same",
+ "saw",
+ "say",
+ "saying",
+ "says",
+ "sec",
+ "section",
+ "see",
+ "seeing",
+ "seem",
+ "seemed",
+ "seeming",
+ "seems",
+ "seen",
+ "self",
+ "selves",
+ "sent",
+ "seven",
+ "several",
+ "shall",
+ "she",
+ "shed",
+ "she'll",
+ "shell",
+ "shes",
+ "should",
+ "shouldn't",
+ "shouldnt",
+ "show",
+ "showed",
+ "shown",
+ "showns",
+ "shows",
+ "significant",
+ "significantly",
+ "similar",
+ "similarly",
+ "since",
+ "six",
+ "slightly",
+ "so",
+ "some",
+ "somebody",
+ "somehow",
+ "someone",
+ "somethan",
+ "something",
+ "sometime",
+ "sometimes",
+ "somewhat",
+ "somewhere",
+ "soon",
+ "sorry",
+ "specifically",
+ "specified",
+ "specify",
+ "specifying",
+ "state",
+ "states",
+ "still",
+ "stop",
+ "strongly",
+ "sub",
+ "substantially",
+ "successfully",
+ "such",
+ "sufficiently",
+ "suggest",
+ "sup",
+ "sure",
+ "t",
+ "take",
+ "taken",
+ "taking",
+ "tell",
+ "tends",
+ "th",
+ "than",
+ "thank",
+ "thanks",
+ "thanx",
+ "that",
+ "that'll",
+ "thatll",
+ "thats",
+ "that've",
+ "thatve",
+ "the",
+ "their",
+ "theirs",
+ "them",
+ "themselves",
+ "then",
+ "thence",
+ "there",
+ "thereafter",
+ "thereby",
+ "thered",
+ "therefore",
+ "therein",
+ "there'll",
+ "therell",
+ "thereof",
+ "therere",
+ "theres",
+ "thereto",
+ "thereupon",
+ "there've",
+ "thereve",
+ "these",
+ "they",
+ "theyd",
+ "they'll",
+ "theyll",
+ "theyre",
+ "they've",
+ "theyve",
+ "think",
+ "this",
+ "those",
+ "thou",
+ "though",
+ "thoughh",
+ "thousand",
+ "throug",
+ "through",
+ "throughout",
+ "thru",
+ "thus",
+ "til",
+ "tip",
+ "to",
+ "together",
+ "too",
+ "took",
+ "toward",
+ "towards",
+ "tried",
+ "tries",
+ "truly",
+ "try",
+ "trying",
+ "ts",
+ "twice",
+ "two",
+ "u",
+ "un",
+ "under",
+ "unfortunately",
+ "unless",
+ "unlike",
+ "unlikely",
+ "until",
+ "unto",
+ "up",
+ "upon",
+ "ups",
+ "us",
+ "use",
+ "used",
+ "useful",
+ "usefully",
+ "usefulness",
+ "uses",
+ "using",
+ "usually",
+ "v",
+ "value",
+ "various",
+ "'ve",
+ "'ve",
+ "very",
+ "via",
+ "viz",
+ "vol",
+ "vols",
+ "vs",
+ "w",
+ "want",
+ "wants",
+ "was",
+ "wasn't",
+ "wasnt",
+ "way",
+ "we",
+ "wed",
+ "welcome",
+ "we'll",
+ "well",
+ "went",
+ "were",
+ "weren't",
+ "werent",
+ "we've",
+ "weve",
+ "what",
+ "whatever",
+ "what'll",
+ "whatll",
+ "whats",
+ "when",
+ "whence",
+ "whenever",
+ "where",
+ "whereafter",
+ "whereas",
+ "whereby",
+ "wherein",
+ "wheres",
+ "whereupon",
+ "wherever",
+ "whether",
+ "which",
+ "while",
+ "whim",
+ "whither",
+ "who",
+ "whod",
+ "whoever",
+ "whole",
+ "who'll",
+ "wholl",
+ "whom",
+ "whomever",
+ "whos",
+ "whose",
+ "why",
+ "widely",
+ "will",
+ "willing",
+ "wish",
+ "with",
+ "within",
+ "without",
+ "won't",
+ "wont",
+ "words",
+ "would",
+ "wouldn't",
+ "wouldnt",
+ "www",
+ "x",
+ "y",
+ "yes",
+ "yet",
+ "you",
+ "youd",
+ "you'll",
+ "youll",
+ "your",
+ "youre",
+ "yours",
+ "yourself",
+ "yourselves",
+ "you've",
+ "youve",
+ "z",
+ "zero"
+ ]
diff --git a/aio/tools/transforms/angular-base-package/ignore.words b/aio/tools/transforms/angular-base-package/ignore.words
deleted file mode 100644
index 82b9f2fc3f..0000000000
--- a/aio/tools/transforms/angular-base-package/ignore.words
+++ /dev/null
@@ -1,701 +0,0 @@
diff --git a/aio/tools/transforms/angular-base-package/index.js b/aio/tools/transforms/angular-base-package/index.js
index e62f4373fb..8c946c89ec 100644
--- a/aio/tools/transforms/angular-base-package/index.js
+++ b/aio/tools/transforms/angular-base-package/index.js
@@ -65,9 +65,9 @@ module.exports = new Package('angular-base', [
readFilesProcessor.sourceFiles = [];
collectExamples.exampleFolders = [];
- generateKeywordsProcessor.ignoreWordsFile = path.resolve(__dirname, 'ignore.words');
+ generateKeywordsProcessor.ignoreWords = require(path.resolve(__dirname, 'ignore-words'))['en'];
generateKeywordsProcessor.docTypesToIgnore = ['example-region'];
- generateKeywordsProcessor.propertiesToIgnore = ['basePath', 'renderedContent'];
+ generateKeywordsProcessor.propertiesToIgnore = ['basePath', 'renderedContent', 'docType', 'searchTitle'];
// Where do we write the output files?
diff --git a/aio/tools/transforms/angular-base-package/processors/generateKeywords.js b/aio/tools/transforms/angular-base-package/processors/generateKeywords.js
index 7cad35b0db..020d460de8 100644
--- a/aio/tools/transforms/angular-base-package/processors/generateKeywords.js
+++ b/aio/tools/transforms/angular-base-package/processors/generateKeywords.js
@@ -1,7 +1,6 @@
'use strict';
-var fs = require('fs');
-var path = require('canonical-path');
+const stem = require('stemmer');
* @dgProcessor generateKeywordsProcessor
@@ -10,103 +9,98 @@ var path = require('canonical-path');
* a new document that will be rendered as a JavaScript file containing all
* this data.
-module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
+module.exports = function generateKeywordsProcessor(log) {
return {
- ignoreWordsFile: undefined,
+ ignoreWords: [],
propertiesToIgnore: [],
docTypesToIgnore: [],
outputFolder: '',
$validate: {
- ignoreWordsFile: {},
+ ignoreWords: {},
docTypesToIgnore: {},
propertiesToIgnore: {},
outputFolder: {presence: true}
$runAfter: ['postProcessHtml'],
$runBefore: ['writing-files'],
- $process: function(docs) {
+ $process(docs) {
+ const dictionary = new Map();
// Keywords to ignore
- var wordsToIgnore = [];
- var propertiesToIgnore;
- var docTypesToIgnore;
- // Load up the keywords to ignore, if specified in the config
- if (this.ignoreWordsFile) {
- var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);
- wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);
- log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');
- log.silly(wordsToIgnore);
- }
- propertiesToIgnore = convertToMap(this.propertiesToIgnore);
+ const ignoreWords = new Set(this.ignoreWords);
+ log.debug('Words to ignore', ignoreWords);
+ const propertiesToIgnore = new Set(this.propertiesToIgnore);
log.debug('Properties to ignore', propertiesToIgnore);
- docTypesToIgnore = convertToMap(this.docTypesToIgnore);
+ const docTypesToIgnore = new Set(this.docTypesToIgnore);
log.debug('Doc types to ignore', docTypesToIgnore);
- var ignoreWordsMap = convertToMap(wordsToIgnore);
const filteredDocs = docs
// We are not interested in some docTypes
- .filter(function(doc) { return !docTypesToIgnore[doc.docType]; })
+ .filter(doc => !docTypesToIgnore.has(doc.docType))
// Ignore internals and private exports (indicated by the ɵ prefix)
- .filter(function(doc) { return !doc.internal && !doc.privateExport; });
+ .filter(doc => !doc.internal && !doc.privateExport);
- filteredDocs.forEach(function(doc) {
- var words = [];
- var keywordMap = Object.assign({}, ignoreWordsMap);
- var members = [];
- var membersMap = Object.assign({}, ignoreWordsMap);
- const headingWords = [];
- const headingWordMap = Object.assign({}, ignoreWordsMap);
+ for(const doc of filteredDocs) {
// Search each top level property of the document for search terms
- Object.keys(doc).forEach(function(key) {
+ let mainTokens = [];
+ for(const key of Object.keys(doc)) {
const value = doc[key];
- if (isString(value) && !propertiesToIgnore[key]) {
- extractWords(value, words, keywordMap);
+ if (isString(value) && !propertiesToIgnore.has(key)) {
+ mainTokens.push(...tokenize(value, ignoreWords, dictionary));
- });
+ }
- extractMemberWords(doc, members, membersMap);
+ const memberTokens = extractMemberTokens(doc, ignoreWords, dictionary);
// Extract all the keywords from the headings
+ let headingTokens = [];
if (doc.vFile && doc.vFile.headings) {
- Object.keys(doc.vFile.headings).forEach(function(headingTag) {
- doc.vFile.headings[headingTag].forEach(function(headingText) {
- extractWords(headingText, headingWords, headingWordMap);
- });
- });
+ for(const headingTag of Object.keys(doc.vFile.headings)) {
+ for(const headingText of doc.vFile.headings[headingTag]) {
+ headingTokens.push(...tokenize(headingText, ignoreWords, dictionary));
+ }
+ }
// Extract the title to use in searches
doc.searchTitle = doc.searchTitle || doc.title || doc.vFile && doc.vFile.title || doc.name || '';
// Attach all this search data to the document
- doc.searchTerms = {
- titleWords: tokenize(doc.searchTitle).join(' '),
- headingWords: headingWords.sort().join(' '),
- keywords: words.sort().join(' '),
- members: members.sort().join(' '),
- topics: doc.searchKeywords
- };
- });
+ doc.searchTerms = {};
+ if (headingTokens.length > 0) {
+ doc.searchTerms.headings = headingTokens;
+ }
+ if (mainTokens.length > 0) {
+ doc.searchTerms.keywords = mainTokens;
+ }
+ if (memberTokens.length > 0) {
+ doc.searchTerms.members = memberTokens;
+ }
+ if (doc.searchKeywords) {
+ doc.searchTerms.topics = doc.searchKeywords.trim();
+ }
+ }
// Now process all the search data and collect it up to be used in creating a new document
- var searchData = filteredDocs.map(function(page) {
- // Copy the properties from the searchTerms object onto the search data object
- return Object.assign({
- path: page.path,
- title: page.searchTitle,
- type: page.docType,
- deprecated: !!page.deprecated,
- }, page.searchTerms);
- });
+ const searchData = {
+ dictionary: Array.from(dictionary.keys()),
+ pages: filteredDocs.map(page => {
+ // Copy the properties from the searchTerms object onto the search data object
+ const searchObj = {
+ path: page.path,
+ title: page.searchTitle,
+ type: page.docType,
+ };
+ if (page.deprecated) {
+ searchObj.deprecated = true;
+ }
+ return Object.assign(searchObj, page.searchTerms);
+ }),
+ };
docType: 'json-doc',
@@ -120,63 +114,72 @@ module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
function isString(value) {
return typeof value == 'string';
-function convertToMap(collection) {
- const obj = {};
- collection.forEach(key => { obj[key] = true; });
- return obj;
-// If the heading contains a name starting with ng, e.g. "ngController", then add the
-// name without the ng to the text, e.g. "controller".
-function tokenize(text) {
- const rawTokens = text.split(/[\s\/]+/mg);
+// Split `text` into tokens, drop ignored words and tokens containing unexpected characters, and
+// return the dictionary indexes of the stemmed tokens that remain.
+function tokenize(text, ignoreWords, dictionary) {
+ // Split on whitespace and things that are likely to be HTML tags (this is not exhaustive but reduces the unwanted tokens that are indexed).
+ const rawTokens = text.split(/[\s\/]+|<\/?[a-z]+(?:\s+\w+(?:="[^"]+")?)*>/img);
const tokens = [];
- rawTokens.forEach(token => {
+ for(let token of rawTokens) {
+ token = token.trim();
// Strip off unwanted trivial characters
- token = token
- .trim()
- .replace(/^[_\-"'`({[<$*)}\]>.]+/, '')
- .replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
- // Ignore tokens that contain weird characters
- if (/^[\w.\-]+$/.test(token)) {
- tokens.push(token.toLowerCase());
- const ngTokenMatch = /^[nN]g([A-Z]\w*)/.exec(token);
- if (ngTokenMatch) {
- tokens.push(ngTokenMatch[1].toLowerCase());
- }
+ token = token.replace(/^[_\-"'`({[<$*)}\]>.]+/, '').replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
+ // Skip if in the ignored words list
+ if (ignoreWords.has(token.toLowerCase())) {
+ continue;
- });
+ // Skip tokens that contain weird characters
+ if (!/^[\w._-]+$/.test(token)) {
+ continue;
+ }
+ storeToken(token, tokens, dictionary);
+ // Also store the token without its leading "ng" (e.g. "Controller" for "ngController"). A bare
+ // "ng" token is excluded so that an empty string is never added to the dictionary.
+ if (token.length > 2 && token.startsWith('ng')) {
+ storeToken(token.slice(2), tokens, dictionary);
+ }
+ }
return tokens;
-function extractWords(text, words, keywordMap) {
- var tokens = tokenize(text);
- tokens.forEach(function(token) {
- if (!keywordMap[token]) {
- words.push(token);
- keywordMap[token] = true;
- }
- });
+// Stem `token`, add the stem to `dictionary` if it is not there yet (its index is the dictionary
+// size at insertion time), and append that index to `tokens`.
+function storeToken(token, tokens, dictionary) {
+ token = stem(token);
+ if (!dictionary.has(token)) {
+ dictionary.set(token, dictionary.size);
+ }
+ tokens.push(dictionary.get(token));
-function extractMemberWords(doc, members, membersMap) {
- if (!doc) return;
+// Collect the token indexes for the names of the members and statics of `doc` and, recursively,
+// of the docs referenced by its `extends` and `implements` clauses. Always returns an array.
+function extractMemberTokens(doc, ignoreWords, dictionary) {
+ if (!doc) return [];
+ let memberContent = [];
if (doc.members) {
- doc.members.forEach(member => extractWords(member.name, members, membersMap));
+ doc.members.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
if (doc.statics) {
- doc.statics.forEach(member => extractWords(member.name, members, membersMap));
+ doc.statics.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
if (doc.extendsClauses) {
- doc.extendsClauses.forEach(clause => extractMemberWords(clause.doc, members, membersMap));
+ doc.extendsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
if (doc.implementsClauses) {
- doc.implementsClauses.forEach(clause => extractMemberWords(clause.doc, members, membersMap));
+ doc.implementsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
\ No newline at end of file
+ return memberContent;
diff --git a/aio/tools/transforms/angular-base-package/processors/generateKeywords.spec.js b/aio/tools/transforms/angular-base-package/processors/generateKeywords.spec.js
index e482ee4a1b..3065a1c16e 100644
--- a/aio/tools/transforms/angular-base-package/processors/generateKeywords.spec.js
+++ b/aio/tools/transforms/angular-base-package/processors/generateKeywords.spec.js
@@ -1,12 +1,22 @@
+const path = require('canonical-path');
+const Dgeni = require('dgeni');
const testPackage = require('../../helpers/test-package');
const mockLogger = require('dgeni/lib/mocks/log')(false);
const processorFactory = require('./generateKeywords');
-const Dgeni = require('dgeni');
const mockReadFilesProcessor = {
basePath: 'base/path'
+const ignoreWords = require(path.resolve(__dirname, '../ignore-words'))['en'];
+function createProcessor() {
+ const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ processor.ignoreWords = ignoreWords;
+ return processor;
describe('generateKeywords processor', () => {
it('should be available on the injector', () => {
@@ -17,30 +27,81 @@ describe('generateKeywords processor', () => {
it('should run after the correct processor', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
it('should run before the correct processor', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
it('should ignore internal and private exports', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const docs = [
{ docType: 'class', name: 'PublicExport' },
{ docType: 'class', name: 'PrivateExport', privateExport: true },
{ docType: 'class', name: 'InternalExport', internal: true }
- expect(docs[docs.length - 1].data).toEqual([
- jasmine.objectContaining({ title: 'PublicExport', type: 'class'})
+ expect(docs[docs.length - 1].data.pages).toEqual([
+ jasmine.objectContaining({ title: 'PublicExport', type: 'class' })
+ it('should ignore docs that are in the `docTypesToIgnore` list', () => {
+ const processor = createProcessor();
+ processor.docTypesToIgnore = ['interface'];
+ const docs = [
+ { docType: 'class', name: 'Class' },
+ { docType: 'interface', name: 'Interface' },
+ { docType: 'content', name: 'Guide' },
+ ];
+ processor.$process(docs);
+ expect(docs[docs.length - 1].data.pages).toEqual([
+ jasmine.objectContaining({ title: 'Class', type: 'class' }),
+ jasmine.objectContaining({ title: 'Guide', type: 'content' }),
+ ]);
+ });
+ it('should not collect keywords from properties that are in the `propertiesToIgnore` list', () => {
+ const processor = createProcessor();
+ processor.propertiesToIgnore = ['docType', 'ignore'];
+ const docs = [
+ { docType: 'class', name: 'FooClass', ignore: 'ignore this content' },
+ { docType: 'interface', name: 'BarInterface', capture: 'capture this content' },
+ ];
+ processor.$process(docs);
+ expect(docs[docs.length - 1].data).toEqual({
+ dictionary: [ 'fooclass', 'barinterfac', 'captur', 'content' ],
+ pages: [
+ jasmine.objectContaining({ title: 'FooClass', type: 'class', keywords: [0] }),
+ jasmine.objectContaining({ title: 'BarInterface', type: 'interface', keywords: [1, 2, 3] }),
+ ],
+ });
+ });
+ it('should not collect keywords that look like HTML tags', () => {
+ const processor = createProcessor();
+ const docs = [
+ { docType: 'class', name: 'FooClass', content: `
+ Content inside a table |
` },
+ ];
+ processor.$process(docs);
+ expect(docs[docs.length - 1].data).toEqual({
+ dictionary: ['class', 'fooclass', 'content', 'insid', 'tabl'],
+ pages: [
+ jasmine.objectContaining({keywords: [0, 1, 2, 3, 4] })
+ ],
+ });
+ });
it('should compute `doc.searchTitle` from the doc properties if not already provided', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const docs = [
{ docType: 'class', name: 'A', searchTitle: 'searchTitle A', title: 'title A', vFile: { headings: { h1: ['vFile A'] } } },
{ docType: 'class', name: 'B', title: 'title B', vFile: { headings: { h1: ['vFile B'] } } },
@@ -48,7 +109,7 @@ describe('generateKeywords processor', () => {
{ docType: 'class', name: 'D' },
- expect(docs[docs.length - 1].data).toEqual([
+ expect(docs[docs.length - 1].data.pages).toEqual([
jasmine.objectContaining({ title: 'searchTitle A' }),
jasmine.objectContaining({ title: 'title B' }),
jasmine.objectContaining({ title: 'vFile C' }),
@@ -57,34 +118,19 @@ describe('generateKeywords processor', () => {
it('should use `doc.searchTitle` as the title in the search index', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const docs = [
{ docType: 'class', name: 'PublicExport', searchTitle: 'class PublicExport' },
const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data).toEqual([
- jasmine.objectContaining({ title: 'class PublicExport', type: 'class'})
+ expect(keywordsDoc.data.pages).toEqual([
+ jasmine.objectContaining({ title: 'class PublicExport', type: 'class' })
- it('should add title words to the search terms', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
- const docs = [
- {
- docType: 'class',
- name: 'PublicExport',
- searchTitle: 'class PublicExport',
- vFile: { headings: { h2: ['heading A', 'heading B'] } }
- },
- ];
- processor.$process(docs);
- const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data[0].titleWords).toEqual('class publicexport');
- });
it('should add heading words to the search terms', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const docs = [
docType: 'class',
@@ -95,11 +141,16 @@ describe('generateKeywords processor', () => {
const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data[0].headingWords).toEqual('heading important secondary');
+ expect(keywordsDoc.data).toEqual({
+ dictionary: ['class', 'publicexport', 'head', 'secondari'],
+ pages: [
+ jasmine.objectContaining({ headings: [2, 3, 2] })
+ ]
+ });
it('should add member doc properties to the search terms', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const docs = [
docType: 'class',
@@ -123,13 +174,18 @@ describe('generateKeywords processor', () => {
const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data[0].members).toEqual(
- 'instancemethoda instancemethodb instancepropertya instancepropertyb staticmethoda staticmethodb staticpropertya staticpropertyb'
- );
+ expect(keywordsDoc.data).toEqual({
+ dictionary: ['class', 'publicexport', 'content', 'ngclass', 'instancemethoda','instancepropertya','instancemethodb','instancepropertyb','staticmethoda','staticpropertya','staticmethodb','staticpropertyb', 'head'],
+ pages: [
+ jasmine.objectContaining({
+ members: [4, 5, 6, 7, 8, 9, 10, 11]
+ })
+ ]
+ });
it('should add inherited member doc properties to the search terms', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ const processor = createProcessor();
const parentClass = {
docType: 'class',
name: 'ParentClass',
@@ -163,13 +219,27 @@ describe('generateKeywords processor', () => {
const docs = [childClass, parentClass, parentInterface];
const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data[0].members.split(' ').sort().join(' ')).toEqual(
- 'childmember1 childmember2 parentmember1 parentmember2 parentmember3'
- );
+ expect(keywordsDoc.data).toEqual({
+ dictionary: ['class', 'child', 'childmember1', 'childmember2', 'parentmember1', 'parentmember2', 'parentmember3', 'parentclass', 'interfac', 'parentinterfac'],
+ pages: [
+ jasmine.objectContaining({
+ title: 'Child',
+ members: [2, 3, 4, 5, 6]
+ }),
+ jasmine.objectContaining({
+ title: 'ParentClass',
+ members: [4, 5]
+ }),
+ jasmine.objectContaining({
+ title: 'ParentInterface',
+ members: [6]
+ })
+ ]
+ });
- it('should process terms prefixed with "ng" to include the term stripped of "ng"', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ it('should include both stripped and unstripped "ng" prefixed tokens', () => {
+ const processor = createProcessor();
const docs = [
docType: 'class',
@@ -181,14 +251,19 @@ describe('generateKeywords processor', () => {
const keywordsDoc = docs[docs.length - 1];
- expect(keywordsDoc.data[0].titleWords).toEqual('ngcontroller controller');
- expect(keywordsDoc.data[0].headingWords).toEqual('model ngmodel');
- expect(keywordsDoc.data[0].keywords).toContain('class');
- expect(keywordsDoc.data[0].keywords).toContain('ngclass');
+ expect(keywordsDoc.data).toEqual({
+ dictionary: ['class', 'publicexport', 'ngcontrol', 'control', 'content', 'ngclass', 'ngmodel', 'model'],
+ pages: [
+ jasmine.objectContaining({
+ headings: [6, 7],
+ keywords: [0, 1, 2, 3, 4, 5, 0],
+ })
+ ],
+ });
- it('should generate renderedContent property', () => {
- const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+ it('should generate compressed encoded renderedContent property', () => {
+ const processor = createProcessor();
const docs = [
docType: 'class',
@@ -196,19 +271,33 @@ describe('generateKeywords processor', () => {
description: 'The is the documentation for the SomeClass API.',
vFile: { headings: { h1: ['SomeClass'], h2: ['Some heading'] } }
+ {
+ docType: 'class',
+ name: 'SomeClass2',
+ description: 'description',
+ members: [
+ { name: 'member1' },
+ ],
+ deprecated: true
+ },
const keywordsDoc = docs[docs.length - 1];
- expect(JSON.parse(keywordsDoc.renderedContent)).toEqual(
- [{
+ expect(JSON.parse(keywordsDoc.renderedContent)).toEqual({
+ dictionary: ['class', 'someclass', 'document', 'api', 'head', 'someclass2', 'descript', 'member1'],
+ pages: [{
- 'titleWords':'someclass',
- 'headingWords':'heading some someclass',
- 'keywords':'api class documentation for is someclass the',
- 'members':'',
- 'deprecated': false,
+ 'headings': [1, 4],
+ 'keywords': [0, 1, 2, 1, 3],
+ },
+ {
+ 'title':'SomeClass2',
+ 'type':'class',
+ 'keywords': [0, 5, 6],
+ 'members': [7],
+ 'deprecated': true,
- );
+ });
diff --git a/aio/yarn.lock b/aio/yarn.lock
index 6e5d4e71ad..bcfb0be3d0 100644
--- a/aio/yarn.lock
+++ b/aio/yarn.lock
@@ -2005,6 +2005,11 @@
resolved "https://registry.yarnpkg.com/@types/source-list-map/-/source-list-map-0.1.2.tgz#0078836063ffaf17412349bba364087e0ac02ec9"
integrity sha512-K5K+yml8LTo9bWJI/rECfIPrGgxdpeNbj+d53lwN4QjW1MCwlkhUms+gtdzigTeUyBr09+u8BwOIY3MXvHdcsA==
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/@types/stemmer/-/stemmer-1.0.2.tgz#bd8354f50b3c9b87c351d169240e45cf1fa1f5e8"
+ integrity sha512-2gWEIFqVZjjZxo8/TcugCAl7nW9Jd9ArEDpTAc5nH7d+ZUkreHA7GzuFcLZ0sflLrA5b1PZ+2yDyHJcuP9KWWw==
"@types/unist@*", "@types/unist@^2.0.0", "@types/unist@^2.0.2":
version "2.0.3"
resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.3.tgz#9c088679876f374eb5983f150d4787aa6fb32d7e"
@@ -12802,6 +12807,11 @@ static-extend@^0.1.1:
resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c"
integrity sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=
+ version "1.0.5"
+ resolved "https://registry.yarnpkg.com/stemmer/-/stemmer-1.0.5.tgz#fd89beaf8bff5d04b6643bfffcaed0fc420deec0"
+ integrity sha512-SLq7annzSKRDStasOJJoftCSCzBCKmBmH38jC4fDtCunAqOzpTpIm9zmaHmwNJiZ8gLe9qpVdBVbEG2DC5dE2A==
version "2.0.2"
resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.2.tgz#87521d38a44aa7ee91ce1cd2a47df0cb49dd660b"