refactor(docs-infra): include more info in search index data (#41368)

The AIO search index is built in a WebWorker on the browser from a set of page
information that is downloaded as a JSON file (`search-data.json`). We want to
keep this file as small as possible while providing enough data to generate a
useful index to query against. Previously, we only included one copy of each
(non-ignored) term from each doc, but this prevents more subtle ranking of
query results, since the number of occurrences of a term in a doc is lost.

This commit changes the generated file in the following ways:

- All non-ignored terms are now included, in the order in which they appear in
  the doc.
- The terms are indexed into a dictionary, to avoid the text of each term being
  repeated in every doc that contains it.
- Each term is pre-"stemmed" using the same Porter stemming algorithm that the
  Lunr search engine uses.

The web worker has been updated to decode the new format of the file. Now that
all terms are included, this may enable some level of phrase-based matching in
the future.

The size of the generated file is considerably larger than before, but
production HTTP servers send the data compressed, which reduces the transfer
size dramatically.

PR Close #41368
parent 55f7f1d446
commit fccffc647b
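To make the new format concrete, here is a small sketch (illustrative data only; the
real file is produced by the generateKeywordsProcessor changes below). Each
pre-stemmed term is stored once in a shared dictionary, and every page refers to terms
by index, so a term repeated across many docs costs one small number per occurrence:

// Sketch of the encoded search-data.json and how the worker decodes it.
const searchData = {
  dictionary: ['angular', 'router', 'document'],  // 'document' is the Porter stem of 'documentation'
  pages: [
    {path: 'guide/router', type: 'content', title: 'Routing', keywords: [0, 1, 2, 1]},
  ],
};

// Rebuild the space-separated term string that lunr indexes for one field:
function decodeField(indices, dictionary) {
  return (indices || []).map(i => dictionary[i]).join(' ');
}

decodeField(searchData.pages[0].keywords, searchData.dictionary);
// => 'angular router document router' (term order and repetition preserved)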
@@ -116,6 +116,7 @@
     "@types/jasmine": "~3.6.0",
     "@types/lunr": "^2.3.2",
     "@types/node": "^12.7.9",
+    "@types/stemmer": "^1.0.2",
     "@types/xregexp": "^3.0.30",
     "@yarnpkg/lockfile": "^1.1.0",
     "archiver": "^1.3.0",
@@ -166,6 +167,7 @@
     "rimraf": "^2.6.1",
     "semver": "^5.3.0",
     "shelljs": "^0.8.4",
+    "stemmer": "^1.0.5",
    "timezone-mock": "^1.1.3",
    "tree-kill": "^1.1.0",
    "ts-node": "^8.4.1",
@@ -1,10 +1,11 @@
 /// <reference lib="webworker" />
-import { WebWorkerMessage } from '../shared/web-worker-message';
 import * as lunr from 'lunr';
+
+import {WebWorkerMessage} from '../shared/web-worker-message';
 
 const SEARCH_TERMS_URL = '/generated/docs/app/search-data.json';
 let index: lunr.Index;
-const pages: SearchInfo = {};
+const pageMap: SearchInfo = {};
 
 interface SearchInfo {
   [key: string]: PageInfo;
@@ -13,8 +14,25 @@ interface SearchInfo {
 interface PageInfo {
   path: string;
   type: string;
-  titleWords: string;
-  keyWords: string;
+  title: string;
+  headings: string;
+  keywords: string;
+  members: string;
+  topics: string;
+}
+
+interface EncodedPages {
+  dictionary: string[];
+  pages: EncodedPage[];
+}
+
+interface EncodedPage {
+  path: string;
+  type: string;
+  title: string;
+  headings: number[];
+  keywords: number[];
+  members: number[];
+  topics: string;
 }
@@ -27,10 +45,11 @@ function createIndex(loadIndexFn: IndexLoader): lunr.Index {
   const queryLexer = (lunr as any as {QueryLexer: {termSeparator: RegExp}}).QueryLexer;
   queryLexer.termSeparator = lunr.tokenizer.separator = /\s+/;
   return lunr(function() {
+    this.pipeline.remove(lunr.stemmer);
     this.ref('path');
     this.field('topics', {boost: 15});
-    this.field('titleWords', { boost: 10 });
-    this.field('headingWords', { boost: 5 });
+    this.field('title', {boost: 10});
+    this.field('headings', {boost: 5});
+    this.field('members', {boost: 4});
     this.field('keywords', {boost: 2});
     loadIndexFn(this);
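Because every term in search-data.json is already stemmed at build time, the worker
removes lunr's own stemmer from the indexing pipeline; otherwise terms would be stemmed
twice. The build uses the stemmer package, which implements the same Porter algorithm
that lunr uses. A quick sketch of that parity (the word/stem pairs match the processor
specs further below):

const stem = require('stemmer');

stem('documentation');  // => 'document'
stem('heading');        // => 'head'
stem('controller');     // => 'control'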
@@ -44,8 +63,8 @@ function handleMessage(message: { data: WebWorkerMessage }): void {
   const payload = message.data.payload;
   switch (type) {
     case 'load-index':
-      makeRequest(SEARCH_TERMS_URL, (searchInfo: PageInfo[]) => {
-        index = createIndex(loadIndex(searchInfo));
+      makeRequest(SEARCH_TERMS_URL, (encodedPages: EncodedPages) => {
+        index = createIndex(loadIndex(encodedPages));
         postMessage({type, id, payload: true});
       });
       break;
@@ -59,7 +78,6 @@ function handleMessage(message: { data: WebWorkerMessage }): void {
 
 // Use XHR to make a request to the server
 function makeRequest(url: string, callback: (response: any) => void): void {
 
-  // The JSON file that is loaded should be an array of PageInfo:
   const searchDataRequest = new XMLHttpRequest();
   searchDataRequest.onload = function() {
@@ -70,18 +88,29 @@ function makeRequest(url: string, callback: (response: any) => void): void {
 }
 
-// Create the search index from the searchInfo which contains the information about each page to be indexed
-function loadIndex(pagesData: PageInfo[]): IndexLoader {
+// Create the search index from the searchInfo which contains the information about each page to be
+// indexed
+function loadIndex({dictionary, pages}: EncodedPages): IndexLoader {
   return (indexBuilder: lunr.Builder) => {
     // Store the pages data to be used in mapping query results back to pages
     // Add search terms from each page to the search index
-    pagesData.forEach(page => {
+    pages.forEach(encodedPage => {
+      const page = decodePage(encodedPage, dictionary);
       indexBuilder.add(page);
-      pages[page.path] = page;
+      pageMap[page.path] = page;
     });
   };
 }
 
+function decodePage(encodedPage: EncodedPage, dictionary: string[]): PageInfo {
+  return {
+    ...encodedPage,
+    headings: encodedPage.headings?.map(i => dictionary[i]).join(' ') ?? '',
+    keywords: encodedPage.keywords?.map(i => dictionary[i]).join(' ') ?? '',
+    members: encodedPage.members?.map(i => dictionary[i]).join(' ') ?? '',
+  };
+}
+
 // Query the index and return the processed results
 function queryIndex(query: string): PageInfo[] {
   // Strip off quotes
@@ -105,7 +134,7 @@ function queryIndex(query: string): PageInfo[] {
       }
 
       // Map the hits into info about each page to be returned as results
-      return results.map(hit => pages[hit.ref]);
+      return results.map(hit => pageMap[hit.ref]);
     }
   } catch (e) {
     // If the search query cannot be parsed the index throws an error
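At query time, lunr returns hits whose `ref` is the indexed page path, and queryIndex
(above) maps each hit back to a decoded PageInfo through pageMap. A minimal usage
sketch, assuming the index and pageMap built by the load-index handler:

const hits = index.search('router');
const resultPages = hits.map(hit => pageMap[hit.ref]);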
@@ -0,0 +1,705 @@
{
  "en": [
    "a", "able", "about", "above", "abst", "accordance", "according", "accordingly", "across", "act",
    "actually", "added", "adj", "adopted", "affected", "affecting", "affects", "after", "afterwards", "again",
    "against", "ah", "all", "almost", "alone", "along", "already", "also", "although", "always",
    "am", "among", "amongst", "an", "and", "announce", "another", "any", "anybody", "anyhow",
    "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apparently", "approximately", "are", "aren",
    "arent", "arise", "around", "as", "aside", "ask", "asking", "at", "auth", "available",
    "away", "awfully", "b", "back", "be", "became", "because", "become", "becomes", "becoming",
    "been", "before", "beforehand", "begin", "beginning", "beginnings", "begins", "behind", "being", "believe",
    "below", "beside", "besides", "between", "beyond", "biol", "both", "brief", "briefly", "but",
    "by", "c", "ca", "came", "can", "cannot", "can't", "cant", "cause", "causes",
    "certain", "certainly", "co", "com", "come", "comes", "contain", "containing", "contains", "could",
    "couldnt", "d", "date", "did", "didn't", "didnt", "different", "do", "does", "doesn't",
    "doesnt", "doing", "done", "don't", "dont", "down", "downwards", "due", "during", "e",
    "each", "ed", "edu", "effect", "eg", "eight", "eighty", "either", "else", "elsewhere",
    "end", "ending", "enough", "especially", "et", "et-al", "etc", "even", "ever", "every",
    "everybody", "everyone", "everything", "everywhere", "ex", "except", "f", "far", "few", "ff",
    "fifth", "first", "five", "fix", "followed", "following", "follows", "for", "former", "formerly",
    "forth", "found", "four", "from", "further", "furthermore", "g", "gave", "get", "gets",
    "getting", "give", "given", "gives", "giving", "go", "goes", "gone", "got", "gotten",
    "h", "had", "happens", "hardly", "has", "hasn't", "hasnt", "have", "haven't", "havent",
    "having", "he", "hed", "hence", "her", "here", "hereafter", "hereby", "herein", "heres",
    "hereupon", "hers", "herself", "hes", "hi", "hid", "him", "himself", "his", "hither",
    "home", "how", "howbeit", "however", "hundred", "i", "id", "ie", "if", "i'll",
    "ill", "im", "immediate", "immediately", "importance", "important", "in", "inc", "indeed", "index",
    "information", "instead", "into", "invention", "inward", "is", "isn't", "isnt", "it", "itd",
    "it'll", "itll", "its", "itself", "i've", "ive", "j", "just", "k", "keep",
    "keeps", "kept", "keys", "kg", "km", "know", "known", "knows", "l", "largely",
    "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "lets",
    "like", "liked", "likely", "line", "little", "'ll", "'ll", "look", "looking", "looks",
    "ltd", "m", "made", "mainly", "make", "makes", "many", "may", "maybe", "me",
    "mean", "means", "meantime", "meanwhile", "merely", "mg", "might", "million", "miss", "ml",
    "more", "moreover", "most", "mostly", "mr", "mrs", "much", "mug", "must", "my",
    "myself", "n", "na", "name", "namely", "nay", "nd", "near", "nearly", "necessarily",
    "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "ninety",
    "no", "nobody", "non", "none", "nonetheless", "noone", "nor", "normally", "nos", "not",
    "noted", "nothing", "now", "nowhere", "o", "obtain", "obtained", "obviously", "of", "off",
    "often", "oh", "ok", "okay", "old", "omitted", "on", "once", "one", "ones",
    "only", "onto", "or", "ord", "other", "others", "otherwise", "ought", "our", "ours",
    "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages",
    "part", "particular", "particularly", "past", "per", "perhaps", "placed", "please", "plus", "poorly",
    "possible", "possibly", "potentially", "pp", "predominantly", "present", "previously", "primarily", "probably", "promptly",
    "proud", "provides", "put", "q", "que", "quickly", "quite", "qv", "r", "ran",
    "rather", "rd", "re", "readily", "really", "recent", "recently", "ref", "refs", "regarding",
    "regardless", "regards", "related", "relatively", "research", "respectively", "resulted", "resulting", "results", "right",
    "run", "s", "said", "same", "saw", "say", "saying", "says", "sec", "section",
    "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sent",
    "seven", "several", "shall", "she", "shed", "she'll", "shell", "shes", "should", "shouldn't",
    "shouldnt", "show", "showed", "shown", "showns", "shows", "significant", "significantly", "similar", "similarly",
    "since", "six", "slightly", "so", "some", "somebody", "somehow", "someone", "somethan", "something",
    "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying",
    "state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", "sufficiently",
    "suggest", "sup", "sure", "t", "take", "taken", "taking", "tell", "tends", "th",
    "than", "thank", "thanks", "thanx", "that", "that'll", "thatll", "thats", "that've", "thatve",
    "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby",
    "thered", "therefore", "therein", "there'll", "therell", "thereof", "therere", "theres", "thereto", "thereupon",
    "there've", "thereve", "these", "they", "theyd", "they'll", "theyll", "theyre", "they've", "theyve",
    "think", "this", "those", "thou", "though", "thoughh", "thousand", "throug", "through", "throughout",
    "thru", "thus", "til", "tip", "to", "together", "too", "took", "toward", "towards",
    "tried", "tries", "truly", "try", "trying", "ts", "twice", "two", "u", "un",
    "under", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups",
    "us", "use", "used", "useful", "usefully", "usefulness", "uses", "using", "usually", "v",
    "value", "various", "'ve", "'ve", "very", "via", "viz", "vol", "vols", "vs",
    "w", "want", "wants", "was", "wasn't", "wasnt", "way", "we", "wed", "welcome",
    "we'll", "well", "went", "were", "weren't", "werent", "we've", "weve", "what", "whatever",
    "what'll", "whatll", "whats", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby",
    "wherein", "wheres", "whereupon", "wherever", "whether", "which", "while", "whim", "whither", "who",
    "whod", "whoever", "whole", "who'll", "wholl", "whom", "whomever", "whos", "whose", "why",
    "widely", "will", "willing", "wish", "with", "within", "without", "won't", "wont", "words",
    "would", "wouldn't", "wouldnt", "www", "x", "y", "yes", "yet", "you", "youd",
    "you'll", "youll", "your", "youre", "yours", "yourself", "yourselves", "you've", "youve", "z",
    "zero"
  ]
}
@@ -1,701 +0,0 @@
(deleted: the old `ignore.words` file, which contained the same 701 stop words listed above, one word per line)
@@ -65,9 +65,9 @@ module.exports = new Package('angular-base', [
     readFilesProcessor.sourceFiles = [];
     collectExamples.exampleFolders = [];
 
-    generateKeywordsProcessor.ignoreWordsFile = path.resolve(__dirname, 'ignore.words');
+    generateKeywordsProcessor.ignoreWords = require(path.resolve(__dirname, 'ignore-words'))['en'];
     generateKeywordsProcessor.docTypesToIgnore = ['example-region'];
-    generateKeywordsProcessor.propertiesToIgnore = ['basePath', 'renderedContent'];
+    generateKeywordsProcessor.propertiesToIgnore = ['basePath', 'renderedContent', 'docType', 'searchTitle'];
   })
 
   // Where do we write the output files?
@@ -1,7 +1,6 @@
 'use strict';
 
-var fs = require('fs');
 var path = require('canonical-path');
+const stem = require('stemmer');
 
 /**
  * @dgProcessor generateKeywordsProcessor
@@ -10,103 +9,98 @@ var path = require('canonical-path');
 * a new document that will be rendered as a JavaScript file containing all
 * this data.
 */
-module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
+module.exports = function generateKeywordsProcessor(log) {
   return {
-    ignoreWordsFile: undefined,
+    ignoreWords: [],
     propertiesToIgnore: [],
     docTypesToIgnore: [],
     outputFolder: '',
     $validate: {
-      ignoreWordsFile: {},
+      ignoreWords: {},
       docTypesToIgnore: {},
       propertiesToIgnore: {},
       outputFolder: {presence: true}
     },
     $runAfter: ['postProcessHtml'],
     $runBefore: ['writing-files'],
-    $process: function(docs) {
+    $process(docs) {
+
+      const dictionary = new Map();
 
-      // Keywords to ignore
-      var wordsToIgnore = [];
-      var propertiesToIgnore;
-      var docTypesToIgnore;
-
-      // Load up the keywords to ignore, if specified in the config
-      if (this.ignoreWordsFile) {
-        var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);
-        wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);
-
-        log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');
-        log.silly(wordsToIgnore);
-      }
-
-      propertiesToIgnore = convertToMap(this.propertiesToIgnore);
+      const ignoreWords = new Set(this.ignoreWords);
+      log.debug('Words to ignore', ignoreWords);
+      const propertiesToIgnore = new Set(this.propertiesToIgnore);
       log.debug('Properties to ignore', propertiesToIgnore);
-      docTypesToIgnore = convertToMap(this.docTypesToIgnore);
+      const docTypesToIgnore = new Set(this.docTypesToIgnore);
       log.debug('Doc types to ignore', docTypesToIgnore);
 
-      var ignoreWordsMap = convertToMap(wordsToIgnore);
-
       const filteredDocs = docs
           // We are not interested in some docTypes
-          .filter(function(doc) { return !docTypesToIgnore[doc.docType]; })
+          .filter(doc => !docTypesToIgnore.has(doc.docType))
          // Ignore internals and private exports (indicated by the ɵ prefix)
-          .filter(function(doc) { return !doc.internal && !doc.privateExport; });
+          .filter(doc => !doc.internal && !doc.privateExport);
 
-      filteredDocs.forEach(function(doc) {
-
-        var words = [];
-        var keywordMap = Object.assign({}, ignoreWordsMap);
-        var members = [];
-        var membersMap = Object.assign({}, ignoreWordsMap);
-        const headingWords = [];
-        const headingWordMap = Object.assign({}, ignoreWordsMap);
-
+      for(const doc of filteredDocs) {
         // Search each top level property of the document for search terms
-        Object.keys(doc).forEach(function(key) {
+        let mainTokens = [];
+        for(const key of Object.keys(doc)) {
           const value = doc[key];
-          if (isString(value) && !propertiesToIgnore[key]) {
-            extractWords(value, words, keywordMap);
+          if (isString(value) && !propertiesToIgnore.has(key)) {
+            mainTokens.push(...tokenize(value, ignoreWords, dictionary));
           }
-        });
+        }
 
-        extractMemberWords(doc, members, membersMap);
+        const memberTokens = extractMemberTokens(doc, ignoreWords, dictionary);
 
         // Extract all the keywords from the headings
+        let headingTokens = [];
         if (doc.vFile && doc.vFile.headings) {
-          Object.keys(doc.vFile.headings).forEach(function(headingTag) {
-            doc.vFile.headings[headingTag].forEach(function(headingText) {
-              extractWords(headingText, headingWords, headingWordMap);
-            });
-          });
+          for(const headingTag of Object.keys(doc.vFile.headings)) {
+            for(const headingText of doc.vFile.headings[headingTag]) {
+              headingTokens.push(...tokenize(headingText, ignoreWords, dictionary));
+            }
+          }
         }
 
         // Extract the title to use in searches
         doc.searchTitle = doc.searchTitle || doc.title || doc.vFile && doc.vFile.title || doc.name || '';
 
         // Attach all this search data to the document
-        doc.searchTerms = {
-          titleWords: tokenize(doc.searchTitle).join(' '),
-          headingWords: headingWords.sort().join(' '),
-          keywords: words.sort().join(' '),
-          members: members.sort().join(' '),
-          topics: doc.searchKeywords
-        };
-
-      });
+        doc.searchTerms = {};
+        if (headingTokens.length > 0) {
+          doc.searchTerms.headings = headingTokens;
+        }
+        if (mainTokens.length > 0) {
+          doc.searchTerms.keywords = mainTokens;
+        }
+        if (memberTokens.length > 0) {
+          doc.searchTerms.members = memberTokens;
+        }
+        if (doc.searchKeywords) {
+          doc.searchTerms.topics = doc.searchKeywords.trim();
+        }
+      }
 
       // Now process all the search data and collect it up to be used in creating a new document
-      var searchData = filteredDocs.map(function(page) {
-        // Copy the properties from the searchTerms object onto the search data object
-        return Object.assign({
-          path: page.path,
-          title: page.searchTitle,
-          type: page.docType,
-          deprecated: !!page.deprecated,
-        }, page.searchTerms);
-      });
+      const searchData = {
+        dictionary: Array.from(dictionary.keys()),
+        pages: filteredDocs.map(page => {
+          // Copy the properties from the searchTerms object onto the search data object
+          const searchObj = {
+            path: page.path,
+            title: page.searchTitle,
+            type: page.docType,
+          };
+          if (page.deprecated) {
+            searchObj.deprecated = true;
+          }
+          return Object.assign(searchObj, page.searchTerms);
+        }),
+      };
 
       docs.push({
         docType: 'json-doc',
@@ -120,63 +114,64 @@ module.exports = function generateKeywordsProcessor(log) {
     };
   };
 
 
 function isString(value) {
   return typeof value == 'string';
 }
 
-function convertToMap(collection) {
-  const obj = {};
-  collection.forEach(key => { obj[key] = true; });
-  return obj;
-}
-
-// If the heading contains a name starting with ng, e.g. "ngController", then add the
-// name without the ng to the text, e.g. "controller".
-function tokenize(text) {
-  const rawTokens = text.split(/[\s\/]+/mg);
-  const tokens = [];
-  rawTokens.forEach(token => {
-    // Strip off unwanted trivial characters
-    token = token
-        .trim()
-        .replace(/^[_\-"'`({[<$*)}\]>.]+/, '')
-        .replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
-    // Ignore tokens that contain weird characters
-    if (/^[\w.\-]+$/.test(token)) {
-      tokens.push(token.toLowerCase());
-      const ngTokenMatch = /^[nN]g([A-Z]\w*)/.exec(token);
-      if (ngTokenMatch) {
-        tokens.push(ngTokenMatch[1].toLowerCase());
-      }
-    }
-  });
+function tokenize(text, ignoreWords, dictionary) {
+  // Split on whitespace and things that are likely to be HTML tags (this is not exhaustive but reduces the unwanted tokens that are indexed).
+  const rawTokens = text.split(/[\s\/]+|<\/?[a-z]+(?:\s+\w+(?:="[^"]+")?)*>/img);
+  const tokens = [];
+  for(let token of rawTokens) {
+    token = token.trim();
+
+    // Strip off unwanted trivial characters
+    token = token.replace(/^[_\-"'`({[<$*)}\]>.]+/, '').replace(/[_\-"'`({[<$*)}\]>.]+$/, '');
+
+    // Skip if in the ignored words list
+    if (ignoreWords.has(token.toLowerCase())) {
+      continue;
+    }
+
+    // Skip tokens that contain weird characters
+    if (!/^[\w._-]+$/.test(token)) {
+      continue;
+    }
+
+    storeToken(token, tokens, dictionary);
+    if (token.startsWith('ng')) {
+      storeToken(token.substr(2), tokens, dictionary);
+    }
+  }
   return tokens;
 }
 
-function extractWords(text, words, keywordMap) {
-  var tokens = tokenize(text);
-  tokens.forEach(function(token) {
-    if (!keywordMap[token]) {
-      words.push(token);
-      keywordMap[token] = true;
-    }
-  });
+function storeToken(token, tokens, dictionary) {
+  token = stem(token);
+  if (!dictionary.has(token)) {
+    dictionary.set(token, dictionary.size);
+  }
+  tokens.push(dictionary.get(token));
 }
 
-function extractMemberWords(doc, members, membersMap) {
-  if (!doc) return;
+function extractMemberTokens(doc, ignoreWords, dictionary) {
+  if (!doc) return '';
 
+  let memberContent = [];
+
   if (doc.members) {
-    doc.members.forEach(member => extractWords(member.name, members, membersMap));
+    doc.members.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
   }
   if (doc.statics) {
-    doc.statics.forEach(member => extractWords(member.name, members, membersMap));
+    doc.statics.forEach(member => memberContent.push(...tokenize(member.name, ignoreWords, dictionary)));
   }
   if (doc.extendsClauses) {
-    doc.extendsClauses.forEach(clause => extractMemberWords(clause.doc, members, membersMap));
+    doc.extendsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
   }
   if (doc.implementsClauses) {
-    doc.implementsClauses.forEach(clause => extractMemberWords(clause.doc, members, membersMap));
+    doc.implementsClauses.forEach(clause => memberContent.push(...extractMemberTokens(clause.doc, ignoreWords, dictionary)));
   }
+
+  return memberContent;
 }
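End to end, tokenize() and storeToken() turn text into dictionary indices. A hedged
sketch of the encoding for one made-up input (the stems and the "ng" handling match the
specs below):

const stem = require('stemmer');

const dictionary = new Map();
function storeToken(token, tokens) {
  token = stem(token);                       // pre-stem with the Porter algorithm
  if (!dictionary.has(token)) {
    dictionary.set(token, dictionary.size);  // first sighting defines the index
  }
  tokens.push(dictionary.get(token));
}

const tokens = [];
for (const word of ['ngmodel', 'model', 'documentation']) {
  storeToken(word, tokens);
  if (word.startsWith('ng')) {
    storeToken(word.substr(2), tokens);      // also index the de-prefixed name
  }
}
// dictionary keys: ['ngmodel', 'model', 'document']
// tokens:          [0, 1, 1, 2]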
@@ -1,12 +1,22 @@
+const path = require('canonical-path');
+const Dgeni = require('dgeni');
+
 const testPackage = require('../../helpers/test-package');
 const mockLogger = require('dgeni/lib/mocks/log')(false);
 const processorFactory = require('./generateKeywords');
-const Dgeni = require('dgeni');
 
 const mockReadFilesProcessor = {
   basePath: 'base/path'
 };
 
+const ignoreWords = require(path.resolve(__dirname, '../ignore-words'))['en'];
+
+function createProcessor() {
+  const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+  processor.ignoreWords = ignoreWords;
+  return processor;
+}
+
 describe('generateKeywords processor', () => {
 
   it('should be available on the injector', () => {
@@ -17,30 +27,81 @@ describe('generateKeywords processor', () => {
   });
 
   it('should run after the correct processor', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     expect(processor.$runAfter).toEqual(['postProcessHtml']);
   });
 
   it('should run before the correct processor', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     expect(processor.$runBefore).toEqual(['writing-files']);
   });
 
   it('should ignore internal and private exports', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const docs = [
       { docType: 'class', name: 'PublicExport' },
       { docType: 'class', name: 'PrivateExport', privateExport: true },
       { docType: 'class', name: 'InternalExport', internal: true }
     ];
     processor.$process(docs);
-    expect(docs[docs.length - 1].data).toEqual([
+    expect(docs[docs.length - 1].data.pages).toEqual([
       jasmine.objectContaining({ title: 'PublicExport', type: 'class' })
     ]);
   });
 
+  it('should ignore docs that are in the `docTypesToIgnore` list', () => {
+    const processor = createProcessor();
+    processor.docTypesToIgnore = ['interface'];
+    const docs = [
+      { docType: 'class', name: 'Class' },
+      { docType: 'interface', name: 'Interface' },
+      { docType: 'content', name: 'Guide' },
+    ];
+    processor.$process(docs);
+    expect(docs[docs.length - 1].data.pages).toEqual([
+      jasmine.objectContaining({ title: 'Class', type: 'class' }),
+      jasmine.objectContaining({ title: 'Guide', type: 'content' }),
+    ]);
+  });
+
+  it('should not collect keywords from properties that are in the `propertiesToIgnore` list', () => {
+    const processor = createProcessor();
+    processor.propertiesToIgnore = ['docType', 'ignore'];
+    const docs = [
+      { docType: 'class', name: 'FooClass', ignore: 'ignore this content' },
+      { docType: 'interface', name: 'BarInterface', capture: 'capture this content' },
+    ];
+    processor.$process(docs);
+    expect(docs[docs.length - 1].data).toEqual({
+      dictionary: [ 'fooclass', 'barinterfac', 'captur', 'content' ],
+      pages: [
+        jasmine.objectContaining({ title: 'FooClass', type: 'class', keywords: [0] }),
+        jasmine.objectContaining({ title: 'BarInterface', type: 'interface', keywords: [1, 2, 3] }),
+      ],
+    });
+  });
+
+  it('should not collect keywords that look like HTML tags', () => {
+    const processor = createProcessor();
+    const docs = [
+      { docType: 'class', name: 'FooClass', content: `
+          <table id="foo">
+            <tr class="moo" id="bar">
+              <td>Content inside a table</td>
+            </tr>
+          </table>` },
+    ];
+    processor.$process(docs);
+    expect(docs[docs.length - 1].data).toEqual({
+      dictionary: ['class', 'fooclass', 'content', 'insid', 'tabl'],
+      pages: [
+        jasmine.objectContaining({keywords: [0, 1, 2, 3, 4] })
+      ],
+    });
+  });
+
   it('should compute `doc.searchTitle` from the doc properties if not already provided', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const docs = [
       { docType: 'class', name: 'A', searchTitle: 'searchTitle A', title: 'title A', vFile: { headings: { h1: ['vFile A'] } } },
       { docType: 'class', name: 'B', title: 'title B', vFile: { headings: { h1: ['vFile B'] } } },
@@ -48,7 +109,7 @@ describe('generateKeywords processor', () => {
       { docType: 'class', name: 'D' },
     ];
     processor.$process(docs);
-    expect(docs[docs.length - 1].data).toEqual([
+    expect(docs[docs.length - 1].data.pages).toEqual([
       jasmine.objectContaining({ title: 'searchTitle A' }),
       jasmine.objectContaining({ title: 'title B' }),
       jasmine.objectContaining({ title: 'vFile C' }),
@@ -57,34 +118,19 @@ describe('generateKeywords processor', () => {
   });
 
   it('should use `doc.searchTitle` as the title in the search index', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const docs = [
       { docType: 'class', name: 'PublicExport', searchTitle: 'class PublicExport' },
     ];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data).toEqual([
+    expect(keywordsDoc.data.pages).toEqual([
       jasmine.objectContaining({ title: 'class PublicExport', type: 'class' })
     ]);
   });
 
-  it('should add title words to the search terms', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
-    const docs = [
-      {
-        docType: 'class',
-        name: 'PublicExport',
-        searchTitle: 'class PublicExport',
-        vFile: { headings: { h2: ['heading A', 'heading B'] } }
-      },
-    ];
-    processor.$process(docs);
-    const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data[0].titleWords).toEqual('class publicexport');
-  });
-
   it('should add heading words to the search terms', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const docs = [
       {
         docType: 'class',
@@ -95,11 +141,16 @@ describe('generateKeywords processor', () => {
     ];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data[0].headingWords).toEqual('heading important secondary');
+    expect(keywordsDoc.data).toEqual({
+      dictionary: ['class', 'publicexport', 'head', 'secondari'],
+      pages: [
+        jasmine.objectContaining({ headings: [2, 3, 2] })
+      ]
+    });
   });
 
   it('should add member doc properties to the search terms', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const docs = [
       {
         docType: 'class',
@@ -123,13 +174,18 @@ describe('generateKeywords processor', () => {
     ];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data[0].members).toEqual(
-      'instancemethoda instancemethodb instancepropertya instancepropertyb staticmethoda staticmethodb staticpropertya staticpropertyb'
-    );
+    expect(keywordsDoc.data).toEqual({
+      dictionary: ['class', 'publicexport', 'content', 'ngclass', 'instancemethoda', 'instancepropertya', 'instancemethodb', 'instancepropertyb', 'staticmethoda', 'staticpropertya', 'staticmethodb', 'staticpropertyb', 'head'],
+      pages: [
+        jasmine.objectContaining({
+          members: [4, 5, 6, 7, 8, 9, 10, 11]
+        })
+      ]
+    });
   });
 
   it('should add inherited member doc properties to the search terms', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+    const processor = createProcessor();
     const parentClass = {
       docType: 'class',
       name: 'ParentClass',
@@ -163,13 +219,27 @@ describe('generateKeywords processor', () => {
     const docs = [childClass, parentClass, parentInterface];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data[0].members.split(' ').sort().join(' ')).toEqual(
-      'childmember1 childmember2 parentmember1 parentmember2 parentmember3'
-    );
+    expect(keywordsDoc.data).toEqual({
+      dictionary: ['class', 'child', 'childmember1', 'childmember2', 'parentmember1', 'parentmember2', 'parentmember3', 'parentclass', 'interfac', 'parentinterfac'],
+      pages: [
+        jasmine.objectContaining({
+          title: 'Child',
+          members: [2, 3, 4, 5, 6]
+        }),
+        jasmine.objectContaining({
+          title: 'ParentClass',
+          members: [4, 5]
+        }),
+        jasmine.objectContaining({
+          title: 'ParentInterface',
+          members: [6]
+        })
+      ]
+    });
   });
 
-  it('should process terms prefixed with "ng" to include the term stripped of "ng"', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+  it('should include both stripped and unstripped "ng" prefixed tokens', () => {
+    const processor = createProcessor();
     const docs = [
       {
         docType: 'class',
@@ -181,14 +251,19 @@ describe('generateKeywords processor', () => {
     ];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(keywordsDoc.data[0].titleWords).toEqual('ngcontroller controller');
-    expect(keywordsDoc.data[0].headingWords).toEqual('model ngmodel');
-    expect(keywordsDoc.data[0].keywords).toContain('class');
-    expect(keywordsDoc.data[0].keywords).toContain('ngclass');
+    expect(keywordsDoc.data).toEqual({
+      dictionary: ['class', 'publicexport', 'ngcontrol', 'control', 'content', 'ngclass', 'ngmodel', 'model'],
+      pages: [
+        jasmine.objectContaining({
+          headings: [6, 7],
+          keywords: [0, 1, 2, 3, 4, 5, 0],
+        })
+      ],
+    });
   });
 
-  it('should generate renderedContent property', () => {
-    const processor = processorFactory(mockLogger, mockReadFilesProcessor);
+  it('should generate compressed encoded renderedContent property', () => {
+    const processor = createProcessor();
     const docs = [
       {
         docType: 'class',
@@ -196,19 +271,33 @@ describe('generateKeywords processor', () => {
         description: 'The is the documentation for the SomeClass API.',
         vFile: { headings: { h1: ['SomeClass'], h2: ['Some heading'] } }
       },
+      {
+        docType: 'class',
+        name: 'SomeClass2',
+        description: 'description',
+        members: [
+          { name: 'member1' },
+        ],
+        deprecated: true
+      },
     ];
     processor.$process(docs);
     const keywordsDoc = docs[docs.length - 1];
-    expect(JSON.parse(keywordsDoc.renderedContent)).toEqual(
-      [{
+    expect(JSON.parse(keywordsDoc.renderedContent)).toEqual({
+      dictionary: ['class', 'someclass', 'document', 'api', 'head', 'someclass2', 'descript', 'member1'],
+      pages: [{
         'title':'SomeClass',
         'type':'class',
-        'titleWords':'someclass',
-        'headingWords':'heading some someclass',
-        'keywords':'api class documentation for is someclass the',
-        'members':'',
-        'deprecated': false,
+        'headings': [1, 4],
+        'keywords': [0, 1, 2, 1, 3],
       },
-    );
+      {
+        'title':'SomeClass2',
+        'type':'class',
+        'keywords': [0, 5, 6],
+        'members': [7],
+        'deprecated': true,
+      }]
+    });
   });
 });
@@ -2005,6 +2005,11 @@
   resolved "https://registry.yarnpkg.com/@types/source-list-map/-/source-list-map-0.1.2.tgz#0078836063ffaf17412349bba364087e0ac02ec9"
   integrity sha512-K5K+yml8LTo9bWJI/rECfIPrGgxdpeNbj+d53lwN4QjW1MCwlkhUms+gtdzigTeUyBr09+u8BwOIY3MXvHdcsA==
 
+"@types/stemmer@^1.0.2":
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/@types/stemmer/-/stemmer-1.0.2.tgz#bd8354f50b3c9b87c351d169240e45cf1fa1f5e8"
+  integrity sha512-2gWEIFqVZjjZxo8/TcugCAl7nW9Jd9ArEDpTAc5nH7d+ZUkreHA7GzuFcLZ0sflLrA5b1PZ+2yDyHJcuP9KWWw==
+
 "@types/unist@*", "@types/unist@^2.0.0", "@types/unist@^2.0.2":
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.3.tgz#9c088679876f374eb5983f150d4787aa6fb32d7e"
@@ -12802,6 +12807,11 @@ static-extend@^0.1.1:
   resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c"
   integrity sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=
 
+stemmer@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/stemmer/-/stemmer-1.0.5.tgz#fd89beaf8bff5d04b6643bfffcaed0fc420deec0"
+  integrity sha512-SLq7annzSKRDStasOJJoftCSCzBCKmBmH38jC4fDtCunAqOzpTpIm9zmaHmwNJiZ8gLe9qpVdBVbEG2DC5dE2A==
+
 stream-browserify@^2.0.1:
   version "2.0.2"
   resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.2.tgz#87521d38a44aa7ee91ce1cd2a47df0cb49dd660b"