feat(docs-infra): improve search query processing (#41368)

This commit tries to improve the search results by processing
the query and attempting progressively less restrictive searches
until a non-zero set of pages is matched.

The new processing includes:

* stripping off quote marks, which were causing searches to fail
* first attempting to match pages where ALL the query terms exist
* second attempting to match pages where ANY of the query terms exist
* third attempting to match pages where the title contains partial word matches

The first query attempt approximates searching for multi-word phrases
quite well, because the query terms are technical and the corpus is
fairly small.

PR Close #41368
This commit is contained in:
Pete Bacon Darwin 2021-03-29 22:02:54 +01:00 committed by Alex Rickabaugh
parent a5a3752859
commit 55f7f1d446
3 changed files with 20 additions and 11 deletions

View File

@ -84,22 +84,33 @@ function loadIndex(pagesData: PageInfo[]): IndexLoader {
/**
 * Query the index and return the processed results.
 *
 * Progressively less restrictive searches are attempted until one of them
 * matches at least one page:
 *
 *  1. every term in the query must be present (each term is prefixed with `+`);
 *  2. any term in the query may be present (the query is passed as-is);
 *  3. as (2), plus a wildcard match of the first query word against the
 *     `title` field.
 *
 * @param query The search text. One leading and one trailing quote mark
 *     (`'` or `"`) are stripped before searching.
 * @returns The `pages` entries for the matching hits, or an empty array when
 *     the (stripped) query is empty, nothing matches, or the index throws.
 */
function queryIndex(query: string): PageInfo[] {
  // Strip off quotes
  query = query.replace(/^["']|['"]$/g, '');
  try {
    if (query.length) {
      // First try a query where every term must be present
      const queryAll = query.replace(/(^|\s)([^\s]+)/g, '$1+$2');
      let results = index.search(queryAll);

      // If that was too restrictive just query for any term to be present
      if (results.length === 0) {
        results = index.search(query);
      }

      // If that is still too restrictive then search in the title for the first word in the query
      if (results.length === 0) {
        // E.g. if the search is "ngCont guide" then we search for "ngCont guide title:*ngCont*"
        const titleQuery = 'title:*' + query.split(' ', 1)[0] + '*';
        results = index.search(query + ' ' + titleQuery);
      }

      // Map the hits into info about each page to be returned as results
      return results.map(hit => pages[hit.ref]);
    }
  } catch (e) {
    // If the search query cannot be parsed the index throws an error
    // Log it and recover
    console.error(e);
  }
  return [];
}

View File

@ -103,11 +103,10 @@ describe(browser.baseUrl, () => {
});
// Visits two paths via `page.goTo()` and then asserts on the strings returned by
// `page.getSearchResults()`.
// NOTE(review): this listing appears to interleave removed and added diff lines. Presumably
// only the 'common/http' navigation and the 'common/http package' expectation belong to the
// updated test - confirm against the repository before relying on the other lines.
it('should show relevant results on 404', async () => {
await page.goTo('http/router');
await page.goTo('common/http');
const results = await page.getSearchResults();
expect(results).toContain('HttpClient');
expect(results).toContain('Router');
expect(results).toContain('common/http package');
});
});
});

View File

@ -217,11 +217,10 @@ describe('site App', () => {
});
// Navigates to two paths via `page.navigateTo()` and then asserts on the strings returned by
// `page.getSearchResults()`.
// NOTE(review): this listing appears to interleave removed and added diff lines. Presumably
// only the 'common/http' navigation and the 'common/http package' expectation belong to the
// updated test - confirm against the repository before relying on the other lines.
it('should search the index for words found in the url', async () => {
await page.navigateTo('http/router');
await page.navigateTo('common/http');
const results = await page.getSearchResults();
expect(results).toContain('HttpRequest');
expect(results).toContain('Router');
expect(results).toContain('common/http package');
});
});