From 55f7f1d446e6280541ea525490ba36fe1db2d7ab Mon Sep 17 00:00:00 2001 From: Pete Bacon Darwin Date: Mon, 29 Mar 2021 22:02:54 +0100 Subject: [PATCH] feat(docs-infra): improve search query processing (#41368) This commit tries to improve the search results by processing the query and attempting progressively less restrictive searches until a non-zero set of pages is matched. The new processing includes: * stripping off quote marks, which were causing searches to fail * first attempting to match pages where ALL the query terms exist * second attempting to match pages where ANY of the query terms exist * third attempting to match pages where the title contains partial word matches The first query attempt approximates, quite well, the idea of searching for multi-word phrases. This is given the technical nature of the terms and the fairly small size of the corpus. PR Close #41368 --- aio/src/app/search/search.worker.ts | 21 ++++++++++++++----- .../deployment/e2e/smoke-tests.e2e-spec.ts | 5 ++--- aio/tests/e2e/src/app.e2e-spec.ts | 5 ++--- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/aio/src/app/search/search.worker.ts b/aio/src/app/search/search.worker.ts index 153416a680..5c15cd1c6b 100644 --- a/aio/src/app/search/search.worker.ts +++ b/aio/src/app/search/search.worker.ts @@ -84,22 +84,33 @@ function loadIndex(pagesData: PageInfo[]): IndexLoader { // Query the index and return the processed results function queryIndex(query: string): PageInfo[] { + // Strip off quotes + query = query.replace(/^["']|['"]$/g, ''); try { if (query.length) { - let results = index.search(query); + // First try a query where every term must be present + const queryAll = query.replace(/(^|\s)([^\s]+)/g, '$1+$2'); + let results = index.search(queryAll); + + // If that was too restrictive just query for any term to be present if (results.length === 0) { - // Add a relaxed search in the title for the first word in the query - // E.g. 
if the search is "ngCont guide" then we search for "ngCont guide titleWords:ngCont*" - const titleQuery = 'titleWords:*' + query.split(' ', 1)[0] + '*'; + results = index.search(query); + } + + // If that is still too restrictive then search in the title for the first word in the query + if (results.length === 0) { + // E.g. if the search is "ngCont guide" then we search for "ngCont guide title:ngCont*" + const titleQuery = 'title:*' + query.split(' ', 1)[0] + '*'; results = index.search(query + ' ' + titleQuery); } + // Map the hits into info about each page to be returned as results return results.map(hit => pages[hit.ref]); } } catch (e) { // If the search query cannot be parsed the index throws an error // Log it and recover - console.log(e); + console.error(e); } return []; } diff --git a/aio/tests/deployment/e2e/smoke-tests.e2e-spec.ts b/aio/tests/deployment/e2e/smoke-tests.e2e-spec.ts index e724deea86..2d58484fc6 100644 --- a/aio/tests/deployment/e2e/smoke-tests.e2e-spec.ts +++ b/aio/tests/deployment/e2e/smoke-tests.e2e-spec.ts @@ -103,11 +103,10 @@ describe(browser.baseUrl, () => { }); it('should show relevant results on 404', async () => { - await page.goTo('http/router'); + await page.goTo('common/http'); const results = await page.getSearchResults(); - expect(results).toContain('HttpClient'); - expect(results).toContain('Router'); + expect(results).toContain('common/http package'); }); }); }); diff --git a/aio/tests/e2e/src/app.e2e-spec.ts b/aio/tests/e2e/src/app.e2e-spec.ts index 442c840ff0..e341324858 100644 --- a/aio/tests/e2e/src/app.e2e-spec.ts +++ b/aio/tests/e2e/src/app.e2e-spec.ts @@ -217,11 +217,10 @@ describe('site App', () => { }); it('should search the index for words found in the url', async () => { - await page.navigateTo('http/router'); + await page.navigateTo('common/http'); const results = await page.getSearchResults(); - expect(results).toContain('HttpRequest'); - expect(results).toContain('Router'); + 
expect(results).toContain('common/http package'); }); });