From 1a822178b23e1a82ae7050c931160618a48f8316 Mon Sep 17 00:00:00 2001
From: michaelabuckley <michaelabuckley@gmail.com>
Date: Mon, 1 Nov 2021 11:31:23 -0400
Subject: [PATCH] Docs and cleanup for #2997 (#3126)

* Docs and cleanup for #2997

Auto-merge triggered too soon.

* Change link extraction to support multi-paths.

Cleanup naming.

* Remove this != null check

* Cleanup and comments

* Update hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md

Co-authored-by: Tadgh <tadgh@cs.toronto.edu>

* Update hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md

Co-authored-by: Tadgh <tadgh@cs.toronto.edu>

* Reformat and comments

Co-authored-by: Tadgh <garygrantgraham@gmail.com>
Co-authored-by: Tadgh <tadgh@cs.toronto.edu>
---
 .../2841-word-boundary-text-searches.yaml     |   3 +-
 .../5_6_0/3000-Extend-lucene-indexing.yaml    |   3 +-
 .../ca/uhn/hapi/fhir/docs/files.properties    |   1 +
 .../uhn/hapi/fhir/docs/server_jpa/elastic.md  |  30 ++++
 .../server_jpa_partitioning/partitioning.md   |   2 +
 .../ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java  |   2 +-
 .../fhir/jpa/dao/FulltextSearchSvcImpl.java   | 163 ++----------------
 .../uhn/fhir/jpa/dao/IFulltextSearchSvc.java  |   3 +-
 .../ExtendedLuceneClauseBuilder.java}         |   9 +-
 .../search/ExtendedLuceneIndexExtractor.java  |  68 ++++++++
 .../search/ExtendedLuceneSearchBuilder.java   | 141 +++++++++++++++
 .../uhn/fhir/jpa/dao/search/package-info.java |  21 +++
 .../jpa/search/builder/SearchBuilder.java     |  34 ++--
 .../config/TestR4ConfigWithElasticSearch.java |   2 +-
 14 files changed, 306 insertions(+), 176 deletions(-)
 create mode 100644 hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md
 rename hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/{HibernateSearchQueryBuilder.java => search/ExtendedLuceneClauseBuilder.java} (97%)
 create mode 100644 hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java
 create mode 100644 hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java
 create mode 100644 hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java

diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml
index e6c390cdf95..5f67c3be7c0 100644
--- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml
@@ -2,4 +2,5 @@
 type: add
 issue: 2841
 title: "The [:text](https://www.hl7.org/fhir/search.html#text) Search Parameter modifier now searches by word boundary of the text content
- as opposed to only searching at the start of the text.  Add * to match word prefixes (e.g. weig* will match weight)."
+   when using Lucene/Elasticsearch indexing, as opposed to only searching at the start of the text.
+   Add * to match word prefixes (e.g. weig* will match weight)."
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml
index 3f4b6d092a3..e1aea10c669 100644
--- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml
@@ -1,4 +1,5 @@
 ---
 type: add
 issue: 2999
-title: "Lucene/Elasticsearch indexing has been extended to string and token parameters. This can be controlled by the new `setAdvancedLuceneIndexing()` property of DaoConfig."
+title: "Lucene/Elasticsearch indexing has been extended to string, token, and reference parameters.
+  This can be enabled by the new `setAdvancedLuceneIndexing()` property of DaoConfig."
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties
index 6e3bb52e90c..27d50aa20ab 100644
--- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties
@@ -60,6 +60,7 @@ page.server_jpa.performance=Performance
 page.server_jpa.upgrading=Upgrade Guide
 page.server_jpa.diff=Diff Operation
 page.server_jpa.lastn=LastN Operation
+page.server_jpa.elastic=Lucene/Elasticsearch Indexing
 page.server_jpa.terminology=Terminology
 
 section.server_jpa_mdm.title=JPA Server: MDM
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md
new file mode 100644
index 00000000000..2213c615ecb
--- /dev/null
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md
@@ -0,0 +1,30 @@
+# HAPI FHIR JPA Lucene/Elasticsearch Indexing
+
+The HAPI JPA Server supports optional indexing via Hibernate Search when configured to use Lucene or Elasticsearch.
+This is required to support the `_content` or `_text` search parameters.
+
+# Experimental Advanced Lucene/Elasticsearch Indexing
+
+Additional indexing is implemented for simple search parameters of type token, string, and reference.
+These implement the basic search, as well as several modifiers, as described in the sections below.
+This **experimental** feature is enabled via the `setAdvancedLuceneIndexing()` property of DaoConfig.
+
+## String search
+
+The Advanced Lucene string search indexing supports the default search, as well as the modifiers defined in https://www.hl7.org/fhir/search.html#string.
+- Default searching matches by prefix, insensitive to case or accents
+- `:exact` matches the entire string, matching case and accents
+- `:contains` extends the default search to match any substring of the text
+- `:text` provides a rich search syntax using the Simple Query Syntax as defined by
+[Lucene](https://lucene.apache.org/core/8_10_1/queryparser/org/apache/lucene/queryparser/simple/SimpleQueryParser.html) and 
+[Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html#simple-query-string-syntax).
+
+## Token search
+
+The Advanced Lucene indexing supports the default token search by code, system, or system+code,
+as well as with the `:text` modifier.
+The `:text` modifier provides the same Simple Query Syntax used by string `:text` searches.
+See https://www.hl7.org/fhir/search.html#token.
+
+
+
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md
index 4ab1a4c4e98..dc6f3ac868f 100644
--- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md
@@ -172,3 +172,5 @@ None of the limitations listed here are considered permanent. Over time the HAPI
 * **Bulk Operations are not partition aware**: Bulk export operations will export data across all partitions.
 
 * **Package Operations are not partition aware**: Package operations will only create, update and query resources in the default partition.
+
+* **Advanced Elasticsearch indexing is not partition optimized**: The results are correctly partitioned, but the extended indexing is not optimized to account for partitions. 
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java
index 457b065ac9c..41be8c7d55a 100644
--- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java
@@ -1672,7 +1672,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
 			theEntity.setNarrativeText(parseNarrativeTextIntoWords(theResource));
 			theEntity.setContentText(parseContentTextIntoWords(theContext, theResource));
 			if (myDaoConfig.isAdvancedLuceneIndexing()) {
-				ExtendedLuceneIndexData luceneIndexData = myFulltextSearchSvc.extractLuceneIndexData(theContext, theResource, theNewParams);
+				ExtendedLuceneIndexData luceneIndexData = myFulltextSearchSvc.extractLuceneIndexData(theResource, theNewParams);
 				theEntity.setLuceneIndexData(luceneIndexData);
 			}
 		}
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java
index 3ad3ad857a5..c95edb71ce2 100644
--- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java
@@ -24,7 +24,9 @@ import ca.uhn.fhir.context.FhirContext;
 import ca.uhn.fhir.context.RuntimeSearchParam;
 import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.dao.data.IForcedIdDao;
-import ca.uhn.fhir.jpa.model.entity.ResourceLink;
+import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneSearchBuilder;
+import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor;
+import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder;
 import ca.uhn.fhir.jpa.model.entity.ResourceTable;
 import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData;
 import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
@@ -33,14 +35,9 @@ import ca.uhn.fhir.model.api.IQueryParameterType;
 import ca.uhn.fhir.rest.api.Constants;
 import ca.uhn.fhir.rest.api.server.RequestDetails;
 import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId;
-import ca.uhn.fhir.rest.param.QuantityParam;
-import ca.uhn.fhir.rest.param.ReferenceParam;
 import ca.uhn.fhir.rest.param.StringParam;
 import ca.uhn.fhir.rest.param.TokenParam;
 import ca.uhn.fhir.rest.server.util.ISearchParamRegistry;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import org.apache.commons.lang3.StringUtils;
 import org.hibernate.search.mapper.orm.Search;
 import org.hibernate.search.mapper.orm.session.SearchSession;
 import org.hl7.fhir.instance.model.api.IAnyResource;
@@ -53,19 +50,14 @@ import org.springframework.transaction.support.TransactionTemplate;
 import javax.persistence.EntityManager;
 import javax.persistence.PersistenceContext;
 import javax.persistence.PersistenceContextType;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 import static org.apache.commons.lang3.StringUtils.isNotBlank;
 
 public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class);
-	public static final String EMPTY_MODIFIER = "";
 	@Autowired
 	protected IForcedIdDao myForcedIdDao;
 	@PersistenceContext(type = PersistenceContextType.TRANSACTION)
@@ -78,7 +70,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 	private ISearchParamRegistry mySearchParamRegistry;
 	@Autowired
 	private DaoConfig myDaoConfig;
-
+	private ExtendedLuceneSearchBuilder myAdvancedIndexQueryBuilder = new ExtendedLuceneSearchBuilder();
 
 	private Boolean ourDisabled;
 
@@ -89,54 +81,13 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 		super();
 	}
 
-	public ExtendedLuceneIndexData extractLuceneIndexData(FhirContext theContext, IBaseResource theResource, ResourceIndexedSearchParams theNewParams) {
-		ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myFhirContext);
-
-		theNewParams.myStringParams.forEach(nextParam ->
-			retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact()));
-
-		theNewParams.myTokenParams.forEach(nextParam ->
-			retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue()));
-
-		if (!theNewParams.myLinks.isEmpty()) {
-			Map<String, ResourceLink> spNameToLinkMap = buildSpNameToLinkMap(theResource, theNewParams);
-
-			spNameToLinkMap.entrySet()
-					.forEach(nextEntry -> {
-						ResourceLink resourceLink = nextEntry.getValue();
-						String qualifiedTargetResourceId = resourceLink.getTargetResourceType() + "/" + resourceLink.getTargetResourceId();
-						retVal.addResourceLinkIndexData(nextEntry.getKey(), qualifiedTargetResourceId);
-					});
-
-		}
-		return retVal;
-	}
-
-	private Map<String, ResourceLink> buildSpNameToLinkMap(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) {
+	public ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) {
 		String resourceType = myFhirContext.getResourceType(theResource);
-
-		Map<String, RuntimeSearchParam> paramNameToRuntimeParam =
-			theNewParams.getPopulatedResourceLinkParameters().stream()
-				.collect(Collectors.toMap(
-					(theParam) -> theParam,
-					(theParam) -> mySearchParamRegistry.getActiveSearchParam(resourceType, theParam)));
-
-		Map<String, ResourceLink> paramNameToIndexedLink = new HashMap<>();
-		for ( Map.Entry<String, RuntimeSearchParam> entry :paramNameToRuntimeParam.entrySet()) {
-			ResourceLink link = theNewParams.myLinks.stream().filter(resourceLink ->
-				entry.getValue().getPathsSplit().stream()
-					.anyMatch(path -> path.equalsIgnoreCase(resourceLink.getSourcePath())))
-				.findFirst().orElse(null);
-			paramNameToIndexedLink.put(entry.getKey(), link);
-		}
-		return paramNameToIndexedLink;
+		Map<String, RuntimeSearchParam> activeSearchParams = mySearchParamRegistry.getActiveSearchParams(resourceType);
+		ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams);
+		return extractor.extract(theNewParams);
 	}
 
-	/**
-	 * These params have complicated semantics, or are best resolved at the JPA layer for now.
-	 */
-	static final Set<String> ourUnsafeSearchParmeters = Sets.newHashSet("_id", "_tag", "_meta");
-
 	@Override
 	public boolean supportsSomeOf(SearchParameterMap myParams) {
 		// keep this in sync with the guts of doSearch
@@ -144,56 +95,12 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 
 		requiresHibernateSearchAccess |=
 			myDaoConfig.isAdvancedLuceneIndexing() &&
-				myParams.entrySet().stream()
-					.filter(e -> !ourUnsafeSearchParmeters.contains(e.getKey()))
-					// each and clause may have a different modifier, so split down to the ORs
-					.flatMap(andList -> andList.getValue().stream())
-					.flatMap(Collection::stream)
-					.anyMatch(this::isParamSupported);
+				myAdvancedIndexQueryBuilder.isSupportsSomeOf(myParams);
 
 		return requiresHibernateSearchAccess;
 	}
 
-	private boolean isParamSupported(IQueryParameterType param) {
-		String modifier = StringUtils.defaultString(param.getQueryParameterQualifier(), EMPTY_MODIFIER);
-		if (param instanceof TokenParam) {
-			switch (modifier) {
-				case Constants.PARAMQUALIFIER_TOKEN_TEXT:
-				case "":
-					// we support plain token and token:text
-					return true;
-				default:
-					return false;
-			}
-		} else if (param instanceof StringParam) {
-			switch (modifier) {
-				// we support string:text, string:contains, string:exact, and unmodified string.
-				case Constants.PARAMQUALIFIER_TOKEN_TEXT:
-				case Constants.PARAMQUALIFIER_STRING_EXACT:
-				case Constants.PARAMQUALIFIER_STRING_CONTAINS:
-				case EMPTY_MODIFIER:
-					return true;
-				default:
-					return false;
-			}
-		} else if (param instanceof QuantityParam) {
-			return false;
-		} else if (param instanceof ReferenceParam) {
-			//We cannot search by chain.
-			if (((ReferenceParam) param).getChain() != null) {
-				return false;
-			}
-			switch (modifier) {
-				case	EMPTY_MODIFIER:
-					return true;
-				case Constants.PARAMQUALIFIER_MDM:
-				default:
-					return false;
-			}
-		} else {
-			return false;
-		}
-	}
+
 
 
 	private List<ResourcePersistentId> doSearch(String theResourceType, SearchParameterMap theParams, ResourcePersistentId theReferencingPid) {
@@ -207,7 +114,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 			)
 			.where(
 				f -> f.bool(b -> {
-					HibernateSearchQueryBuilder builder = new HibernateSearchQueryBuilder(myFhirContext, b, f);
+					ExtendedLuceneClauseBuilder builder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f);
 
 					/*
 					 * Handle _content parameter (resource body content)
@@ -232,53 +139,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
 					 * Handle other supported parameters
 					 */
 					if (myDaoConfig.isAdvancedLuceneIndexing()) {
-						// copy the keys to avoid concurrent modification error
-						ArrayList<String> paramNames = Lists.newArrayList(theParams.keySet());
-						for(String nextParam: paramNames) {
-							if (ourUnsafeSearchParmeters.contains(nextParam)) {
-								continue;
-							}
-							RuntimeSearchParam activeParam = mySearchParamRegistry.getActiveSearchParam(theResourceType, nextParam);
-							if (activeParam == null) {
-								// ignore magic params handled in JPA
-								continue;
-							}
-							switch (activeParam.getParamType()) {
-								case TOKEN:
-									List<List<IQueryParameterType>> tokenTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT);
-									builder.addStringTextSearch(nextParam, tokenTextAndOrTerms);
-
-									List<List<IQueryParameterType>> tokenUnmodifiedAndOrTerms = theParams.removeByNameUnmodified(nextParam);
-									builder.addTokenUnmodifiedSearch(nextParam, tokenUnmodifiedAndOrTerms);
-
-									break;
-								case STRING:
-									List<List<IQueryParameterType>> stringTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT);
-									builder.addStringTextSearch(nextParam, stringTextAndOrTerms);
-
-									List<List<IQueryParameterType>> stringExactAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_EXACT);
-									builder.addStringExactSearch(nextParam, stringExactAndOrTerms);
-
-									List<List<IQueryParameterType>> stringContainsAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_CONTAINS);
-									builder.addStringContainsSearch(nextParam, stringContainsAndOrTerms);
-
-									List<List<IQueryParameterType>> stringAndOrTerms = theParams.removeByNameUnmodified(nextParam);
-									builder.addStringUnmodifiedSearch(nextParam, stringAndOrTerms);
-									break;
-
-								case QUANTITY:
-									break;
-
-								case REFERENCE:
-									List<List<IQueryParameterType>> referenceAndOrTerms = theParams.removeByNameUnmodified(nextParam);
-									builder.addReferenceUnchainedSearch(nextParam, referenceAndOrTerms);
-									break;
-
-								default:
-									// ignore unsupported param types/modifiers.  They will be processed up in SearchBuilder.
-							}
-						}
-
+						myAdvancedIndexQueryBuilder.addAndConsumeAdvancedQueryClauses(builder, theResourceType, theParams, mySearchParamRegistry);
 					}
 					//DROP EARLY HERE IF BOOL IS EMPTY?
 
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java
index 93284123642..6ef820cace0 100644
--- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java
@@ -22,7 +22,6 @@ package ca.uhn.fhir.jpa.dao;
 
 import java.util.List;
 
-import ca.uhn.fhir.context.FhirContext;
 import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData;
 import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams;
 import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId;
@@ -47,7 +46,7 @@ public interface IFulltextSearchSvc {
 
 	boolean isDisabled();
 
-	ExtendedLuceneIndexData extractLuceneIndexData(FhirContext theContext, IBaseResource theResource, ResourceIndexedSearchParams theNewParams);
+	ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams);
 
     boolean supportsSomeOf(SearchParameterMap myParams);
 }
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java
similarity index 97%
rename from hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java
rename to hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java
index 525bfbf1d29..b80c5df895f 100644
--- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java
@@ -1,4 +1,4 @@
-package ca.uhn.fhir.jpa.dao;
+package ca.uhn.fhir.jpa.dao.search;
 
 import ca.uhn.fhir.context.FhirContext;
 import ca.uhn.fhir.model.api.IQueryParameterType;
@@ -26,20 +26,19 @@ import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING
 import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT;
 import static org.apache.commons.lang3.StringUtils.isNotBlank;
 
-public class HibernateSearchQueryBuilder {
-	private static final Logger ourLog = LoggerFactory.getLogger(HibernateSearchQueryBuilder.class);
+public class ExtendedLuceneClauseBuilder {
+	private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class);
 
 	final FhirContext myFhirContext;
 	final SearchPredicateFactory myPredicateFactory;
 	final BooleanPredicateClausesStep<?> myRootClause;
 
-	public HibernateSearchQueryBuilder(FhirContext myFhirContext, BooleanPredicateClausesStep<?> myRootClause, SearchPredicateFactory myPredicateFactory) {
+	public ExtendedLuceneClauseBuilder(FhirContext myFhirContext, BooleanPredicateClausesStep<?> myRootClause, SearchPredicateFactory myPredicateFactory) {
 		this.myFhirContext = myFhirContext;
 		this.myRootClause = myRootClause;
 		this.myPredicateFactory = myPredicateFactory;
 	}
 
-
 	@Nonnull
 	private Set<String> extractOrStringParams(List<? extends IQueryParameterType> nextAnd) {
 		Set<String> terms = new HashSet<>();
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java
new file mode 100644
index 00000000000..b8b15a2562e
--- /dev/null
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java
@@ -0,0 +1,68 @@
+package ca.uhn.fhir.jpa.dao.search;
+
+import ca.uhn.fhir.context.FhirContext;
+import ca.uhn.fhir.context.RuntimeSearchParam;
+import ca.uhn.fhir.jpa.model.entity.ResourceLink;
+import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData;
+import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.*;
+
+/**
+ * Extract search params for advanced lucene indexing.
+ *
+ * This class re-uses the extracted JPA entities to build an ExtendedLuceneIndexData instance.
+ */
+public class ExtendedLuceneIndexExtractor {
+
+	private final FhirContext myContext;
+	private final Map<String, RuntimeSearchParam> myParams;
+
+	public ExtendedLuceneIndexExtractor(FhirContext theContext, Map<String, RuntimeSearchParam> theActiveParams) {
+		myContext = theContext;
+		myParams = theActiveParams;
+	}
+
+	@NotNull
+	public ExtendedLuceneIndexData extract(ResourceIndexedSearchParams theNewParams) {
+		// wip mb this is testable now.
+		ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myContext);
+
+		theNewParams.myStringParams.forEach(nextParam ->
+			retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact()));
+
+		theNewParams.myTokenParams.forEach(nextParam ->
+			retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue()));
+
+		if (!theNewParams.myLinks.isEmpty()) {
+
+			// awkwardly, links are shared between different search params if they use the same path,
+			// so we re-build the linkage.
+			// WIP MB is this the right design?  Or should we follow JPA and share these?
+			Map<String, List<String>> linkPathToParamName = new HashMap<>();
+			for (String nextParamName : theNewParams.getPopulatedResourceLinkParameters()) {
+				RuntimeSearchParam sp = myParams.get(nextParamName);
+				List<String> pathsSplit = sp.getPathsSplit();
+				for (String nextPath : pathsSplit) {
+					// we want case-insensitive matching
+					nextPath = nextPath.toLowerCase(Locale.ROOT);
+
+					linkPathToParamName
+						.computeIfAbsent(nextPath, (p) -> new ArrayList<>())
+						.add(nextParamName);
+				}
+			}
+
+			for (ResourceLink nextLink : theNewParams.getResourceLinks()) {
+				String insensitivePath = nextLink.getSourcePath().toLowerCase(Locale.ROOT);
+				List<String> paramNames = linkPathToParamName.getOrDefault(insensitivePath, Collections.emptyList());
+				for (String nextParamName : paramNames) {
+					String qualifiedTargetResourceId = nextLink.getTargetResourceType() + "/" + nextLink.getTargetResourceId();
+					retVal.addResourceLinkIndexData(nextParamName, qualifiedTargetResourceId);
+				}
+			}
+		}
+		return retVal;
+	}
+}
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java
new file mode 100644
index 00000000000..6c68485a571
--- /dev/null
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java
@@ -0,0 +1,141 @@
+package ca.uhn.fhir.jpa.dao.search;
+
+import ca.uhn.fhir.context.RuntimeSearchParam;
+import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
+import ca.uhn.fhir.model.api.IQueryParameterType;
+import ca.uhn.fhir.rest.api.Constants;
+import ca.uhn.fhir.rest.param.QuantityParam;
+import ca.uhn.fhir.rest.param.ReferenceParam;
+import ca.uhn.fhir.rest.param.StringParam;
+import ca.uhn.fhir.rest.param.TokenParam;
+import ca.uhn.fhir.rest.server.util.ISearchParamRegistry;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Search builder for lucene/elastic for token, string, and reference parameters.
+ */
+public class ExtendedLuceneSearchBuilder {
+	public static final String EMPTY_MODIFIER = "";
+
+	/**
+	 * These params have complicated semantics, or are best resolved at the JPA layer for now.
+	 */
+	public static final Set<String> ourUnsafeSearchParmeters = Sets.newHashSet("_id", "_tag", "_meta");
+
+	/**
+	 * Are any of the queries supported by our indexing?
+	 */
+	public boolean isSupportsSomeOf(SearchParameterMap myParams) {
+		return
+			myParams.entrySet().stream()
+				.filter(e -> !ourUnsafeSearchParmeters.contains(e.getKey()))
+				// each and clause may have a different modifier, so split down to the ORs
+				.flatMap(andList -> andList.getValue().stream())
+				.flatMap(Collection::stream)
+				.anyMatch(this::isParamSupported);
+	}
+
+	/**
+	 * Do we support this query param type+modifier?
+	 *
+	 * NOTE - keep this in sync with addAndConsumeAdvancedQueryClauses() below.
+	 */
+	private boolean isParamSupported(IQueryParameterType param) {
+		String modifier = StringUtils.defaultString(param.getQueryParameterQualifier(), EMPTY_MODIFIER);
+		if (param instanceof TokenParam) {
+			switch (modifier) {
+				case Constants.PARAMQUALIFIER_TOKEN_TEXT:
+				case "":
+					// we support plain token and token:text
+					return true;
+				default:
+					return false;
+			}
+		} else if (param instanceof StringParam) {
+			switch (modifier) {
+				// we support string:text, string:contains, string:exact, and unmodified string.
+				case Constants.PARAMQUALIFIER_TOKEN_TEXT:
+				case Constants.PARAMQUALIFIER_STRING_EXACT:
+				case Constants.PARAMQUALIFIER_STRING_CONTAINS:
+				case EMPTY_MODIFIER:
+					return true;
+				default:
+					return false;
+			}
+		} else if (param instanceof QuantityParam) {
+			return false;
+		} else if (param instanceof ReferenceParam) {
+			//We cannot search by chain.
+			if (((ReferenceParam) param).getChain() != null) {
+				return false;
+			}
+			switch (modifier) {
+				case EMPTY_MODIFIER:
+					return true;
+				case Constants.PARAMQUALIFIER_MDM:
+				default:
+					return false;
+			}
+		} else {
+			return false;
+		}
+	}
+
+	public void addAndConsumeAdvancedQueryClauses(ExtendedLuceneClauseBuilder builder, String theResourceType, SearchParameterMap theParams, ISearchParamRegistry theSearchParamRegistry) {
+		// copy the keys to avoid concurrent modification error
+		ArrayList<String> paramNames = Lists.newArrayList(theParams.keySet());
+		for (String nextParam : paramNames) {
+			if (ourUnsafeSearchParmeters.contains(nextParam)) {
+				continue;
+			}
+			RuntimeSearchParam activeParam = theSearchParamRegistry.getActiveSearchParam(theResourceType, nextParam);
+			if (activeParam == null) {
+				// ignore magic params handled in JPA
+				continue;
+			}
+
+			// NOTE - keep this in sync with isParamSupported() above.
+			switch (activeParam.getParamType()) {
+				case TOKEN:
+					List<List<IQueryParameterType>> tokenTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT);
+					builder.addStringTextSearch(nextParam, tokenTextAndOrTerms);
+
+					List<List<IQueryParameterType>> tokenUnmodifiedAndOrTerms = theParams.removeByNameUnmodified(nextParam);
+					builder.addTokenUnmodifiedSearch(nextParam, tokenUnmodifiedAndOrTerms);
+
+					break;
+				case STRING:
+					List<List<IQueryParameterType>> stringTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT);
+					builder.addStringTextSearch(nextParam, stringTextAndOrTerms);
+
+					List<List<IQueryParameterType>> stringExactAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_EXACT);
+					builder.addStringExactSearch(nextParam, stringExactAndOrTerms);
+
+					List<List<IQueryParameterType>> stringContainsAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_CONTAINS);
+					builder.addStringContainsSearch(nextParam, stringContainsAndOrTerms);
+
+					List<List<IQueryParameterType>> stringAndOrTerms = theParams.removeByNameUnmodified(nextParam);
+					builder.addStringUnmodifiedSearch(nextParam, stringAndOrTerms);
+					break;
+
+				case QUANTITY:
+					break;
+
+				case REFERENCE:
+					List<List<IQueryParameterType>> referenceAndOrTerms = theParams.removeByNameUnmodified(nextParam);
+					builder.addReferenceUnchainedSearch(nextParam, referenceAndOrTerms);
+					break;
+
+				default:
+					// ignore unsupported param types/modifiers.  They will be processed up in SearchBuilder.
+			}
+		}
+	}
+}
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java
new file mode 100644
index 00000000000..3d31547f9e0
--- /dev/null
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java
@@ -0,0 +1,21 @@
+/**
+ * Extended FHIR indexing for Hibernate Search using Lucene/Elasticsearch.
+ *
+ * By default, Lucene indexing only provides support for the _text and _content search parameters, using
+ * {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myNarrativeText} and
+ * {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myContentText}.
+ *
+ * Both {@link ca.uhn.fhir.jpa.search.builder.SearchBuilder} and {@link ca.uhn.fhir.jpa.dao.LegacySearchBuilder} delegate the
+ * search to {@link ca.uhn.fhir.jpa.dao.FulltextSearchSvcImpl} when active.
+ * The fulltext search runs first and interprets any search parameters it understands, returning a pid list.
+ * This pid list is used as a narrowing where clause against the remaining unprocessed search parameters.
+ *
+ * This package extends this search to support token, string, and reference parameters via {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myLuceneIndexData}.
+ * When active, the extracted search parameters which are written to the HFJ_SPIDX_* tables are also written to the Lucene index document.
+ *
+ * Activated by {@link ca.uhn.fhir.jpa.api.config.DaoConfig#setAdvancedLuceneIndexing(boolean)}.
+ *
+ * @see ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter
+ * @see ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData
+ */
+package ca.uhn.fhir.jpa.dao.search;
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java
index 79c1908260b..6e5faae5713 100644
--- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java
+++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java
@@ -310,7 +310,7 @@ public class SearchBuilder implements ISearchBuilder {
 
 		List<ResourcePersistentId> pids = new ArrayList<>();
 
-		if (requiresHibernateSearchAccess()) {
+		if (checkUseHibernateSearch()) {
 			if (myParams.isLastN()) {
 				pids = executeLastNAgainstIndex(theMaximumResults);
 			} else {
@@ -345,22 +345,28 @@ public class SearchBuilder implements ISearchBuilder {
 		return queries;
 	}
 
-	private boolean requiresHibernateSearchAccess() {
-		boolean result = (myFulltextSearchSvc != null) &&
-			!myFulltextSearchSvc.isDisabled() &&
-			myFulltextSearchSvc.supportsSomeOf(myParams);
+	/**
+	 * Check to see if query should use Hibernate Search, and error if the query can't continue.
+	 *
+	 * @return true if the query should first be processed by Hibernate Search
+	 * @throws InvalidRequestException if fulltext search is not enabled but the query uses _text or _content, which require it
+	 */
+	private boolean checkUseHibernateSearch() {
+		boolean fulltextEnabled = (myFulltextSearchSvc != null) && !myFulltextSearchSvc.isDisabled();
 
-		if (myParams.containsKey(Constants.PARAM_CONTENT) || myParams.containsKey(Constants.PARAM_TEXT)) {
-			if (myFulltextSearchSvc == null || myFulltextSearchSvc.isDisabled()) {
-				if (myParams.containsKey(Constants.PARAM_TEXT)) {
-					throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + Constants.PARAM_TEXT);
-				} else if (myParams.containsKey(Constants.PARAM_CONTENT)) {
-					throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + Constants.PARAM_CONTENT);
-				}
-			}
+		if (!fulltextEnabled) {
+			failIfUsed(Constants.PARAM_TEXT);
+			failIfUsed(Constants.PARAM_CONTENT);
 		}
 
-		return result;
+		// TODO MB someday we'll want a query planner to figure out if we _should_ use the ft index, not just if we can.
+		return fulltextEnabled && myFulltextSearchSvc.supportsSomeOf(myParams);
+	}
+
+	private void failIfUsed(String theParamName) {
+		if (myParams.containsKey(theParamName)) {
+			throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + theParamName);
+		}
 	}
 
 	private List<ResourcePersistentId> executeLastNAgainstIndex(Integer theMaximumResults) {
diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java
index f6590f54eee..0c8939d7baa 100644
--- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java
+++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java
@@ -35,7 +35,7 @@ public class TestR4ConfigWithElasticSearch extends TestR4Config {
 		int httpPort = elasticContainer().getMappedPort(9200);//9200 is the HTTP port
 		String host = elasticContainer().getHost();
 
-		ourLog.warn("Hibernate Search: using elasticsearch - host {} {}", host, httpPort);
+		ourLog.info("Hibernate Search: using elasticsearch - host {} {}", host, httpPort);
 
 		new ElasticsearchHibernatePropertiesBuilder()
 			.setDebugIndexSyncStrategy("read-sync")