From 1a822178b23e1a82ae7050c931160618a48f8316 Mon Sep 17 00:00:00 2001 From: michaelabuckley Date: Mon, 1 Nov 2021 11:31:23 -0400 Subject: [PATCH] Docs and cleanup for #2997 (#3126) * Docs and cleanup for #2997 Auto-merge triggered too soon. * Change link extraction to support multi-paths. Cleanup naming. * Remove this != null check * Cleanup and comments * Update hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md Co-authored-by: Tadgh * Update hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md Co-authored-by: Tadgh * Reformat and comments Co-authored-by: Tadgh Co-authored-by: Tadgh --- .../2841-word-boundary-text-searches.yaml | 3 +- .../5_6_0/3000-Extend-lucene-indexing.yaml | 3 +- .../ca/uhn/hapi/fhir/docs/files.properties | 1 + .../uhn/hapi/fhir/docs/server_jpa/elastic.md | 30 ++++ .../server_jpa_partitioning/partitioning.md | 2 + .../ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java | 2 +- .../fhir/jpa/dao/FulltextSearchSvcImpl.java | 163 ++---------------- .../uhn/fhir/jpa/dao/IFulltextSearchSvc.java | 3 +- .../ExtendedLuceneClauseBuilder.java} | 9 +- .../search/ExtendedLuceneIndexExtractor.java | 68 ++++++++ .../search/ExtendedLuceneSearchBuilder.java | 141 +++++++++++++++ .../uhn/fhir/jpa/dao/search/package-info.java | 21 +++ .../jpa/search/builder/SearchBuilder.java | 34 ++-- .../config/TestR4ConfigWithElasticSearch.java | 2 +- 14 files changed, 306 insertions(+), 176 deletions(-) create mode 100644 hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md rename hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/{HibernateSearchQueryBuilder.java => search/ExtendedLuceneClauseBuilder.java} (97%) create mode 100644 hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java create mode 100644 hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java create mode 100644 
hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml index e6c390cdf95..5f67c3be7c0 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_5_0/2841-word-boundary-text-searches.yaml @@ -2,4 +2,5 @@ type: add issue: 2841 title: "The [:text](https://www.hl7.org/fhir/search.html#text) Search Parameter modifier now searches by word boundary of the text content - as opposed to only searching at the start of the text. Add * to match word prefixes (e.g. weig* will match weight)." + as opposed to only searching at the start of the text when using Lucene/Elasticsearch indexing. + Add * to match word prefixes (e.g. weig* will match weight)." diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml index 3f4b6d092a3..e1aea10c669 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_6_0/3000-Extend-lucene-indexing.yaml @@ -1,4 +1,5 @@ --- type: add issue: 2999 -title: "Lucene/Elasticsearch indexing has been extended to string and token parameters. This can be controlled by the new `setAdvancedLuceneIndexing()` property of DaoConfig." +title: "Lucene/Elasticsearch indexing has been extended to string, token, and reference parameters. + This can be enabled by the new `setAdvancedLuceneIndexing()` property of DaoConfig." 
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties index 6e3bb52e90c..27d50aa20ab 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/files.properties @@ -60,6 +60,7 @@ page.server_jpa.performance=Performance page.server_jpa.upgrading=Upgrade Guide page.server_jpa.diff=Diff Operation page.server_jpa.lastn=LastN Operation +page.server_jpa.elastic=Lucene/Elasticsearch Indexing page.server_jpa.terminology=Terminology section.server_jpa_mdm.title=JPA Server: MDM diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md new file mode 100644 index 00000000000..2213c615ecb --- /dev/null +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa/elastic.md @@ -0,0 +1,30 @@ +# HAPI FHIR JPA Lucene/Elasticsearch Indexing + +The HAPI JPA Server supports optional indexing via Hibernate Search when configured to use Lucene or Elasticsearch. +This is required to support the `_content` or `_text` search parameters. + +# Experimental Advanced Lucene/Elasticsearch Indexing + +Additional indexing is implemented for simple search parameters of type token, string, and reference. +These implement the basic search, as well as several modifiers described in the sections below. +This **experimental** feature is enabled via the `setAdvancedLuceneIndexing()` property of DaoConfig. + +## String search + +The Advanced Lucene string search indexing supports the default search, as well as the modifiers defined in https://www.hl7.org/fhir/search.html#string. 
+- Default searching matches by prefix, insensitive to case or accents +- `:exact` matches the entire string, matching case and accents +- `:contains` extends the default search to match any substring of the text +- `:text` provides a rich search syntax using the Simple Query Syntax as defined by +[Lucene](https://lucene.apache.org/core/8_10_1/queryparser/org/apache/lucene/queryparser/simple/SimpleQueryParser.html) and +[Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html#simple-query-string-syntax). + +## Token search + +The Advanced Lucene indexing supports the default token search by code, system, or system+code, +as well as with the `:text` modifier. +The `:text` modifier provides the same Simple Query Syntax used by string `:text` searches. +See https://www.hl7.org/fhir/search.html#token. + + + diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md index 4ab1a4c4e98..dc6f3ac868f 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_partitioning/partitioning.md @@ -172,3 +172,5 @@ None of the limitations listed here are considered permanent. Over time the HAPI * **Bulk Operations are not partition aware**: Bulk export operations will export data across all partitions. * **Package Operations are not partition aware**: Package operations will only create, update and query resources in the default partition. + +* **Advanced Elasticsearch indexing is not partition optimized**: The results are correctly partitioned, but the extended indexing is not optimized to account for partitions. 
diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java index 457b065ac9c..41be8c7d55a 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/BaseHapiFhirDao.java @@ -1672,7 +1672,7 @@ public abstract class BaseHapiFhirDao extends BaseStora theEntity.setNarrativeText(parseNarrativeTextIntoWords(theResource)); theEntity.setContentText(parseContentTextIntoWords(theContext, theResource)); if (myDaoConfig.isAdvancedLuceneIndexing()) { - ExtendedLuceneIndexData luceneIndexData = myFulltextSearchSvc.extractLuceneIndexData(theContext, theResource, theNewParams); + ExtendedLuceneIndexData luceneIndexData = myFulltextSearchSvc.extractLuceneIndexData(theResource, theNewParams); theEntity.setLuceneIndexData(luceneIndexData); } } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java index 3ad3ad857a5..c95edb71ce2 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java @@ -24,7 +24,9 @@ import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.context.RuntimeSearchParam; import ca.uhn.fhir.jpa.api.config.DaoConfig; import ca.uhn.fhir.jpa.dao.data.IForcedIdDao; -import ca.uhn.fhir.jpa.model.entity.ResourceLink; +import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneSearchBuilder; +import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor; +import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder; import ca.uhn.fhir.jpa.model.entity.ResourceTable; import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData; import ca.uhn.fhir.jpa.searchparam.SearchParameterMap; @@ -33,14 +35,9 @@ import 
ca.uhn.fhir.model.api.IQueryParameterType; import ca.uhn.fhir.rest.api.Constants; import ca.uhn.fhir.rest.api.server.RequestDetails; import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId; -import ca.uhn.fhir.rest.param.QuantityParam; -import ca.uhn.fhir.rest.param.ReferenceParam; import ca.uhn.fhir.rest.param.StringParam; import ca.uhn.fhir.rest.param.TokenParam; import ca.uhn.fhir.rest.server.util.ISearchParamRegistry; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import org.apache.commons.lang3.StringUtils; import org.hibernate.search.mapper.orm.Search; import org.hibernate.search.mapper.orm.session.SearchSession; import org.hl7.fhir.instance.model.api.IAnyResource; @@ -53,19 +50,14 @@ import org.springframework.transaction.support.TransactionTemplate; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.PersistenceContextType; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; import static org.apache.commons.lang3.StringUtils.isNotBlank; public class FulltextSearchSvcImpl implements IFulltextSearchSvc { private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class); - public static final String EMPTY_MODIFIER = ""; @Autowired protected IForcedIdDao myForcedIdDao; @PersistenceContext(type = PersistenceContextType.TRANSACTION) @@ -78,7 +70,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { private ISearchParamRegistry mySearchParamRegistry; @Autowired private DaoConfig myDaoConfig; - + private ExtendedLuceneSearchBuilder myAdvancedIndexQueryBuilder = new ExtendedLuceneSearchBuilder(); private Boolean ourDisabled; @@ -89,54 +81,13 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { super(); } - public ExtendedLuceneIndexData 
extractLuceneIndexData(FhirContext theContext, IBaseResource theResource, ResourceIndexedSearchParams theNewParams) { - ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myFhirContext); - - theNewParams.myStringParams.forEach(nextParam -> - retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact())); - - theNewParams.myTokenParams.forEach(nextParam -> - retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue())); - - if (!theNewParams.myLinks.isEmpty()) { - Map spNameToLinkMap = buildSpNameToLinkMap(theResource, theNewParams); - - spNameToLinkMap.entrySet() - .forEach(nextEntry -> { - ResourceLink resourceLink = nextEntry.getValue(); - String qualifiedTargetResourceId = resourceLink.getTargetResourceType() + "/" + resourceLink.getTargetResourceId(); - retVal.addResourceLinkIndexData(nextEntry.getKey(), qualifiedTargetResourceId); - }); - - } - return retVal; - } - - private Map buildSpNameToLinkMap(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) { + public ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) { String resourceType = myFhirContext.getResourceType(theResource); - - Map paramNameToRuntimeParam = - theNewParams.getPopulatedResourceLinkParameters().stream() - .collect(Collectors.toMap( - (theParam) -> theParam, - (theParam) -> mySearchParamRegistry.getActiveSearchParam(resourceType, theParam))); - - Map paramNameToIndexedLink = new HashMap<>(); - for ( Map.Entry entry :paramNameToRuntimeParam.entrySet()) { - ResourceLink link = theNewParams.myLinks.stream().filter(resourceLink -> - entry.getValue().getPathsSplit().stream() - .anyMatch(path -> path.equalsIgnoreCase(resourceLink.getSourcePath()))) - .findFirst().orElse(null); - paramNameToIndexedLink.put(entry.getKey(), link); - } - return paramNameToIndexedLink; + Map activeSearchParams = mySearchParamRegistry.getActiveSearchParams(resourceType); + 
ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams); + return extractor.extract(theNewParams); } - /** - * These params have complicated semantics, or are best resolved at the JPA layer for now. - */ - static final Set ourUnsafeSearchParmeters = Sets.newHashSet("_id", "_tag", "_meta"); - @Override public boolean supportsSomeOf(SearchParameterMap myParams) { // keep this in sync with the guts of doSearch @@ -144,56 +95,12 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { requiresHibernateSearchAccess |= myDaoConfig.isAdvancedLuceneIndexing() && - myParams.entrySet().stream() - .filter(e -> !ourUnsafeSearchParmeters.contains(e.getKey())) - // each and clause may have a different modifier, so split down to the ORs - .flatMap(andList -> andList.getValue().stream()) - .flatMap(Collection::stream) - .anyMatch(this::isParamSupported); + myAdvancedIndexQueryBuilder.isSupportsSomeOf(myParams); return requiresHibernateSearchAccess; } - private boolean isParamSupported(IQueryParameterType param) { - String modifier = StringUtils.defaultString(param.getQueryParameterQualifier(), EMPTY_MODIFIER); - if (param instanceof TokenParam) { - switch (modifier) { - case Constants.PARAMQUALIFIER_TOKEN_TEXT: - case "": - // we support plain token and token:text - return true; - default: - return false; - } - } else if (param instanceof StringParam) { - switch (modifier) { - // we support string:text, string:contains, string:exact, and unmodified string. - case Constants.PARAMQUALIFIER_TOKEN_TEXT: - case Constants.PARAMQUALIFIER_STRING_EXACT: - case Constants.PARAMQUALIFIER_STRING_CONTAINS: - case EMPTY_MODIFIER: - return true; - default: - return false; - } - } else if (param instanceof QuantityParam) { - return false; - } else if (param instanceof ReferenceParam) { - //We cannot search by chain. 
- if (((ReferenceParam) param).getChain() != null) { - return false; - } - switch (modifier) { - case EMPTY_MODIFIER: - return true; - case Constants.PARAMQUALIFIER_MDM: - default: - return false; - } - } else { - return false; - } - } + private List doSearch(String theResourceType, SearchParameterMap theParams, ResourcePersistentId theReferencingPid) { @@ -207,7 +114,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { ) .where( f -> f.bool(b -> { - HibernateSearchQueryBuilder builder = new HibernateSearchQueryBuilder(myFhirContext, b, f); + ExtendedLuceneClauseBuilder builder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f); /* * Handle _content parameter (resource body content) @@ -232,53 +139,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { * Handle other supported parameters */ if (myDaoConfig.isAdvancedLuceneIndexing()) { - // copy the keys to avoid concurrent modification error - ArrayList paramNames = Lists.newArrayList(theParams.keySet()); - for(String nextParam: paramNames) { - if (ourUnsafeSearchParmeters.contains(nextParam)) { - continue; - } - RuntimeSearchParam activeParam = mySearchParamRegistry.getActiveSearchParam(theResourceType, nextParam); - if (activeParam == null) { - // ignore magic params handled in JPA - continue; - } - switch (activeParam.getParamType()) { - case TOKEN: - List> tokenTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT); - builder.addStringTextSearch(nextParam, tokenTextAndOrTerms); - - List> tokenUnmodifiedAndOrTerms = theParams.removeByNameUnmodified(nextParam); - builder.addTokenUnmodifiedSearch(nextParam, tokenUnmodifiedAndOrTerms); - - break; - case STRING: - List> stringTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT); - builder.addStringTextSearch(nextParam, stringTextAndOrTerms); - - List> stringExactAndOrTerms = theParams.removeByNameAndModifier(nextParam, 
Constants.PARAMQUALIFIER_STRING_EXACT); - builder.addStringExactSearch(nextParam, stringExactAndOrTerms); - - List> stringContainsAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_CONTAINS); - builder.addStringContainsSearch(nextParam, stringContainsAndOrTerms); - - List> stringAndOrTerms = theParams.removeByNameUnmodified(nextParam); - builder.addStringUnmodifiedSearch(nextParam, stringAndOrTerms); - break; - - case QUANTITY: - break; - - case REFERENCE: - List> referenceAndOrTerms = theParams.removeByNameUnmodified(nextParam); - builder.addReferenceUnchainedSearch(nextParam, referenceAndOrTerms); - break; - - default: - // ignore unsupported param types/modifiers. They will be processed up in SearchBuilder. - } - } - + myAdvancedIndexQueryBuilder.addAndConsumeAdvancedQueryClauses(builder, theResourceType, theParams, mySearchParamRegistry); } //DROP EARLY HERE IF BOOL IS EMPTY? diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java index 93284123642..6ef820cace0 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/IFulltextSearchSvc.java @@ -22,7 +22,6 @@ package ca.uhn.fhir.jpa.dao; import java.util.List; -import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData; import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams; import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId; @@ -47,7 +46,7 @@ public interface IFulltextSearchSvc { boolean isDisabled(); - ExtendedLuceneIndexData extractLuceneIndexData(FhirContext theContext, IBaseResource theResource, ResourceIndexedSearchParams theNewParams); + ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams); boolean 
supportsSomeOf(SearchParameterMap myParams); } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java similarity index 97% rename from hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java rename to hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java index 525bfbf1d29..b80c5df895f 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/HibernateSearchQueryBuilder.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java @@ -1,4 +1,4 @@ -package ca.uhn.fhir.jpa.dao; +package ca.uhn.fhir.jpa.dao.search; import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.model.api.IQueryParameterType; @@ -26,20 +26,19 @@ import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT; import static org.apache.commons.lang3.StringUtils.isNotBlank; -public class HibernateSearchQueryBuilder { - private static final Logger ourLog = LoggerFactory.getLogger(HibernateSearchQueryBuilder.class); +public class ExtendedLuceneClauseBuilder { + private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class); final FhirContext myFhirContext; final SearchPredicateFactory myPredicateFactory; final BooleanPredicateClausesStep myRootClause; - public HibernateSearchQueryBuilder(FhirContext myFhirContext, BooleanPredicateClausesStep myRootClause, SearchPredicateFactory myPredicateFactory) { + public ExtendedLuceneClauseBuilder(FhirContext myFhirContext, BooleanPredicateClausesStep myRootClause, SearchPredicateFactory myPredicateFactory) { this.myFhirContext = myFhirContext; this.myRootClause = myRootClause; this.myPredicateFactory = myPredicateFactory; } - @Nonnull 
private Set extractOrStringParams(List nextAnd) { Set terms = new HashSet<>(); diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java new file mode 100644 index 00000000000..b8b15a2562e --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java @@ -0,0 +1,68 @@ +package ca.uhn.fhir.jpa.dao.search; + +import ca.uhn.fhir.context.FhirContext; +import ca.uhn.fhir.context.RuntimeSearchParam; +import ca.uhn.fhir.jpa.model.entity.ResourceLink; +import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData; +import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams; +import org.jetbrains.annotations.NotNull; + +import java.util.*; + +/** + * Extract search params for advanced lucene indexing. + * + * This class re-uses the extracted JPA entities to build an ExtendedLuceneIndexData instance. + */ +public class ExtendedLuceneIndexExtractor { + + private final FhirContext myContext; + private final Map myParams; + + public ExtendedLuceneIndexExtractor(FhirContext theContext, Map theActiveParams) { + myContext = theContext; + myParams = theActiveParams; + } + + @NotNull + public ExtendedLuceneIndexData extract(ResourceIndexedSearchParams theNewParams) { + // wip mb this is testable now. + ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myContext); + + theNewParams.myStringParams.forEach(nextParam -> + retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact())); + + theNewParams.myTokenParams.forEach(nextParam -> + retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue())); + + if (!theNewParams.myLinks.isEmpty()) { + + // awkwardly, links are shared between different search params if they use the same path, + // so we re-build the linkage. + // WIP MB is this the right design? 
Or should we follow JPA and share these? + Map> linkPathToParamName = new HashMap<>(); + for (String nextParamName : theNewParams.getPopulatedResourceLinkParameters()) { + RuntimeSearchParam sp = myParams.get(nextParamName); + List pathsSplit = sp.getPathsSplit(); + for (String nextPath : pathsSplit) { + // we want case-insensitive matching + nextPath = nextPath.toLowerCase(Locale.ROOT); + + linkPathToParamName + .computeIfAbsent(nextPath, (p) -> new ArrayList<>()) + .add(nextParamName); + } + } + + for (ResourceLink nextLink : theNewParams.getResourceLinks()) { + String insensitivePath = nextLink.getSourcePath().toLowerCase(Locale.ROOT); + List paramNames = linkPathToParamName.getOrDefault(insensitivePath, Collections.emptyList()); + for (String nextParamName : paramNames) { + String qualifiedTargetResourceId = nextLink.getTargetResourceType() + "/" + nextLink.getTargetResourceId(); + retVal.addResourceLinkIndexData(nextParamName, qualifiedTargetResourceId); + } + } + } + return retVal; + } +} diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java new file mode 100644 index 00000000000..6c68485a571 --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneSearchBuilder.java @@ -0,0 +1,141 @@ +package ca.uhn.fhir.jpa.dao.search; + +import ca.uhn.fhir.context.RuntimeSearchParam; +import ca.uhn.fhir.jpa.searchparam.SearchParameterMap; +import ca.uhn.fhir.model.api.IQueryParameterType; +import ca.uhn.fhir.rest.api.Constants; +import ca.uhn.fhir.rest.param.QuantityParam; +import ca.uhn.fhir.rest.param.ReferenceParam; +import ca.uhn.fhir.rest.param.StringParam; +import ca.uhn.fhir.rest.param.TokenParam; +import ca.uhn.fhir.rest.server.util.ISearchParamRegistry; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import 
org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +/** + * Search builder for lucene/elastic for token, string, and reference parameters. + */ +public class ExtendedLuceneSearchBuilder { + public static final String EMPTY_MODIFIER = ""; + + /** + * These params have complicated semantics, or are best resolved at the JPA layer for now. + */ + public static final Set ourUnsafeSearchParmeters = Sets.newHashSet("_id", "_tag", "_meta"); + + /** + * Are any of the queries supported by our indexing? + */ + public boolean isSupportsSomeOf(SearchParameterMap myParams) { + return + myParams.entrySet().stream() + .filter(e -> !ourUnsafeSearchParmeters.contains(e.getKey())) + // each and clause may have a different modifier, so split down to the ORs + .flatMap(andList -> andList.getValue().stream()) + .flatMap(Collection::stream) + .anyMatch(this::isParamSupported); + } + + /** + * Do we support this query param type+modifier? + * + * NOTE - keep this in sync with addAndConsumeAdvancedQueryClauses() below. + */ + private boolean isParamSupported(IQueryParameterType param) { + String modifier = StringUtils.defaultString(param.getQueryParameterQualifier(), EMPTY_MODIFIER); + if (param instanceof TokenParam) { + switch (modifier) { + case Constants.PARAMQUALIFIER_TOKEN_TEXT: + case "": + // we support plain token and token:text + return true; + default: + return false; + } + } else if (param instanceof StringParam) { + switch (modifier) { + // we support string:text, string:contains, string:exact, and unmodified string. + case Constants.PARAMQUALIFIER_TOKEN_TEXT: + case Constants.PARAMQUALIFIER_STRING_EXACT: + case Constants.PARAMQUALIFIER_STRING_CONTAINS: + case EMPTY_MODIFIER: + return true; + default: + return false; + } + } else if (param instanceof QuantityParam) { + return false; + } else if (param instanceof ReferenceParam) { + //We cannot search by chain. 
+ if (((ReferenceParam) param).getChain() != null) { + return false; + } + switch (modifier) { + case EMPTY_MODIFIER: + return true; + case Constants.PARAMQUALIFIER_MDM: + default: + return false; + } + } else { + return false; + } + } + + public void addAndConsumeAdvancedQueryClauses(ExtendedLuceneClauseBuilder builder, String theResourceType, SearchParameterMap theParams, ISearchParamRegistry theSearchParamRegistry) { + // copy the keys to avoid concurrent modification error + ArrayList paramNames = Lists.newArrayList(theParams.keySet()); + for (String nextParam : paramNames) { + if (ourUnsafeSearchParmeters.contains(nextParam)) { + continue; + } + RuntimeSearchParam activeParam = theSearchParamRegistry.getActiveSearchParam(theResourceType, nextParam); + if (activeParam == null) { + // ignore magic params handled in JPA + continue; + } + + // NOTE - keep this in sync with isParamSupported() above. + switch (activeParam.getParamType()) { + case TOKEN: + List> tokenTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT); + builder.addStringTextSearch(nextParam, tokenTextAndOrTerms); + + List> tokenUnmodifiedAndOrTerms = theParams.removeByNameUnmodified(nextParam); + builder.addTokenUnmodifiedSearch(nextParam, tokenUnmodifiedAndOrTerms); + + break; + case STRING: + List> stringTextAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_TOKEN_TEXT); + builder.addStringTextSearch(nextParam, stringTextAndOrTerms); + + List> stringExactAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_EXACT); + builder.addStringExactSearch(nextParam, stringExactAndOrTerms); + + List> stringContainsAndOrTerms = theParams.removeByNameAndModifier(nextParam, Constants.PARAMQUALIFIER_STRING_CONTAINS); + builder.addStringContainsSearch(nextParam, stringContainsAndOrTerms); + + List> stringAndOrTerms = theParams.removeByNameUnmodified(nextParam); + 
builder.addStringUnmodifiedSearch(nextParam, stringAndOrTerms); + break; + + case QUANTITY: + break; + + case REFERENCE: + List> referenceAndOrTerms = theParams.removeByNameUnmodified(nextParam); + builder.addReferenceUnchainedSearch(nextParam, referenceAndOrTerms); + break; + + default: + // ignore unsupported param types/modifiers. They will be processed up in SearchBuilder. + } + } + } +} diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java new file mode 100644 index 00000000000..3d31547f9e0 --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java @@ -0,0 +1,21 @@ +/** + * Extended fhir indexing for Hibernate Search using Lucene/Elasticsearch. + * + * By default, Lucene indexing only provides support for _text, and _content search parameters using + * {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myNarrativeText} and + * {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myContentText}. + * + * Both {@link ca.uhn.fhir.jpa.search.builder.SearchBuilder} and {@link ca.uhn.fhir.jpa.dao.LegacySearchBuilder} delegate the + * search to {@link ca.uhn.fhir.jpa.dao.FulltextSearchSvcImpl} when active. + * The fulltext search runs first and interprets any search parameters it understands, returning a pid list. + * This pid list is used as a narrowing where clause against the remaining unprocessed search parameters. + * + * This package extends this search to support token, string, and reference parameters via {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myLuceneIndexData}. + * When active, the extracted search parameters which are written to the HFJ_SPIDX_* tables are also written to the Lucene index document. 
+ * + * Activated by {@link ca.uhn.fhir.jpa.api.config.DaoConfig#setAdvancedLuceneIndexing(boolean)}. + * + * @see ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter + * @see ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData + */ +package ca.uhn.fhir.jpa.dao.search; diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java index 79c1908260b..6e5faae5713 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/builder/SearchBuilder.java @@ -310,7 +310,7 @@ public class SearchBuilder implements ISearchBuilder { List pids = new ArrayList<>(); - if (requiresHibernateSearchAccess()) { + if (checkUseHibernateSearch()) { if (myParams.isLastN()) { pids = executeLastNAgainstIndex(theMaximumResults); } else { @@ -345,22 +345,28 @@ public class SearchBuilder implements ISearchBuilder { return queries; } - private boolean requiresHibernateSearchAccess() { - boolean result = (myFulltextSearchSvc != null) && - !myFulltextSearchSvc.isDisabled() && - myFulltextSearchSvc.supportsSomeOf(myParams); + /** + * Check to see if query should use Hibernate Search, and error if the query can't continue. 
+ * + * @return true if the query should first be processed by Hibernate Search + * @throws InvalidRequestException if fulltext search is not enabled but the query requires it - _content or _text + */ + private boolean checkUseHibernateSearch() { + boolean fulltextEnabled = (myFulltextSearchSvc != null) && !myFulltextSearchSvc.isDisabled(); - if (myParams.containsKey(Constants.PARAM_CONTENT) || myParams.containsKey(Constants.PARAM_TEXT)) { - if (myFulltextSearchSvc == null || myFulltextSearchSvc.isDisabled()) { - if (myParams.containsKey(Constants.PARAM_TEXT)) { - throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + Constants.PARAM_TEXT); - } else if (myParams.containsKey(Constants.PARAM_CONTENT)) { - throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + Constants.PARAM_CONTENT); - } - } + if (!fulltextEnabled) { + failIfUsed(Constants.PARAM_TEXT); + failIfUsed(Constants.PARAM_CONTENT); } - return result; + // TODO MB someday we'll want a query planner to figure out if we _should_ use the ft index, not just if we can. 
+ return fulltextEnabled && myFulltextSearchSvc.supportsSomeOf(myParams); + } + + private void failIfUsed(String theParamName) { + if (myParams.containsKey(theParamName)) { + throw new InvalidRequestException("Fulltext search is not enabled on this service, can not process parameter: " + theParamName); + } } private List executeLastNAgainstIndex(Integer theMaximumResults) { diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java index f6590f54eee..0c8939d7baa 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestR4ConfigWithElasticSearch.java @@ -35,7 +35,7 @@ public class TestR4ConfigWithElasticSearch extends TestR4Config { int httpPort = elasticContainer().getMappedPort(9200);//9200 is the HTTP port String host = elasticContainer().getHost(); - ourLog.warn("Hibernate Search: using elasticsearch - host {} {}", host, httpPort); + ourLog.info("Hibernate Search: using elasticsearch - host {} {}", host, httpPort); new ElasticsearchHibernatePropertiesBuilder() .setDebugIndexSyncStrategy("read-sync")