diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java index 53e3eaafab8..6f5fe871238 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java @@ -33,6 +33,7 @@ import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData; import ca.uhn.fhir.jpa.search.autocomplete.ValueSetAutocompleteOptions; import ca.uhn.fhir.jpa.search.autocomplete.ValueSetAutocompleteSearch; import ca.uhn.fhir.jpa.searchparam.SearchParameterMap; +import ca.uhn.fhir.jpa.searchparam.extractor.ISearchParamExtractor; import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams; import ca.uhn.fhir.model.api.IQueryParameterType; import ca.uhn.fhir.rest.api.Constants; @@ -75,6 +76,8 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { private ISearchParamRegistry mySearchParamRegistry; @Autowired private DaoConfig myDaoConfig; + @Autowired + ISearchParamExtractor mySearchParamExtractor; final private ExtendedLuceneSearchBuilder myAdvancedIndexQueryBuilder = new ExtendedLuceneSearchBuilder(); private Boolean ourDisabled; @@ -89,8 +92,8 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { public ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) { String resourceType = myFhirContext.getResourceType(theResource); Map activeSearchParams = mySearchParamRegistry.getActiveSearchParams(resourceType); - ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams); - return extractor.extract(theNewParams); + ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams, mySearchParamExtractor); + return extractor.extract(theResource,theNewParams); } @Override diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java index 26bdf3b461a..f1955d906bc 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java @@ -20,8 +20,8 @@ package ca.uhn.fhir.jpa.dao.search; * #L% */ -import ca.uhn.fhir.i18n.Msg; import ca.uhn.fhir.context.FhirContext; +import ca.uhn.fhir.i18n.Msg; import ca.uhn.fhir.model.api.IQueryParameterType; import ca.uhn.fhir.model.api.TemporalPrecisionEnum; import ca.uhn.fhir.rest.api.Constants; @@ -46,7 +46,6 @@ import org.slf4j.LoggerFactory; import javax.annotation.Nonnull; import java.time.Instant; import java.util.Arrays; -import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -57,6 +56,7 @@ import java.util.stream.Collectors; import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT; import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED; import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT; +import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.SEARCH_PARAM_ROOT; import static org.apache.commons.lang3.StringUtils.isNotBlank; public class ExtendedLuceneClauseBuilder { @@ -144,7 +144,7 @@ public class ExtendedLuceneClauseBuilder { return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(token.getValue()); } else if (StringUtils.isBlank(token.getValue())) { // system without value - return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem()); + return myPredicateFactory.match().field(SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".system").matching(token.getSystem()); } else { // system + value return myPredicateFactory.match().field(getTokenSystemCodeFieldPath(theSearchParamName)).matching(token.getValueAsQueryToken(this.myFhirContext)); @@ -167,12 +167,12 @@ public class ExtendedLuceneClauseBuilder { @Nonnull public static String getTokenCodeFieldPath(String theSearchParamName) { - return "sp." + theSearchParamName + ".token" + ".code"; + return SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".code"; } @Nonnull public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) { - return "sp." + theSearchParamName + ".token" + ".code-system"; + return SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".code-system"; } public void addStringTextSearch(String theSearchParamName, List> stringAndOrTerms) { @@ -190,7 +190,7 @@ public class ExtendedLuceneClauseBuilder { fieldName = "myNarrativeText"; break; default: - fieldName = "sp." + theSearchParamName + ".string." + IDX_STRING_TEXT; + fieldName = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_TEXT; break; } @@ -213,7 +213,7 @@ public class ExtendedLuceneClauseBuilder { } public void addStringExactSearch(String theSearchParamName, List> theStringAndOrTerms) { - String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_EXACT; + String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_EXACT; for (List nextAnd : theStringAndOrTerms) { Set terms = extractOrStringParams(nextAnd); @@ -227,7 +227,7 @@ public class ExtendedLuceneClauseBuilder { } public void addStringContainsSearch(String theSearchParamName, List> theStringAndOrTerms) { - String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED; + String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED; for (List nextAnd : theStringAndOrTerms) { Set terms = extractOrStringParams(nextAnd); ourLog.debug("addStringContainsSearch {} {}", theSearchParamName, terms); @@ -241,7 +241,7 @@ public class ExtendedLuceneClauseBuilder { } public void addStringUnmodifiedSearch(String theSearchParamName, List> theStringAndOrTerms) { - String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED; + String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED; for (List nextAnd : theStringAndOrTerms) { Set terms = extractOrStringParams(nextAnd); ourLog.debug("addStringUnmodifiedSearch {} {}", theSearchParamName, terms); @@ -255,7 +255,7 @@ public class ExtendedLuceneClauseBuilder { } public void addReferenceUnchainedSearch(String theSearchParamName, List> theReferenceAndOrTerms) { - String fieldPath = "sp." + theSearchParamName + ".reference.value"; + String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".reference.value"; for (List nextAnd : theReferenceAndOrTerms) { Set terms = extractOrStringParams(nextAnd); ourLog.trace("reference unchained search {}", terms); @@ -362,8 +362,8 @@ public class ExtendedLuceneClauseBuilder { } private PredicateFinalStep generateDateOrdinalSearchTerms(String theSearchParamName, DateParam theDateParam) { - String lowerOrdinalField = "sp." + theSearchParamName + ".dt.lower-ord"; - String upperOrdinalField = "sp." + theSearchParamName + ".dt.upper-ord"; + String lowerOrdinalField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.lower-ord"; + String upperOrdinalField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.upper-ord"; int lowerBoundAsOrdinal; int upperBoundAsOrdinal; ParamPrefixEnum prefix = theDateParam.getPrefix(); @@ -411,8 +411,8 @@ public class ExtendedLuceneClauseBuilder { } private PredicateFinalStep generateDateInstantSearchTerms(String theSearchParamName, DateParam theDateParam) { - String lowerInstantField = "sp." + theSearchParamName + ".dt.lower"; - String upperInstantField = "sp." + theSearchParamName + ".dt.upper"; + String lowerInstantField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.lower"; + String upperInstantField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.upper"; ParamPrefixEnum prefix = theDateParam.getPrefix(); if (ParamPrefixEnum.NOT_EQUAL == prefix) { diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java index 075d541a4ca..552709fc219 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneIndexExtractor.java @@ -24,7 +24,12 @@ import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.context.RuntimeSearchParam; import ca.uhn.fhir.jpa.model.entity.ResourceLink; import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData; +import ca.uhn.fhir.jpa.searchparam.extractor.ISearchParamExtractor; import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams; +import ca.uhn.fhir.rest.api.RestSearchParameterTypeEnum; +import org.hl7.fhir.instance.model.api.IBase; +import org.hl7.fhir.instance.model.api.IBaseCoding; +import org.hl7.fhir.instance.model.api.IBaseResource; import org.jetbrains.annotations.NotNull; import java.util.ArrayList; @@ -43,21 +48,25 @@ public class ExtendedLuceneIndexExtractor { private final FhirContext myContext; private final Map myParams; + private final ISearchParamExtractor mySearchParamExtractor; - public ExtendedLuceneIndexExtractor(FhirContext theContext, Map theActiveParams) { + public ExtendedLuceneIndexExtractor(FhirContext theContext, Map theActiveParams, ISearchParamExtractor theSearchParamExtractor) { myContext = theContext; myParams = theActiveParams; + mySearchParamExtractor = theSearchParamExtractor; } @NotNull - public ExtendedLuceneIndexData extract(ResourceIndexedSearchParams theNewParams) { + public ExtendedLuceneIndexData extract(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) { ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myContext); + extractAutocompleteTokens(theResource, retVal); + theNewParams.myStringParams.forEach(nextParam -> retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact())); theNewParams.myTokenParams.forEach(nextParam -> - retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue())); + retVal.addTokenIndexDataIfNotPresent(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue())); theNewParams.myDateParams.forEach(nextParam -> retVal.addDateIndexData(nextParam.getParamName(), nextParam.getValueLow(), nextParam.getValueLowDateOrdinal(), @@ -93,4 +102,53 @@ public class ExtendedLuceneIndexExtractor { return retVal; } + + /** + * Re-extract token parameters so we can distinguish + */ + private void extractAutocompleteTokens(IBaseResource theResource, ExtendedLuceneIndexData theRetVal) { + // we need to re-index token params to match up display with codes. + myParams.values().stream() + .filter(p->p.getParamType() == RestSearchParameterTypeEnum.TOKEN) + // TODO it would be nice to reuse TokenExtractor + .forEach(p-> mySearchParamExtractor.extractValues(p.getPath(), theResource) + .stream() + .forEach(nextValue->indexTokenValue(theRetVal, p, nextValue) + )); + } + + private void indexTokenValue(ExtendedLuceneIndexData theRetVal, RuntimeSearchParam p, IBase nextValue) { + String nextType = mySearchParamExtractor.toRootTypeName(nextValue); + String spName = p.getName(); + switch (nextType) { + case "CodeableConcept": + addToken_CodeableConcept(theRetVal, spName, nextValue); + break; + case "Coding": + addToken_Coding(theRetVal, spName, (IBaseCoding) nextValue); + break; + // TODO share this with TokenExtractor and introduce a ITokenIndexer interface. + // Ignore unknown types for now. + // This is just for autocomplete, and we are focused on Observation.code, category, combo-code, etc. +// case "Identifier": +// mySearchParamExtractor.addToken_Identifier(myResourceTypeName, params, searchParam, value); +// break; +// case "ContactPoint": +// mySearchParamExtractor.addToken_ContactPoint(myResourceTypeName, params, searchParam, value); +// break; + default: + break; + } + } + + private void addToken_CodeableConcept(ExtendedLuceneIndexData theRetVal, String theSpName, IBase theValue) { + List codings = mySearchParamExtractor.getCodingsFromCodeableConcept(theValue); + for (IBase nextCoding : codings) { + addToken_Coding(theRetVal, theSpName, (IBaseCoding) nextCoding); + } + } + + private void addToken_Coding(ExtendedLuceneIndexData theRetVal, String theSpName, IBaseCoding theNextValue) { + theRetVal.addTokenIndexData(theSpName, theNextValue); + } } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/LastNAggregation.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/LastNAggregation.java index 239e3e7a13f..f43722745cc 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/LastNAggregation.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/LastNAggregation.java @@ -23,7 +23,6 @@ package ca.uhn.fhir.jpa.dao.search; import com.google.gson.Gson; import com.google.gson.JsonArray; import com.google.gson.JsonObject; -import org.hibernate.search.engine.search.aggregation.AggregationKey; import javax.annotation.Nonnull; import java.util.List; @@ -31,13 +30,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.SEARCH_PARAM_ROOT; + /** * Builds lastN aggregation, and parse the results */ public class LastNAggregation { - static final String SP_SUBJECT = "sp.subject.reference.value"; - private static final String SP_CODE_TOKEN_CODE_AND_SYSTEM = "sp.code.token.code-system"; - private static final String SP_DATE_DT_UPPER = "sp.date.dt.upper"; + static final String SP_SUBJECT = SEARCH_PARAM_ROOT + ".subject.reference.value"; + private static final String SP_CODE_TOKEN_CODE_AND_SYSTEM = SEARCH_PARAM_ROOT + ".code.token.code-system"; + private static final String SP_DATE_DT_UPPER = SEARCH_PARAM_ROOT + ".date.dt.upper"; private static final String GROUP_BY_CODE_SYSTEM_SUB_AGGREGATION = "group_by_code_system"; private static final String MOST_RECENT_EFFECTIVE_SUB_AGGREGATION = "most_recent_effective"; diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java index f67a9fe77c2..826bcd700b8 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/package-info.java @@ -7,7 +7,7 @@ * This package extends this search to support token, string, and reference parameters via {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myLuceneIndexData}. * When active, the extracted search parameters which are written to the HFJ_SPIDX_* tables are also written to the Lucene index document. * For now, we use the existing JPA index entities to populate the {@link ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData} - * in {@link ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor#extract(ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams)} ()} + * in {@link ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor#extract(org.hl7.fhir.instance.model.api.IBaseResource, ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams)} ()} * *

Implementation

* Both {@link ca.uhn.fhir.jpa.search.builder.SearchBuilder} and {@link ca.uhn.fhir.jpa.dao.LegacySearchBuilder} delegate the diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilder.java new file mode 100644 index 00000000000..dcb373f7236 --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilder.java @@ -0,0 +1,40 @@ +package ca.uhn.fhir.jpa.search.autocomplete; + +import com.google.gson.JsonObject; +import org.apache.commons.lang3.Validate; + +import javax.annotation.Nonnull; + +public class RawElasticJsonBuilder { + @Nonnull + static JsonObject makeMatchBoolPrefixPredicate(String theFieldName, String queryText) { + + JsonObject matchBoolBody = new JsonObject(); + matchBoolBody.addProperty(theFieldName, queryText); + + JsonObject predicate = new JsonObject(); + predicate.add("match_bool_prefix", matchBoolBody); + return predicate; + } + + public static JsonObject makeWildcardPredicate(String theFieldName, String theQueryText) { + Validate.notEmpty(theQueryText); + + JsonObject params = new JsonObject(); + params.addProperty("value", theQueryText); + + JsonObject wildcardBody = new JsonObject(); + wildcardBody.add(theFieldName, params); + + JsonObject predicate = new JsonObject(); + predicate.add("wildcard", wildcardBody); + return predicate; + } + + @Nonnull + public static JsonObject makeMatchAllPredicate() { + JsonObject o = new JsonObject(); + o.add("match_all", new JsonObject()); + return o; + } +} diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregation.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregation.java index 9e6c60ecf40..4ff2ffe3298 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregation.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregation.java @@ -20,10 +20,8 @@ package ca.uhn.fhir.jpa.search.autocomplete; * #L% */ -import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder; import com.google.gson.Gson; import com.google.gson.JsonArray; -import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.DocumentContext; @@ -31,6 +29,7 @@ import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.ParseContext; import com.jayway.jsonpath.spi.json.GsonJsonProvider; import com.jayway.jsonpath.spi.mapper.GsonMappingProvider; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import javax.annotation.Nonnull; @@ -38,33 +37,59 @@ import java.util.List; import java.util.stream.Collectors; import java.util.stream.StreamSupport; +import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT; +import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT; + /** * Compose the autocomplete aggregation, and parse the results. */ class TokenAutocompleteAggregation { - static final String NESTED_AGG_NAME = "nestedTopNAgg"; /** * Aggregation template json. * * https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html */ static final JsonObject AGGREGATION_TEMPLATE = - new Gson().fromJson("{\n" + - " \"terms\": {\n" + - " \"field\": \"sp.TEMPLATE_DUMMY.token.code-system\",\n" + - " \"size\": 30,\n" + - " \"min_doc_count\": 1\n" + - " },\n" + - " \"aggs\": {\n" + - " \"" + NESTED_AGG_NAME + "\": {\n" + - " \"top_hits\": {\n" + - " \"_source\": {\n" + - " \"includes\": [ \"sp.TEMPLATE_DUMMY\" ]\n" + - " },\n" + - " \"size\": 1\n" + - " }\n" + - " }\n" + - " }}", JsonObject.class); + new Gson().fromJson("" + + " {" + + " \"nested\": { \"path\": \"nsp.PLACEHOLDER\" }," + + " \"aggs\": {" + + " \"search\": {" + + " \"filter\": {" + + " \"bool\": {" + + " \"must\": [" + + " { \"match_bool_prefix\":" + + " { \"nsp.PLACEHOLDER.string.text\": {" + + " \"query\": \"Mors\"}" + + " }" + + " }" + + " ]" + + " }" + + " }," + + " \"aggs\": {" + + " \"group_by_token\": {" + + " \"terms\": {" + + " \"field\": \"nsp.PLACEHOLDER.token.code-system\"," + + " \"size\": 30," + + " \"min_doc_count\": 1," + + " \"shard_min_doc_count\": 0," + + " \"show_term_doc_count_error\": false" + + " }," + + " \"aggs\": {" + + " \"top_tags_hits\": {" + + " \"top_hits\": {" + + " \"_source\": {" + + " \"includes\": [ \"nsp.PLACEHOLDER\" ]" + + " }," + + " \"size\": 1" + + " }" + + " }" + + " }" + + " }" + + " }" + + " }" + + " }" + + " }", JsonObject.class); static final Configuration configuration = Configuration .builder() @@ -75,12 +100,28 @@ class TokenAutocompleteAggregation { private final String mySpName; private final int myCount; + private final JsonObject mySearch; - public TokenAutocompleteAggregation(String theSpName, int theCount) { + public TokenAutocompleteAggregation(String theSpName, int theCount, String theSearchText, String theSearchModifier) { Validate.notEmpty(theSpName); Validate.isTrue(theCount>0, "count must be positive"); + Validate.isTrue("text".equalsIgnoreCase(theSearchModifier) || "".equals(theSearchModifier) || theSearchModifier == null, "Unsupported search modifier " + theSearchModifier); mySpName = theSpName; myCount = theCount; + mySearch = makeSearch(theSearchText, theSearchModifier); + } + + private JsonObject makeSearch(String theSearchText, String theSearchModifier) { + theSearchText = StringUtils.defaultString(theSearchText); + theSearchModifier = StringUtils.defaultString(theSearchModifier); + + if (StringUtils.isEmpty(theSearchText)) { + return RawElasticJsonBuilder.makeMatchAllPredicate(); + } else if ("text".equalsIgnoreCase(theSearchModifier)) { + return RawElasticJsonBuilder.makeMatchBoolPrefixPredicate(NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".string." + IDX_STRING_TEXT, theSearchText); + } else { + return RawElasticJsonBuilder.makeWildcardPredicate(NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".token.code", theSearchText + "*"); + } } /** @@ -92,9 +133,12 @@ class TokenAutocompleteAggregation { // clone and modify the template with the actual field names. JsonObject result = AGGREGATION_TEMPLATE.deepCopy(); DocumentContext documentContext = parseContext.parse(result); - documentContext.set("terms.field", ExtendedLuceneClauseBuilder.getTokenSystemCodeFieldPath(mySpName)); - documentContext.set("terms.size", myCount); - documentContext.set("aggs." + NESTED_AGG_NAME + ".top_hits._source.includes[0]","sp." + mySpName); + String nestedSearchParamPath = NESTED_SEARCH_PARAM_ROOT + "." + mySpName; + documentContext.set("nested.path", nestedSearchParamPath); + documentContext.set("aggs.search.filter.bool.must[0]", mySearch); + documentContext.set("aggs.search.aggs.group_by_token.terms.field", NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".token" + ".code-system"); + documentContext.set("aggs.search.aggs.group_by_token.terms.size", myCount); + documentContext.set("aggs.search.aggs.group_by_token.aggs.top_tags_hits.top_hits._source.includes[0]", nestedSearchParamPath); return result; } @@ -108,7 +152,11 @@ class TokenAutocompleteAggregation { List extractResults(@Nonnull JsonObject theAggregationResult) { Validate.notNull(theAggregationResult); - JsonArray buckets = theAggregationResult.getAsJsonArray("buckets"); + JsonArray buckets = theAggregationResult + .getAsJsonObject("search") + .getAsJsonObject("group_by_token") + .getAsJsonArray("buckets"); + List result = StreamSupport.stream(buckets.spliterator(), false) .map(b-> bucketToEntry((JsonObject) b)) .collect(Collectors.toList()); @@ -129,16 +177,7 @@ class TokenAutocompleteAggregation { String bucketKey = documentContext.read("key", String.class); // The inner bucket has a hits array, and we only need the first. - JsonObject spRootNode = documentContext.read(NESTED_AGG_NAME + ".hits.hits[0]._source.sp"); - // MB - JsonPath doesn't have placeholders, and I don't want to screw-up quoting mySpName, so read the JsonObject explicitly - JsonObject spNode = spRootNode.getAsJsonObject(mySpName); - JsonElement exactNode = spNode.get("string").getAsJsonObject().get("exact"); - String displayText; - if (exactNode.isJsonArray()) { - displayText = exactNode.getAsJsonArray().get(0).getAsString(); - } else { - displayText = exactNode.getAsString(); - } + String displayText = documentContext.read("top_tags_hits.hits.hits[0]._source.string.text", String.class); return new TokenAutocompleteHit(bucketKey,displayText); } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteHit.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteHit.java index f55497b6b59..e37467e06d0 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteHit.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteHit.java @@ -21,6 +21,7 @@ package ca.uhn.fhir.jpa.search.autocomplete; */ import org.apache.commons.lang3.Validate; +import org.apache.commons.lang3.builder.ToStringBuilder; import javax.annotation.Nonnull; @@ -42,4 +43,12 @@ class TokenAutocompleteHit { public String getSystemCode() { return mySystemCode; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("mySystemCode", mySystemCode) + .append("myDisplayText", myDisplayText) + .toString(); + } } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java index d7a19dd8358..c7b56949803 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java @@ -21,18 +21,12 @@ package ca.uhn.fhir.jpa.search.autocomplete; */ import ca.uhn.fhir.context.FhirContext; -import ca.uhn.fhir.i18n.Msg; import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder; import ca.uhn.fhir.jpa.model.entity.ResourceTable; -import ca.uhn.fhir.model.api.IQueryParameterType; -import ca.uhn.fhir.rest.param.StringParam; import com.google.gson.JsonObject; -import org.apache.commons.lang3.StringUtils; import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension; import org.hibernate.search.engine.search.aggregation.AggregationKey; import org.hibernate.search.engine.search.aggregation.SearchAggregation; -import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep; -import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory; import org.hibernate.search.engine.search.query.SearchResult; import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep; import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep; @@ -41,7 +35,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nonnull; -import java.util.Collections; import java.util.List; import static org.apache.commons.lang3.StringUtils.isNotBlank; @@ -63,23 +56,28 @@ class TokenAutocompleteSearch { /** - * Search for tokens indexed by theSPName on theResourceType matching theSearchText. - * @param theResourceType The resource type (e.g. Observation) + * Search for tokens indexed by theSPName on theResourceName matching theSearchText. + * @param theResourceName The resource type (e.g. Observation) * @param theSPName The search param code (e.g. combo-code) * @param theSearchText The search test (e.g. "bloo") * @return A collection of Coding elements */ @Nonnull - public List search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) { + public List search(String theResourceName, String theSPName, String theSearchText, String theSearchModifier, int theCount) { - ourLog.trace("search: {}?{}:{}={}", theResourceType,theSPName, theSearchModifier, theSearchText); - - TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount); + TokenAutocompleteAggregation tokenAutocompleteAggregation = + new TokenAutocompleteAggregation(theSPName, theCount, theSearchText, theSearchModifier); // compose the query json SearchQueryOptionsStep query = mySession.search(ResourceTable.class) - .where(f -> f.bool(b -> - buildQueryPredicate(b, f, theResourceType, theSPName, theSearchModifier, theSearchText))) + .where(predFactory -> predFactory.bool(boolBuilder -> { + ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, boolBuilder, predFactory); + + // we apply resource-level predicates here, at the top level + if (isNotBlank(theResourceName)) { + clauseBuilder.addResourceTypeClause(theResourceName); + } + })) .aggregation(AGGREGATION_KEY, buildAggregation(tokenAutocompleteAggregation)); // run the query, but with 0 results. We only care about the aggregations. @@ -92,39 +90,6 @@ class TokenAutocompleteSearch { return aggEntries; } - void buildQueryPredicate(BooleanPredicateClausesStep b, SearchPredicateFactory f, String theResourceType, String theSPName, String theSearchModifier, String theSearchText) { - ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f); - - if (isNotBlank(theResourceType)) { - clauseBuilder.addResourceTypeClause(theResourceType); - } - - String queryText = StringUtils.defaultString(theSearchText, ""); - if (StringUtils.isNotEmpty(queryText)) { - switch (StringUtils.defaultString(theSearchModifier)) { - case "text": - // Add a wildcard to act like match_bool_prefix - // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html - queryText = queryText + "*"; - StringParam stringParam = new StringParam(queryText); - List> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam)); - clauseBuilder.addStringTextSearch(theSPName, andOrTerms); - break; - case "": - b.must( - // use wildcard to allow matching prefix of keyword indexed field. - f.wildcard() - .field(ExtendedLuceneClauseBuilder.getTokenCodeFieldPath(theSPName)) - .matching(queryText + "*") - .toPredicate()); - break; - default: - throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now."); - } - - } - } - /** * Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON. */ diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/package-info.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/package-info.java index 338ceba1708..07148a71b14 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/package-info.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/package-info.java @@ -5,13 +5,8 @@ * * This work depends on on the Hibernate Search infrastructure in {@link ca.uhn.fhir.jpa.dao.search}. * - * NIH sponsored this work to provide an interactive-autocomplete when browsing codes in a research dataset. + * Provides an interactive-autocomplete when browsing codes in a dataset. * - * https://gitlab.com/simpatico.ai/cdr/-/issues/2452 - * wipmb TODO-LIST - * wipmb - docs - no partition support - * wipmb - link to docs - * wipmb what if the sp isn't of type token? do we check, or discard results without tokens? * */ package ca.uhn.fhir.jpa.search.autocomplete; diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestDataBuilderConfig.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestDataBuilderConfig.java new file mode 100644 index 00000000000..dce96d4106f --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/config/TestDataBuilderConfig.java @@ -0,0 +1,23 @@ +package ca.uhn.fhir.jpa.config; + +import ca.uhn.fhir.context.FhirContext; +import ca.uhn.fhir.jpa.api.dao.DaoRegistry; +import ca.uhn.fhir.jpa.dao.DaoTestDataBuilder; +import ca.uhn.fhir.jpa.partition.SystemRequestDetails; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class TestDataBuilderConfig { + + @Autowired + FhirContext myFhirContext; + @Autowired + DaoRegistry myDaoRegistry; + + @Bean + DaoTestDataBuilder testDataBuilder() { + return new DaoTestDataBuilder(myFhirContext, myDaoRegistry, new SystemRequestDetails()); + } +} diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilderTest.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilderTest.java new file mode 100644 index 00000000000..18380e721a4 --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/RawElasticJsonBuilderTest.java @@ -0,0 +1,47 @@ +package ca.uhn.fhir.jpa.search.autocomplete; + +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class RawElasticJsonBuilderTest { + + JsonObject myJson; + + @Test + public void matchAll() { + myJson = RawElasticJsonBuilder.makeMatchAllPredicate(); + + assertJson(""" + {"match_all": {}}"""); + } + + @Test + public void wildcard() { + myJson = RawElasticJsonBuilder.makeWildcardPredicate("a.field", "pattern_text"); + + assertJson(""" + { "wildcard": { + "a.field": { + "value": "pattern_text" + }}}"""); + } + + @Test + public void matchBoolPrefix() { + myJson = RawElasticJsonBuilder.makeMatchBoolPrefixPredicate("a.field", "query_text"); + + assertJson(""" + { "match_bool_prefix" : { + "a.field" : "query_text" + }}"""); + } + + private void assertJson(String expected) { + assertEquals(JsonParser.parseString(expected).getAsJsonObject(), myJson); + } + + +} diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregationTest.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregationTest.java index ac89fecf90f..2eb406000d6 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregationTest.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteAggregationTest.java @@ -6,7 +6,6 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import java.util.List; -import java.util.Optional; import static com.jayway.jsonpath.matchers.JsonPathMatchers.isJson; import static com.jayway.jsonpath.matchers.JsonPathMatchers.withJsonPath; @@ -30,8 +29,10 @@ class TokenAutocompleteAggregationTest { myCode = "combo-code"; buildAggregation(); - assertThat("terms field is sp", myAggJson, isJson(withJsonPath("terms.field", equalTo("sp.combo-code.token.code-system")))); - assertThat("fetched piece is sp", myAggJson, isJson(withJsonPath("aggs.nestedTopNAgg.top_hits._source.includes[0]", equalTo("sp.combo-code")))); + + assertThat("nested clause includes sp", myAggJson, isJson(withJsonPath("nested.path", equalTo("nsp.combo-code")))); + assertThat("terms field is sp", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.terms.field", equalTo("nsp.combo-code.token.code-system")))); + assertThat("fetched piece is sp", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.aggs.top_tags_hits.top_hits._source.includes[0]", equalTo("nsp.combo-code")))); } @Test @@ -40,36 +41,103 @@ class TokenAutocompleteAggregationTest { myCount = 77; buildAggregation(); - assertThat("terms field is sp", myAggJson, isJson(withJsonPath("terms.size", equalTo(77)))); + + assertThat("count for top n", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.terms.size", equalTo(77)))); } private void buildAggregation() { - myAggJson = new TokenAutocompleteAggregation(myCode, myCount).toJsonAggregation().toString(); + myAggJson = new TokenAutocompleteAggregation(myCode, myCount, null, null).toJsonAggregation().toString(); } } @Nested public class ResultExtraction { // Sample result from elastic for Observation.code - String resultJson = "" + - "{ \"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0," + - " \"buckets\": [" + - " { \"key\": \"http://loinc.org|88262-1\"," + - " \"doc_count\":3," + - " \"nestedTopNAgg\": " + - " { \"hits\":" + - " { \"total\":{\"value\":3,\"relation\":\"eq\"}, \"max_score\":1.0," + - " \"hits\":[" + - " { \"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"13\",\"_score\":1.0," + - " \"_source\":{\"sp\":{\"code\":" + - " { \"string\":{\"exact\":\"Gram positive blood culture panel by Probe in Positive blood culture\",\"text\":\"Gram positive blood culture panel by Probe in Positive blood culture\",\"norm\":\"Gram positive blood culture panel by Probe in Positive blood culture\"}," + - " \"token\":{\"code\":\"88262-1\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|88262-1\"}}}}}]}}}," + - // a second result - "{\"key\":\"http://loinc.org|4544-3\",\"doc_count\":1,\"nestedTopNAgg\":{\"hits\":{\"total\":{\"value\":1,\"relation\":\"eq\"},\"max_score\":1.0,\"hits\":[{\"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"12\",\"_score\":1.0,\"_source\":{\"sp\":{\"code\":{\"string\":{\"exact\":\"Hematocrit [Volume Fraction] of Blood by Automated count\",\"text\":\"Hematocrit [Volume Fraction] of Blood by Automated count\",\"norm\":\"Hematocrit [Volume Fraction] of Blood by Automated count\"},\"token\":{\"code\":\"4544-3\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|4544-3\"}}}}}]}}}," + - "{\"key\":\"http://loinc.org|4548-4\",\"doc_count\":1,\"nestedTopNAgg\":{\"hits\":{\"total\":{\"value\":1,\"relation\":\"eq\"},\"max_score\":1.0,\"hits\":[{\"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"11\",\"_score\":1.0,\"_source\":{\"sp\":{\"code\":{\"string\":{\"exact\":\"Hemoglobin A1c/Hemoglobin.total in Blood\",\"text\":\"Hemoglobin A1c/Hemoglobin.total in Blood\",\"norm\":\"Hemoglobin A1c/Hemoglobin.total in Blood\"},\"token\":{\"code\":\"4548-4\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|4548-4\"}}}}}]}}}" + - "]}"; + String resultJson = """ + { + "doc_count": 22770, + "search": { + "doc_count": 4, + "group_by_token": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "http://loinc.org|59460-6", + "doc_count": 2, + "top_tags_hits": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 4.9845064e-05, + "hits": [ + { + "_index": "resourcetable-000001", + "_type": "_doc", + "_id": "1405280", + "_nested": { + "field": "nsp.code", + "offset": 0 + }, + "_score": 4.9845064e-05, + "_source": { + "string": { + "text": "Fall risk total [Morse Fall Scale]" + }, + "token": { + "code": "59460-6", + "system": "http://loinc.org", + "code-system": "http://loinc.org|59460-6" + } + } + } + ] + } + } + }, + { + "key": "http://loinc.org|59461-4", + "doc_count": 2, + "top_tags_hits": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 4.9845064e-05, + "hits": [ + { + "_index": "resourcetable-000001", + "_type": "_doc", + "_id": "1405281", + "_nested": { + "field": "nsp.code", + "offset": 0 + }, + "_score": 4.9845064e-05, + "_source": { + "string": { + "text": "Fall risk level [Morse Fall Scale]" + }, + "token": { + "code": "59461-4", + "system": "http://loinc.org", + "code-system": "http://loinc.org|59461-4" + } + } + } + ] + } + } + } + ] + } + } + }"""; JsonObject parsedResult = new Gson().fromJson(resultJson, JsonObject.class); - TokenAutocompleteAggregation myAutocompleteAggregation = new TokenAutocompleteAggregation("code", 22); + TokenAutocompleteAggregation myAutocompleteAggregation = new TokenAutocompleteAggregation("code", 22, null, null); @Test public void testResultExtraction() { @@ -77,69 +145,24 @@ class TokenAutocompleteAggregationTest { List hits = myAutocompleteAggregation.extractResults(parsedResult); assertThat(hits, is(not(empty()))); - assertThat(hits, (hasSize(3))); + assertThat(hits, (hasSize(2))); } @Test public void testBucketExtraction() { - JsonObject bucket = (JsonObject) parsedResult.getAsJsonArray("buckets").get(0); + JsonObject bucket = parsedResult + .getAsJsonObject("search") + .getAsJsonObject("group_by_token") + .getAsJsonArray("buckets") + .get(0) + .getAsJsonObject(); TokenAutocompleteHit entry = myAutocompleteAggregation.bucketToEntry(bucket); - assertThat(entry.mySystemCode, equalTo("http://loinc.org|88262-1")); - assertThat(entry.myDisplayText, equalTo("Gram positive blood culture panel by Probe in Positive blood culture")); + assertThat(entry.mySystemCode, equalTo("http://loinc.org|59460-6")); + assertThat(entry.myDisplayText, equalTo("Fall risk total [Morse Fall Scale]")); } - /** - * Until we move to nested, we may have multiple Coding in a code. This is broken. - */ - @Test - public void testMultiValuedBucketExtraction() { - JsonObject bucket = new Gson().fromJson("{" + - " \"key\": \"http://loinc.org|2708-6\"," + - " \"doc_count\": 14," + - " \"nestedTopNAgg\": {" + - " \"hits\": {" + - " \"total\": {" + - " \"value\": 14," + - " \"relation\": \"eq\"" + - " }," + - " \"max_score\": 1.0000025," + - " \"hits\": [" + - " {" + - " \"_index\": \"resourcetable-000001\"," + - " \"_type\": \"_doc\"," + - " \"_id\": \"1393284\"," + - " \"_score\": 1.0000025," + - " \"_source\": {" + - " \"sp\": {" + - " \"code\": {" + - " \"string\": {" + - " \"exact\": [" + - " \"Oxygen saturation in Arterial blood by Pulse oximetry\"," + - " \"Oxygen saturation in Arterial blood\"" + - " ]" + - " }," + - " \"token\": {" + - " \"code-system\": [" + - " \"http://loinc.org|2708-6\"," + - " \"http://loinc.org|59408-5\"" + - " ]" + - " }" + - " }" + - " }" + - " }" + - " }" + - " ]" + - " }" + - " }" + - "}", JsonObject.class); - - TokenAutocompleteHit entry = myAutocompleteAggregation.bucketToEntry(bucket); - assertThat(entry.mySystemCode, equalTo("http://loinc.org|2708-6")); - assertThat(entry.myDisplayText, equalTo("Oxygen saturation in Arterial blood by Pulse oximetry")); - - } } diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java index 27149ead657..376c59be7b8 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java @@ -6,6 +6,7 @@ import ca.uhn.fhir.jpa.api.dao.IFhirResourceDao; import ca.uhn.fhir.jpa.api.dao.IFhirSystemDao; import ca.uhn.fhir.jpa.api.svc.ISearchCoordinatorSvc; import ca.uhn.fhir.jpa.bulk.export.api.IBulkDataExportSvc; +import ca.uhn.fhir.jpa.config.TestDataBuilderConfig; import ca.uhn.fhir.jpa.config.TestHibernateSearchAddInConfig; import ca.uhn.fhir.jpa.config.TestR4Config; import ca.uhn.fhir.jpa.dao.BaseJpaTest; @@ -13,6 +14,7 @@ import ca.uhn.fhir.jpa.search.reindex.IResourceReindexingSvc; import ca.uhn.fhir.jpa.sp.ISearchParamPresenceSvc; import ca.uhn.fhir.rest.server.servlet.ServletRequestDetails; import ca.uhn.fhir.rest.server.util.ISearchParamRegistry; +import ca.uhn.fhir.test.utilities.ITestDataBuilder; import ca.uhn.fhir.test.utilities.docker.RequiresDocker; import org.hamcrest.Description; import org.hamcrest.Matcher; @@ -35,9 +37,9 @@ import javax.annotation.Nonnull; import javax.persistence.EntityManager; import java.util.List; import java.util.Objects; -import java.util.stream.Collectors; import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; @@ -46,8 +48,11 @@ import static org.hamcrest.Matchers.not; @ExtendWith(SpringExtension.class) @RequiresDocker -@ContextConfiguration(classes = {TestR4Config.class, TestHibernateSearchAddInConfig.Elasticsearch.class}) -public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { +@ContextConfiguration(classes = { + TestR4Config.class, TestHibernateSearchAddInConfig.Elasticsearch.class, TestDataBuilderConfig.class +}) +public class TokenAutocompleteElasticsearchIT extends BaseJpaTest{ + public static final Coding erythrocyte_by_volume = new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count"); @Autowired protected PlatformTransactionManager myTxManager; protected ServletRequestDetails mySrd = new ServletRequestDetails(); @@ -72,6 +77,12 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { IResourceReindexingSvc myResourceReindexingSvc; @Autowired IBulkDataExportSvc myBulkDataExportSvc; + @Autowired + ITestDataBuilder myDataBuilder; + + // a few different codes + static final Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure"); + static final Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"); @BeforeEach public void beforePurgeDatabase() { @@ -88,14 +99,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { protected PlatformTransactionManager getTxManager() { return myTxManager; } + @Test public void testAutocompleteByCodeDisplay() { - // a few different codes - Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure"); - Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"); - - createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count")); + createObservationWithCode(erythrocyte_by_volume); createObservationWithCode(mean_blood_pressure); createObservationWithCode(mean_blood_pressure); createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count")); @@ -146,6 +154,29 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { } + /** + * If an observation has multiple codes, make sure searching by text only matches the right code. + */ + @Test + public void testAutocompleteDistinguishesMultipleCodes() { + + createObservationWithCode(erythrocyte_by_volume,mean_blood_pressure,gram_positive_culture); + createObservationWithCode(gram_positive_culture); + createObservationWithCode(mean_blood_pressure); + + List codes = autocompleteSearch("Observation", "code", null, null); + assertThat("null finds all three codes", codes, hasSize(3)); + + codes = autocompleteSearch("Observation", "code", null, "789"); + assertThat("token prefix finds the matching code", codes, hasItem(matchingSystemAndCode(erythrocyte_by_volume))); + assertThat("token prefix finds only the matching code, not all codes on the resource", codes, contains(matchingSystemAndCode(erythrocyte_by_volume))); + + codes = autocompleteSearch("Observation", "code", "text", "erythrocyte"); + assertThat("text finds the matching code", codes, hasItem(matchingSystemAndCode(erythrocyte_by_volume))); + assertThat("text finds only the matching code, not all codes on the resource", codes, contains(matchingSystemAndCode(erythrocyte_by_volume))); + + } + List autocompleteSearch(String theResourceType, String theSPName, String theModifier, String theSearchText) { return new TransactionTemplate(myTxManager).execute(s -> { TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager)); @@ -153,9 +184,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { }); } - private IIdType createObservationWithCode(Coding c) { + private IIdType createObservationWithCode(Coding... theCodings) { Observation obs1 = new Observation(); - obs1.getCode().addCoding(c); + for (Coding coding : theCodings) { + obs1.getCode().addCoding(coding); + } return myObservationDao.create(obs1, mySrd).getId().toUnqualifiedVersionless(); } diff --git a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/ExtendedLuceneIndexData.java b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/ExtendedLuceneIndexData.java index 3c9245a9795..c05e6867170 100644 --- a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/ExtendedLuceneIndexData.java +++ b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/ExtendedLuceneIndexData.java @@ -21,16 +21,17 @@ package ca.uhn.fhir.jpa.model.search; */ import ca.uhn.fhir.context.FhirContext; -import ca.uhn.fhir.rest.param.TokenParam; +import ca.uhn.fhir.model.dstu2.composite.CodingDt; import com.google.common.collect.HashMultimap; import com.google.common.collect.SetMultimap; import org.hibernate.search.engine.backend.document.DocumentElement; +import org.hl7.fhir.instance.model.api.IBaseCoding; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.function.BiConsumer; - import java.util.Date; +import java.util.Objects; +import java.util.function.BiConsumer; /** * Collects our lucene extended indexing data. @@ -41,7 +42,7 @@ public class ExtendedLuceneIndexData { final FhirContext myFhirContext; final SetMultimap mySearchParamStrings = HashMultimap.create(); - final SetMultimap mySearchParamTokens = HashMultimap.create(); + final SetMultimap mySearchParamTokens = HashMultimap.create(); final SetMultimap mySearchParamLinks = HashMultimap.create(); final SetMultimap mySearchParamDates = HashMultimap.create(); @@ -73,8 +74,19 @@ public class ExtendedLuceneIndexData { mySearchParamStrings.put(theSpName, theText); } - public void addTokenIndexData(String theSpName, String theSystem, String theValue) { - mySearchParamTokens.put(theSpName, new TokenParam(theSystem, theValue)); + /** + * Add if not already present. + */ + public void addTokenIndexDataIfNotPresent(String theSpName, String theSystem, String theValue) { + boolean isPresent = mySearchParamTokens.get(theSpName).stream() + .anyMatch(c -> Objects.equals(c.getSystem(), theSystem) && Objects.equals(c.getCode(), theValue)); + if (!isPresent) { + addTokenIndexData(theSpName, new CodingDt(theSystem, theValue)); + } + } + + public void addTokenIndexData(String theSpName, IBaseCoding theNextValue) { + mySearchParamTokens.put(theSpName, theNextValue); } public void addResourceLinkIndexData(String theSpName, String theTargetResourceId) { @@ -84,4 +96,5 @@ public class ExtendedLuceneIndexData { public void addDateIndexData(String theSpName, Date theLowerBound, int theLowerBoundOrdinal, Date theUpperBound, int theUpperBoundOrdinal) { mySearchParamDates.put(theSpName, new DateSearchIndexData(theLowerBound, theLowerBoundOrdinal, theUpperBound, theUpperBoundOrdinal)); } + } diff --git a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/HibernateSearchIndexWriter.java b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/HibernateSearchIndexWriter.java index fa7165ec980..29f2c54ecd6 100644 --- a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/HibernateSearchIndexWriter.java +++ b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/HibernateSearchIndexWriter.java @@ -21,8 +21,9 @@ package ca.uhn.fhir.jpa.model.search; */ import ca.uhn.fhir.context.FhirContext; -import ca.uhn.fhir.rest.param.TokenParam; +import org.apache.commons.lang3.StringUtils; import org.hibernate.search.engine.backend.document.DocumentElement; +import org.hl7.fhir.instance.model.api.IBaseCoding; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +32,8 @@ public class HibernateSearchIndexWriter { public static final String IDX_STRING_NORMALIZED = "norm"; public static final String IDX_STRING_EXACT = "exact"; public static final String IDX_STRING_TEXT = "text"; + public static final String NESTED_SEARCH_PARAM_ROOT = "nsp"; + public static final String SEARCH_PARAM_ROOT = "sp"; final HibernateSearchElementCache myNodeCache; final FhirContext myFhirContext; @@ -40,7 +43,7 @@ public class HibernateSearchIndexWriter { } public DocumentElement getSearchParamIndexNode(String theSearchParamName, String theIndexType) { - return myNodeCache.getObjectElement("sp", theSearchParamName, theIndexType); + return myNodeCache.getObjectElement(SEARCH_PARAM_ROOT, theSearchParamName, theIndexType); } @@ -57,14 +60,25 @@ public class HibernateSearchIndexWriter { ourLog.debug("Adding Search Param Text: {} -- {}", theSearchParam, theValue); } - public void writeTokenIndex(String theSearchParam, TokenParam theValue) { + public void writeTokenIndex(String theSearchParam, IBaseCoding theValue) { + DocumentElement nestedRoot = myNodeCache.getObjectElement(NESTED_SEARCH_PARAM_ROOT); + DocumentElement nestedSpNode = nestedRoot.addObject(theSearchParam); + DocumentElement nestedTokenNode = nestedSpNode.addObject("token"); + nestedTokenNode.addValue("code", theValue.getCode()); + nestedTokenNode.addValue("system", theValue.getSystem()); + nestedTokenNode.addValue("code-system", theValue.getSystem() + "|" + theValue.getCode()); + if (StringUtils.isNotEmpty(theValue.getDisplay())) { + DocumentElement nestedStringNode = nestedSpNode.addObject("string"); + nestedStringNode.addValue(IDX_STRING_TEXT, theValue.getDisplay()); + } + DocumentElement tokenIndexNode = getSearchParamIndexNode(theSearchParam, "token"); // TODO mb we can use a token_filter with pattern_capture to generate all three off a single value. Do this next, after merge. - tokenIndexNode.addValue("code", theValue.getValue()); + tokenIndexNode.addValue("code", theValue.getCode()); tokenIndexNode.addValue("system", theValue.getSystem()); - //This next one returns as system|value - tokenIndexNode.addValue("code-system", theValue.getValueAsQueryToken(myFhirContext)); + tokenIndexNode.addValue("code-system", theValue.getSystem() + "|" + theValue.getCode()); ourLog.debug("Adding Search Param Token: {} -- {}", theSearchParam, theValue); + // TODO mb should we write the strings here too? Or leave it to the old spidx indexing? } public void writeReferenceIndex(String theSearchParam, String theValue) { diff --git a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/SearchParamTextPropertyBinder.java b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/SearchParamTextPropertyBinder.java index fdf908291c7..91b0e9b1233 100644 --- a/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/SearchParamTextPropertyBinder.java +++ b/hapi-fhir-jpaserver-model/src/main/java/ca/uhn/fhir/jpa/model/search/SearchParamTextPropertyBinder.java @@ -38,7 +38,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Instant; -import java.time.LocalDateTime; import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT; import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED; @@ -108,17 +107,16 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr // The following section is a bit ugly. We need to enforce order and dependency or the object matches will be too big. { - IndexSchemaObjectField spfield = indexSchemaElement.objectField("sp", ObjectStructure.FLATTENED); + IndexSchemaObjectField spfield = indexSchemaElement.objectField(HibernateSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED); spfield.toReference(); + IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField(HibernateSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED); + nestedSpField.toReference(); // TODO MB: the lucene/elastic independent api is hurting a bit here. // For lucene, we need a separate field for each analyzer. So we'll add string (for :exact), and text (for :text). // They aren't marked stored, so there's no space cost beyond the index for each. // But for elastic, I'd rather have a single field defined, with multi-field sub-fields. The index cost is the same, // but elastic will actually store all fields in the source document. - // Something like this. But we'll need two index writers (lucene vs hibernate). -// ElasticsearchNativeIndexFieldTypeMappingStep nativeStep = indexFieldTypeFactory.extension(ElasticsearchExtension.get()).asNative(); -// nativeStep.mapping() // So triplicate the storage for now. :-( String stringPathGlob = "*.string"; @@ -127,6 +125,9 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued(); spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued(); + nestedSpField.objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob); + nestedSpField.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued(); + // token // Ideally, we'd store a single code-system string and use a custom tokenizer to // generate "system|" "|code" and "system|code" tokens to support all three. @@ -139,6 +140,11 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued(); spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued(); + nestedSpField.objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob); + nestedSpField.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued(); + nestedSpField.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued(); + nestedSpField.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued(); + // reference // reference @@ -154,6 +160,9 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr // last, since the globs are matched in declaration order, and * matches even nested nodes. spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*"); + + // we use nested search params for the autocomplete search. + nestedSpField.objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED).matchingPathGlob("*").multiValued(); } } diff --git a/hapi-fhir-jpaserver-searchparam/src/main/java/ca/uhn/fhir/jpa/searchparam/extractor/BaseSearchParamExtractor.java b/hapi-fhir-jpaserver-searchparam/src/main/java/ca/uhn/fhir/jpa/searchparam/extractor/BaseSearchParamExtractor.java index 11ead69cac2..e55f240382a 100644 --- a/hapi-fhir-jpaserver-searchparam/src/main/java/ca/uhn/fhir/jpa/searchparam/extractor/BaseSearchParamExtractor.java +++ b/hapi-fhir-jpaserver-searchparam/src/main/java/ca/uhn/fhir/jpa/searchparam/extractor/BaseSearchParamExtractor.java @@ -730,7 +730,7 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor if ("Coding".equals(nextType)) { String system = extractValueAsString(myCodingSystemValueChild, theValue); String code = extractValueAsString(myCodingCodeValueChild, theValue); - return createTokenIndexIfNotBlank(theResourceType, theSearchParam, system, code); + return createTokenIndexIfNotBlank(theResourceType, system, code, theSearchParam.getName()); } else { return null; } @@ -1079,7 +1079,7 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor } private void createTokenIndexIfNotBlankAndAdd(String theResourceType, Set theParams, RuntimeSearchParam theSearchParam, String theSystem, String theValue) { - ResourceIndexedSearchParamToken nextEntity = createTokenIndexIfNotBlank(theResourceType, theSearchParam, theSystem, theValue); + ResourceIndexedSearchParamToken nextEntity = createTokenIndexIfNotBlank(theResourceType, theSystem, theValue, theSearchParam.getName()); if (nextEntity != null) { theParams.add(nextEntity); } @@ -1090,11 +1090,6 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor myPartitionSettings = thePartitionSettings; } - private ResourceIndexedSearchParamToken createTokenIndexIfNotBlank(String theResourceType, RuntimeSearchParam theSearchParam, String theSystem, String theValue) { - String searchParamName = theSearchParam.getName(); - return createTokenIndexIfNotBlank(theResourceType, theSystem, theValue, searchParamName); - } - private ResourceIndexedSearchParamToken createTokenIndexIfNotBlank(String theResourceType, String theSystem, String theValue, String searchParamName) { String system = theSystem; String value = theValue; diff --git a/hapi-fhir-test-utilities/src/main/java/ca/uhn/fhir/test/utilities/ITestDataBuilder.java b/hapi-fhir-test-utilities/src/main/java/ca/uhn/fhir/test/utilities/ITestDataBuilder.java index 59c797b669b..3b668f9abda 100644 --- a/hapi-fhir-test-utilities/src/main/java/ca/uhn/fhir/test/utilities/ITestDataBuilder.java +++ b/hapi-fhir-test-utilities/src/main/java/ca/uhn/fhir/test/utilities/ITestDataBuilder.java @@ -24,6 +24,7 @@ import ca.uhn.fhir.context.BaseRuntimeChildDefinition; import ca.uhn.fhir.context.BaseRuntimeElementCompositeDefinition; import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.context.RuntimeResourceDefinition; +import ca.uhn.fhir.util.FhirTerser; import org.hl7.fhir.instance.model.api.IBase; import org.hl7.fhir.instance.model.api.IBaseReference; import org.hl7.fhir.instance.model.api.IBaseResource; @@ -185,13 +186,10 @@ public interface ITestDataBuilder { default Consumer withObservationCode(@Nullable String theSystem, @Nullable String theCode) { return t -> { - ICompositeType codeableConcept = (ICompositeType) getFhirContext().getElementDefinition("CodeableConcept").newInstance(); - IBase coding = getFhirContext().newTerser().addElement(codeableConcept, "coding"); - getFhirContext().newTerser().addElement(coding, "system", theSystem); - getFhirContext().newTerser().addElement(coding, "code", theCode); - - RuntimeResourceDefinition resourceDef = getFhirContext().getResourceDefinition(t.getClass()); - resourceDef.getChildByName("code").getMutator().addValue(t, codeableConcept); + FhirTerser terser = getFhirContext().newTerser(); + IBase coding = terser.addElement(t, "code.coding"); + terser.addElement(coding, "system", theSystem); + terser.addElement(coding, "code", theCode); }; } diff --git a/lgtm.yml b/lgtm.yml index f6c8f992193..2ea9f29a472 100644 --- a/lgtm.yml +++ b/lgtm.yml @@ -4,5 +4,5 @@ extraction: index: java_version: 17 maven: - version: 3.8.1 + version: 3.8.4