From ae33cf825b971588dbe0d7d39a3d34b416e162aa Mon Sep 17 00:00:00 2001 From: michaelabuckley Date: Wed, 16 Feb 2022 10:23:16 -0500 Subject: [PATCH] Handle token search by partial code, and various nulls (#3399) --- .../fhir/jpa/dao/FulltextSearchSvcImpl.java | 2 +- .../search/ExtendedLuceneClauseBuilder.java | 21 ++++-- .../autocomplete/TokenAutocompleteSearch.java | 72 +++++++++++-------- .../TokenAutocompleteElasticsearchIT.java | 41 ++++++++--- 4 files changed, 92 insertions(+), 44 deletions(-) diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java index 225ec1cbcd4..53e3eaafab8 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/FulltextSearchSvcImpl.java @@ -147,7 +147,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc { } if (isNotBlank(theResourceType)) { - b.must(f.match().field("myResourceType").matching(theResourceType)); + builder.addResourceTypeClause(theResourceType); } /* diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java index 564f548ac52..26bdf3b461a 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/dao/search/ExtendedLuceneClauseBuilder.java @@ -63,8 +63,8 @@ public class ExtendedLuceneClauseBuilder { private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class); final FhirContext myFhirContext; - final SearchPredicateFactory myPredicateFactory; - final BooleanPredicateClausesStep myRootClause; + public final SearchPredicateFactory myPredicateFactory; + public final BooleanPredicateClausesStep myRootClause; final List ordinalSearchPrecisions = Arrays.asList(TemporalPrecisionEnum.YEAR, TemporalPrecisionEnum.MONTH, TemporalPrecisionEnum.DAY); @@ -74,6 +74,14 @@ public class ExtendedLuceneClauseBuilder { this.myPredicateFactory = myPredicateFactory; } + /** + * Restrict search to resources of a type + * @param theResourceType the type to match. e.g. "Observation" + */ + public void addResourceTypeClause(String theResourceType) { + myRootClause.must(myPredicateFactory.match().field("myResourceType").matching(theResourceType)); + } + @Nonnull private Set extractOrStringParams(List nextAnd) { Set terms = new HashSet<>(); @@ -133,7 +141,7 @@ public class ExtendedLuceneClauseBuilder { TokenParam token = (TokenParam) orTerm; if (StringUtils.isBlank(token.getSystem())) { // bare value - return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(token.getValue()); + return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(token.getValue()); } else if (StringUtils.isBlank(token.getValue())) { // system without value return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem()); @@ -145,7 +153,7 @@ public class ExtendedLuceneClauseBuilder { // MB I don't quite understand why FhirResourceDaoR4SearchNoFtTest.testSearchByIdParamWrongType() uses String but here we are StringParam string = (StringParam) orTerm; // treat a string as a code with no system (like _id) - return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(string.getValue()); + return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(string.getValue()); } else { throw new IllegalArgumentException(Msg.code(1089) + "Unexpected param type for token search-param: " + orTerm.getClass().getName()); } @@ -157,6 +165,11 @@ public class ExtendedLuceneClauseBuilder { } + @Nonnull + public static String getTokenCodeFieldPath(String theSearchParamName) { + return "sp." + theSearchParamName + ".token" + ".code"; + } + @Nonnull public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) { return "sp." + theSearchParamName + ".token" + ".code-system"; diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java index 325fc5edd6b..d7a19dd8358 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteSearch.java @@ -31,6 +31,8 @@ import org.apache.commons.lang3.StringUtils; import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension; import org.hibernate.search.engine.search.aggregation.AggregationKey; import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep; +import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory; import org.hibernate.search.engine.search.query.SearchResult; import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep; import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep; @@ -70,40 +72,15 @@ class TokenAutocompleteSearch { @Nonnull public List search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) { - TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount); + ourLog.trace("search: {}?{}:{}={}", theResourceType,theSPName, theSearchModifier, theSearchText); - if (theSearchText.equals(StringUtils.stripEnd(theSearchText,null))) { - // no trailing whitespace. Add a wildcard to act like match_bool_prefix - // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html - theSearchText = theSearchText + "*"; - } - String queryText = theSearchText; + TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount); // compose the query json SearchQueryOptionsStep query = mySession.search(ResourceTable.class) - .where( - f -> f.bool(b -> { - ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f); - - if (isNotBlank(theResourceType)) { - b.must(f.match().field("myResourceType").matching(theResourceType)); - } - - switch(theSearchModifier) { - case "text": - StringParam stringParam = new StringParam(queryText); - List> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam)); - clauseBuilder.addStringTextSearch(theSPName, andOrTerms); - break; - case "": - default: - throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now."); - - } - - - })) - .aggregation(AGGREGATION_KEY, buildESAggregation(tokenAutocompleteAggregation)); + .where(f -> f.bool(b -> + buildQueryPredicate(b, f, theResourceType, theSPName, theSearchModifier, theSearchText))) + .aggregation(AGGREGATION_KEY, buildAggregation(tokenAutocompleteAggregation)); // run the query, but with 0 results. We only care about the aggregations. SearchResult result = query.fetch(0); @@ -115,10 +92,43 @@ class TokenAutocompleteSearch { return aggEntries; } + void buildQueryPredicate(BooleanPredicateClausesStep b, SearchPredicateFactory f, String theResourceType, String theSPName, String theSearchModifier, String theSearchText) { + ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f); + + if (isNotBlank(theResourceType)) { + clauseBuilder.addResourceTypeClause(theResourceType); + } + + String queryText = StringUtils.defaultString(theSearchText, ""); + if (StringUtils.isNotEmpty(queryText)) { + switch (StringUtils.defaultString(theSearchModifier)) { + case "text": + // Add a wildcard to act like match_bool_prefix + // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html + queryText = queryText + "*"; + StringParam stringParam = new StringParam(queryText); + List> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam)); + clauseBuilder.addStringTextSearch(theSPName, andOrTerms); + break; + case "": + b.must( + // use wildcard to allow matching prefix of keyword indexed field. + f.wildcard() + .field(ExtendedLuceneClauseBuilder.getTokenCodeFieldPath(theSPName)) + .matching(queryText + "*") + .toPredicate()); + break; + default: + throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now."); + } + + } + } + /** * Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON. */ - SearchAggregation buildESAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) { + SearchAggregation buildAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) { JsonObject jsonAggregation = tokenAutocompleteAggregation.toJsonAggregation(); SearchAggregation aggregation = mySession diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java index 14984cd1bef..04d1f037e6e 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/search/autocomplete/TokenAutocompleteElasticsearchIT.java @@ -35,11 +35,14 @@ import javax.annotation.Nonnull; import javax.persistence.EntityManager; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; @ExtendWith(SpringExtension.class) @RequiresDocker @@ -90,9 +93,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { // a few different codes Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure"); + Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"); createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count")); createObservationWithCode(mean_blood_pressure); + createObservationWithCode(mean_blood_pressure); createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "787-2", "MCV [Entitic volume] by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "786-4", "MCHC [Mass/volume] by Automated count")); @@ -106,25 +111,45 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest { createObservationWithCode(new Coding("http://loinc.org", "4544-3", "Hematocrit [Volume Fraction] of Blood by Automated count")); // some repeats to make sure we only return singles - createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); - createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); - createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); + createObservationWithCode(gram_positive_culture); + createObservationWithCode(gram_positive_culture); + createObservationWithCode(gram_positive_culture); List codes; - codes = autocompleteSearch("Observation", "code", "blo"); + codes = autocompleteSearch("Observation", "code", "text", "blo"); assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure))); - codes = autocompleteSearch("Observation", "code", "pressure"); + codes = autocompleteSearch("Observation", "code", "text", "pressure"); assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure))); - codes = autocompleteSearch("Observation", "code", "nuclear"); + long hits = codes.stream() + .filter(c -> matchingSystemAndCode(mean_blood_pressure).matches(c)) + .count(); + assertThat("multiple matches returns single hit", hits, is(1L)); + + codes = autocompleteSearch("Observation", "code", "text", "nuclear"); assertThat("doesn't find nuclear", codes, is(empty())); + + codes = autocompleteSearch("Observation", "code", "text", null); + assertThat("empty filter finds some", codes, is(not(empty()))); + assertThat("empty finds most common first", codes.get(0), matchingSystemAndCode(gram_positive_culture)); + assertThat("empty finds most common first", codes.get(1), matchingSystemAndCode(mean_blood_pressure)); + + codes = autocompleteSearch("Observation", "code", null, "88262-1"); + assertThat("matches by code value", codes, hasItem(matchingSystemAndCode(gram_positive_culture))); + + codes = autocompleteSearch("Observation", "code", null, "8826"); + assertThat("matches by code prefix", codes, hasItem(matchingSystemAndCode(gram_positive_culture))); + + codes = autocompleteSearch("Observation", "code", null, null); + assertThat("null finds everything", codes, hasSize(13)); + } - List autocompleteSearch(String theResourceType, String theSPName, String theSearchText) { + List autocompleteSearch(String theResourceType, String theSPName, String theModifier, String theSearchText) { return new TransactionTemplate(myTxManager).execute(s -> { TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager)); - return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, "text",30); + return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, theModifier,30); }); }