Handle token search by partial code, and various nulls (#3399)

This commit is contained in:
michaelabuckley 2022-02-16 10:23:16 -05:00 committed by GitHub
parent fb342cc2a6
commit ae33cf825b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 44 deletions

View File

@ -147,7 +147,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
} }
if (isNotBlank(theResourceType)) { if (isNotBlank(theResourceType)) {
b.must(f.match().field("myResourceType").matching(theResourceType)); builder.addResourceTypeClause(theResourceType);
} }
/* /*

View File

@ -63,8 +63,8 @@ public class ExtendedLuceneClauseBuilder {
private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class); private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class);
final FhirContext myFhirContext; final FhirContext myFhirContext;
final SearchPredicateFactory myPredicateFactory; public final SearchPredicateFactory myPredicateFactory;
final BooleanPredicateClausesStep<?> myRootClause; public final BooleanPredicateClausesStep<?> myRootClause;
final List<TemporalPrecisionEnum> ordinalSearchPrecisions = Arrays.asList(TemporalPrecisionEnum.YEAR, TemporalPrecisionEnum.MONTH, TemporalPrecisionEnum.DAY); final List<TemporalPrecisionEnum> ordinalSearchPrecisions = Arrays.asList(TemporalPrecisionEnum.YEAR, TemporalPrecisionEnum.MONTH, TemporalPrecisionEnum.DAY);
@ -74,6 +74,14 @@ public class ExtendedLuceneClauseBuilder {
this.myPredicateFactory = myPredicateFactory; this.myPredicateFactory = myPredicateFactory;
} }
/**
 * Adds a mandatory clause to the root bool query so that only resources of
 * one type can match.
 *
 * @param theResourceType the value to match against the "myResourceType" field, e.g. "Observation"
 */
public void addResourceTypeClause(String theResourceType) {
	myRootClause.must(
		myPredicateFactory
			.match()
			.field("myResourceType")
			.matching(theResourceType));
}
@Nonnull @Nonnull
private Set<String> extractOrStringParams(List<? extends IQueryParameterType> nextAnd) { private Set<String> extractOrStringParams(List<? extends IQueryParameterType> nextAnd) {
Set<String> terms = new HashSet<>(); Set<String> terms = new HashSet<>();
@ -133,7 +141,7 @@ public class ExtendedLuceneClauseBuilder {
TokenParam token = (TokenParam) orTerm; TokenParam token = (TokenParam) orTerm;
if (StringUtils.isBlank(token.getSystem())) { if (StringUtils.isBlank(token.getSystem())) {
// bare value // bare value
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(token.getValue()); return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(token.getValue());
} else if (StringUtils.isBlank(token.getValue())) { } else if (StringUtils.isBlank(token.getValue())) {
// system without value // system without value
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem()); return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem());
@ -145,7 +153,7 @@ public class ExtendedLuceneClauseBuilder {
// MB I don't quite understand why FhirResourceDaoR4SearchNoFtTest.testSearchByIdParamWrongType() uses String but here we are // MB I don't quite understand why FhirResourceDaoR4SearchNoFtTest.testSearchByIdParamWrongType() uses String but here we are
StringParam string = (StringParam) orTerm; StringParam string = (StringParam) orTerm;
// treat a string as a code with no system (like _id) // treat a string as a code with no system (like _id)
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(string.getValue()); return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(string.getValue());
} else { } else {
throw new IllegalArgumentException(Msg.code(1089) + "Unexpected param type for token search-param: " + orTerm.getClass().getName()); throw new IllegalArgumentException(Msg.code(1089) + "Unexpected param type for token search-param: " + orTerm.getClass().getName());
} }
@ -157,6 +165,11 @@ public class ExtendedLuceneClauseBuilder {
} }
/**
 * Builds the field path used to match the bare code value of a token search parameter.
 *
 * @param theSearchParamName the search parameter name, e.g. "code"
 * @return the path "sp." + theSearchParamName + ".token.code"
 */
@Nonnull
public static String getTokenCodeFieldPath(@Nonnull String theSearchParamName) {
	return "sp." + theSearchParamName + ".token" + ".code";
}
@Nonnull @Nonnull
public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) { public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) {
return "sp." + theSearchParamName + ".token" + ".code-system"; return "sp." + theSearchParamName + ".token" + ".code-system";

View File

@ -31,6 +31,8 @@ import org.apache.commons.lang3.StringUtils;
import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension; import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
import org.hibernate.search.engine.search.aggregation.AggregationKey; import org.hibernate.search.engine.search.aggregation.AggregationKey;
import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.SearchAggregation;
import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep;
import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory;
import org.hibernate.search.engine.search.query.SearchResult; import org.hibernate.search.engine.search.query.SearchResult;
import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep; import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep;
import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep; import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep;
@ -70,40 +72,15 @@ class TokenAutocompleteSearch {
@Nonnull @Nonnull
public List<TokenAutocompleteHit> search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) { public List<TokenAutocompleteHit> search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) {
TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount); ourLog.trace("search: {}?{}:{}={}", theResourceType,theSPName, theSearchModifier, theSearchText);
if (theSearchText.equals(StringUtils.stripEnd(theSearchText,null))) { TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount);
// no trailing whitespace. Add a wildcard to act like match_bool_prefix
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html
theSearchText = theSearchText + "*";
}
String queryText = theSearchText;
// compose the query json // compose the query json
SearchQueryOptionsStep<?, ?, SearchLoadingOptionsStep, ?, ?> query = mySession.search(ResourceTable.class) SearchQueryOptionsStep<?, ?, SearchLoadingOptionsStep, ?, ?> query = mySession.search(ResourceTable.class)
.where( .where(f -> f.bool(b ->
f -> f.bool(b -> { buildQueryPredicate(b, f, theResourceType, theSPName, theSearchModifier, theSearchText)))
ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f); .aggregation(AGGREGATION_KEY, buildAggregation(tokenAutocompleteAggregation));
if (isNotBlank(theResourceType)) {
b.must(f.match().field("myResourceType").matching(theResourceType));
}
switch(theSearchModifier) {
case "text":
StringParam stringParam = new StringParam(queryText);
List<List<IQueryParameterType>> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam));
clauseBuilder.addStringTextSearch(theSPName, andOrTerms);
break;
case "":
default:
throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now.");
}
}))
.aggregation(AGGREGATION_KEY, buildESAggregation(tokenAutocompleteAggregation));
// run the query, but with 0 results. We only care about the aggregations. // run the query, but with 0 results. We only care about the aggregations.
SearchResult<?> result = query.fetch(0); SearchResult<?> result = query.fetch(0);
@ -115,10 +92,43 @@ class TokenAutocompleteSearch {
return aggEntries; return aggEntries;
} }
/**
 * Fills the bool query for an autocomplete search.
 * <p>
 * A resource-type clause is added when theResourceType is not blank. When theSearchText
 * is null or empty nothing else is added. Otherwise the text, with a trailing "*", is
 * applied according to the modifier:
 * <ul>
 * <li>"text" - a string text search on the search parameter</li>
 * <li>null or "" - a wildcard match on the token code field of the search parameter</li>
 * </ul>
 *
 * @param b the bool clause being built
 * @param f the predicate factory used to create clauses
 * @param theResourceType the resource type to restrict to; ignored when blank
 * @param theSPName the token search parameter name
 * @param theSearchModifier the search modifier; may be null
 * @param theSearchText the text typed so far; may be null
 * @throws IllegalArgumentException if theSearchText is not empty and the modifier is neither "text" nor empty
 */
void buildQueryPredicate(BooleanPredicateClausesStep<?> b, SearchPredicateFactory f, String theResourceType, String theSPName, String theSearchModifier, String theSearchText) {
	ExtendedLuceneClauseBuilder luceneBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f);

	if (isNotBlank(theResourceType)) {
		luceneBuilder.addResourceTypeClause(theResourceType);
	}

	// No search text (null or ""): there is nothing to filter the tokens by.
	if (StringUtils.isEmpty(theSearchText)) {
		return;
	}

	// Add a wildcard to act like match_bool_prefix
	// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html
	String prefixPattern = theSearchText + "*";
	String modifier = StringUtils.defaultString(theSearchModifier);

	if ("text".equals(modifier)) {
		StringParam textParam = new StringParam(prefixPattern);
		List<List<IQueryParameterType>> andOrTerms = Collections.singletonList(Collections.singletonList(textParam));
		luceneBuilder.addStringTextSearch(theSPName, andOrTerms);
	} else if (modifier.isEmpty()) {
		// use wildcard to allow matching prefix of keyword indexed field.
		String codeFieldPath = ExtendedLuceneClauseBuilder.getTokenCodeFieldPath(theSPName);
		b.must(f.wildcard().field(codeFieldPath).matching(prefixPattern).toPredicate());
	} else {
		throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now.");
	}
}
/** /**
* Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON. * Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON.
*/ */
SearchAggregation<JsonObject> buildESAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) { SearchAggregation<JsonObject> buildAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) {
JsonObject jsonAggregation = tokenAutocompleteAggregation.toJsonAggregation(); JsonObject jsonAggregation = tokenAutocompleteAggregation.toJsonAggregation();
SearchAggregation<JsonObject> aggregation = mySession SearchAggregation<JsonObject> aggregation = mySession

View File

@ -35,11 +35,14 @@ import javax.annotation.Nonnull;
import javax.persistence.EntityManager; import javax.persistence.EntityManager;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
@ExtendWith(SpringExtension.class) @ExtendWith(SpringExtension.class)
@RequiresDocker @RequiresDocker
@ -90,9 +93,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
// a few different codes // a few different codes
Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure"); Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure");
Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture");
createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count"));
createObservationWithCode(mean_blood_pressure); createObservationWithCode(mean_blood_pressure);
createObservationWithCode(mean_blood_pressure);
createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count"));
createObservationWithCode(new Coding("http://loinc.org", "787-2", "MCV [Entitic volume] by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "787-2", "MCV [Entitic volume] by Automated count"));
createObservationWithCode(new Coding("http://loinc.org", "786-4", "MCHC [Mass/volume] by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "786-4", "MCHC [Mass/volume] by Automated count"));
@ -106,25 +111,45 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
createObservationWithCode(new Coding("http://loinc.org", "4544-3", "Hematocrit [Volume Fraction] of Blood by Automated count")); createObservationWithCode(new Coding("http://loinc.org", "4544-3", "Hematocrit [Volume Fraction] of Blood by Automated count"));
// some repeats to make sure we only return singles // some repeats to make sure we only return singles
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); createObservationWithCode(gram_positive_culture);
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); createObservationWithCode(gram_positive_culture);
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture")); createObservationWithCode(gram_positive_culture);
List<TokenAutocompleteHit> codes; List<TokenAutocompleteHit> codes;
codes = autocompleteSearch("Observation", "code", "blo"); codes = autocompleteSearch("Observation", "code", "text", "blo");
assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure))); assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure)));
codes = autocompleteSearch("Observation", "code", "pressure"); codes = autocompleteSearch("Observation", "code", "text", "pressure");
assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure))); assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure)));
codes = autocompleteSearch("Observation", "code", "nuclear"); long hits = codes.stream()
.filter(c -> matchingSystemAndCode(mean_blood_pressure).matches(c))
.count();
assertThat("multiple matches returns single hit", hits, is(1L));
codes = autocompleteSearch("Observation", "code", "text", "nuclear");
assertThat("doesn't find nuclear", codes, is(empty())); assertThat("doesn't find nuclear", codes, is(empty()));
codes = autocompleteSearch("Observation", "code", "text", null);
assertThat("empty filter finds some", codes, is(not(empty())));
assertThat("empty finds most common first", codes.get(0), matchingSystemAndCode(gram_positive_culture));
assertThat("empty finds most common first", codes.get(1), matchingSystemAndCode(mean_blood_pressure));
codes = autocompleteSearch("Observation", "code", null, "88262-1");
assertThat("matches by code value", codes, hasItem(matchingSystemAndCode(gram_positive_culture)));
codes = autocompleteSearch("Observation", "code", null, "8826");
assertThat("matches by code prefix", codes, hasItem(matchingSystemAndCode(gram_positive_culture)));
codes = autocompleteSearch("Observation", "code", null, null);
assertThat("null finds everything", codes, hasSize(13));
} }
List<TokenAutocompleteHit> autocompleteSearch(String theResourceType, String theSPName, String theSearchText) { List<TokenAutocompleteHit> autocompleteSearch(String theResourceType, String theSPName, String theModifier, String theSearchText) {
return new TransactionTemplate(myTxManager).execute(s -> { return new TransactionTemplate(myTxManager).execute(s -> {
TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager)); TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager));
return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, "text",30); return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, theModifier,30);
}); });
} }