Handle token search by partial code, and various nulls (#3399)

This commit is contained in:
michaelabuckley 2022-02-16 10:23:16 -05:00 committed by GitHub
parent fb342cc2a6
commit ae33cf825b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 44 deletions

View File

@ -147,7 +147,7 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
}
if (isNotBlank(theResourceType)) {
b.must(f.match().field("myResourceType").matching(theResourceType));
builder.addResourceTypeClause(theResourceType);
}
/*

View File

@ -63,8 +63,8 @@ public class ExtendedLuceneClauseBuilder {
private static final Logger ourLog = LoggerFactory.getLogger(ExtendedLuceneClauseBuilder.class);
final FhirContext myFhirContext;
final SearchPredicateFactory myPredicateFactory;
final BooleanPredicateClausesStep<?> myRootClause;
public final SearchPredicateFactory myPredicateFactory;
public final BooleanPredicateClausesStep<?> myRootClause;
final List<TemporalPrecisionEnum> ordinalSearchPrecisions = Arrays.asList(TemporalPrecisionEnum.YEAR, TemporalPrecisionEnum.MONTH, TemporalPrecisionEnum.DAY);
@ -74,6 +74,14 @@ public class ExtendedLuceneClauseBuilder {
this.myPredicateFactory = myPredicateFactory;
}
/**
* Restrict search to resources of a type
* @param theResourceType the type to match. e.g. "Observation"
*/
public void addResourceTypeClause(String theResourceType) {
myRootClause.must(myPredicateFactory.match().field("myResourceType").matching(theResourceType));
}
@Nonnull
private Set<String> extractOrStringParams(List<? extends IQueryParameterType> nextAnd) {
Set<String> terms = new HashSet<>();
@ -133,7 +141,7 @@ public class ExtendedLuceneClauseBuilder {
TokenParam token = (TokenParam) orTerm;
if (StringUtils.isBlank(token.getSystem())) {
// bare value
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(token.getValue());
return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(token.getValue());
} else if (StringUtils.isBlank(token.getValue())) {
// system without value
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem());
@ -145,7 +153,7 @@ public class ExtendedLuceneClauseBuilder {
// MB I don't quite understand why FhirResourceDaoR4SearchNoFtTest.testSearchByIdParamWrongType() uses String but here we are
StringParam string = (StringParam) orTerm;
// treat a string as a code with no system (like _id)
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".code").matching(string.getValue());
return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(string.getValue());
} else {
throw new IllegalArgumentException(Msg.code(1089) + "Unexpected param type for token search-param: " + orTerm.getClass().getName());
}
@ -157,6 +165,11 @@ public class ExtendedLuceneClauseBuilder {
}
@Nonnull
public static String getTokenCodeFieldPath(String theSearchParamName) {
return "sp." + theSearchParamName + ".token" + ".code";
}
@Nonnull
public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) {
return "sp." + theSearchParamName + ".token" + ".code-system";

View File

@ -31,6 +31,8 @@ import org.apache.commons.lang3.StringUtils;
import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
import org.hibernate.search.engine.search.aggregation.AggregationKey;
import org.hibernate.search.engine.search.aggregation.SearchAggregation;
import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep;
import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory;
import org.hibernate.search.engine.search.query.SearchResult;
import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep;
import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep;
@ -70,40 +72,15 @@ class TokenAutocompleteSearch {
@Nonnull
public List<TokenAutocompleteHit> search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) {
TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount);
ourLog.trace("search: {}?{}:{}={}", theResourceType,theSPName, theSearchModifier, theSearchText);
if (theSearchText.equals(StringUtils.stripEnd(theSearchText,null))) {
// no trailing whitespace. Add a wildcard to act like match_bool_prefix
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html
theSearchText = theSearchText + "*";
}
String queryText = theSearchText;
TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount);
// compose the query json
SearchQueryOptionsStep<?, ?, SearchLoadingOptionsStep, ?, ?> query = mySession.search(ResourceTable.class)
.where(
f -> f.bool(b -> {
ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f);
if (isNotBlank(theResourceType)) {
b.must(f.match().field("myResourceType").matching(theResourceType));
}
switch(theSearchModifier) {
case "text":
StringParam stringParam = new StringParam(queryText);
List<List<IQueryParameterType>> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam));
clauseBuilder.addStringTextSearch(theSPName, andOrTerms);
break;
case "":
default:
throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now.");
}
}))
.aggregation(AGGREGATION_KEY, buildESAggregation(tokenAutocompleteAggregation));
.where(f -> f.bool(b ->
buildQueryPredicate(b, f, theResourceType, theSPName, theSearchModifier, theSearchText)))
.aggregation(AGGREGATION_KEY, buildAggregation(tokenAutocompleteAggregation));
// run the query, but with 0 results. We only care about the aggregations.
SearchResult<?> result = query.fetch(0);
@ -115,10 +92,43 @@ class TokenAutocompleteSearch {
return aggEntries;
}
void buildQueryPredicate(BooleanPredicateClausesStep<?> b, SearchPredicateFactory f, String theResourceType, String theSPName, String theSearchModifier, String theSearchText) {
ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f);
if (isNotBlank(theResourceType)) {
clauseBuilder.addResourceTypeClause(theResourceType);
}
String queryText = StringUtils.defaultString(theSearchText, "");
if (StringUtils.isNotEmpty(queryText)) {
switch (StringUtils.defaultString(theSearchModifier)) {
case "text":
// Add a wildcard to act like match_bool_prefix
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html
queryText = queryText + "*";
StringParam stringParam = new StringParam(queryText);
List<List<IQueryParameterType>> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam));
clauseBuilder.addStringTextSearch(theSPName, andOrTerms);
break;
case "":
b.must(
// use wildcard to allow matching prefix of keyword indexed field.
f.wildcard()
.field(ExtendedLuceneClauseBuilder.getTokenCodeFieldPath(theSPName))
.matching(queryText + "*")
.toPredicate());
break;
default:
throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now.");
}
}
}
/**
* Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON.
*/
SearchAggregation<JsonObject> buildESAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) {
SearchAggregation<JsonObject> buildAggregation(TokenAutocompleteAggregation tokenAutocompleteAggregation) {
JsonObject jsonAggregation = tokenAutocompleteAggregation.toJsonAggregation();
SearchAggregation<JsonObject> aggregation = mySession

View File

@ -35,11 +35,14 @@ import javax.annotation.Nonnull;
import javax.persistence.EntityManager;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
@ExtendWith(SpringExtension.class)
@RequiresDocker
@ -90,9 +93,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
// a few different codes
Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure");
Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture");
createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count"));
createObservationWithCode(mean_blood_pressure);
createObservationWithCode(mean_blood_pressure);
createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count"));
createObservationWithCode(new Coding("http://loinc.org", "787-2", "MCV [Entitic volume] by Automated count"));
createObservationWithCode(new Coding("http://loinc.org", "786-4", "MCHC [Mass/volume] by Automated count"));
@ -106,25 +111,45 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
createObservationWithCode(new Coding("http://loinc.org", "4544-3", "Hematocrit [Volume Fraction] of Blood by Automated count"));
// some repeats to make sure we only return singles
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"));
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"));
createObservationWithCode(new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture"));
createObservationWithCode(gram_positive_culture);
createObservationWithCode(gram_positive_culture);
createObservationWithCode(gram_positive_culture);
List<TokenAutocompleteHit> codes;
codes = autocompleteSearch("Observation", "code", "blo");
codes = autocompleteSearch("Observation", "code", "text", "blo");
assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure)));
codes = autocompleteSearch("Observation", "code", "pressure");
codes = autocompleteSearch("Observation", "code", "text", "pressure");
assertThat("finds blood pressure", codes, hasItem(matchingSystemAndCode(mean_blood_pressure)));
codes = autocompleteSearch("Observation", "code", "nuclear");
long hits = codes.stream()
.filter(c -> matchingSystemAndCode(mean_blood_pressure).matches(c))
.count();
assertThat("multiple matches returns single hit", hits, is(1L));
codes = autocompleteSearch("Observation", "code", "text", "nuclear");
assertThat("doesn't find nuclear", codes, is(empty()));
codes = autocompleteSearch("Observation", "code", "text", null);
assertThat("empty filter finds some", codes, is(not(empty())));
assertThat("empty finds most common first", codes.get(0), matchingSystemAndCode(gram_positive_culture));
assertThat("empty finds most common first", codes.get(1), matchingSystemAndCode(mean_blood_pressure));
codes = autocompleteSearch("Observation", "code", null, "88262-1");
assertThat("matches by code value", codes, hasItem(matchingSystemAndCode(gram_positive_culture)));
codes = autocompleteSearch("Observation", "code", null, "8826");
assertThat("matches by code prefix", codes, hasItem(matchingSystemAndCode(gram_positive_culture)));
codes = autocompleteSearch("Observation", "code", null, null);
assertThat("null finds everything", codes, hasSize(13));
}
List<TokenAutocompleteHit> autocompleteSearch(String theResourceType, String theSPName, String theSearchText) {
List<TokenAutocompleteHit> autocompleteSearch(String theResourceType, String theSPName, String theModifier, String theSearchText) {
return new TransactionTemplate(myTxManager).execute(s -> {
TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager));
return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, "text",30);
return tokenAutocompleteSearch.search(theResourceType, theSPName, theSearchText, theModifier,30);
});
}