Distinguish multi-coded items during token autocomplete (#3429)

Add nested indexing to support autocomplete of multi-coded items.
This commit is contained in:
michaelabuckley 2022-02-28 19:59:09 -05:00 committed by GitHub
parent b263c7a595
commit 86f7373f16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 496 additions and 231 deletions

View File

@ -33,6 +33,7 @@ import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData;
import ca.uhn.fhir.jpa.search.autocomplete.ValueSetAutocompleteOptions;
import ca.uhn.fhir.jpa.search.autocomplete.ValueSetAutocompleteSearch;
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import ca.uhn.fhir.jpa.searchparam.extractor.ISearchParamExtractor;
import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams;
import ca.uhn.fhir.model.api.IQueryParameterType;
import ca.uhn.fhir.rest.api.Constants;
@ -75,6 +76,8 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
private ISearchParamRegistry mySearchParamRegistry;
@Autowired
private DaoConfig myDaoConfig;
@Autowired
ISearchParamExtractor mySearchParamExtractor;
final private ExtendedLuceneSearchBuilder myAdvancedIndexQueryBuilder = new ExtendedLuceneSearchBuilder();
private Boolean ourDisabled;
@ -89,8 +92,8 @@ public class FulltextSearchSvcImpl implements IFulltextSearchSvc {
public ExtendedLuceneIndexData extractLuceneIndexData(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) {
String resourceType = myFhirContext.getResourceType(theResource);
Map<String, RuntimeSearchParam> activeSearchParams = mySearchParamRegistry.getActiveSearchParams(resourceType);
ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams);
return extractor.extract(theNewParams);
ExtendedLuceneIndexExtractor extractor = new ExtendedLuceneIndexExtractor(myFhirContext, activeSearchParams, mySearchParamExtractor);
return extractor.extract(theResource,theNewParams);
}
@Override

View File

@ -20,8 +20,8 @@ package ca.uhn.fhir.jpa.dao.search;
* #L%
*/
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.api.IQueryParameterType;
import ca.uhn.fhir.model.api.TemporalPrecisionEnum;
import ca.uhn.fhir.rest.api.Constants;
@ -46,7 +46,6 @@ import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.time.Instant;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
@ -57,6 +56,7 @@ import java.util.stream.Collectors;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.SEARCH_PARAM_ROOT;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
public class ExtendedLuceneClauseBuilder {
@ -144,7 +144,7 @@ public class ExtendedLuceneClauseBuilder {
return myPredicateFactory.match().field(getTokenCodeFieldPath(theSearchParamName)).matching(token.getValue());
} else if (StringUtils.isBlank(token.getValue())) {
// system without value
return myPredicateFactory.match().field("sp." + theSearchParamName + ".token" + ".system").matching(token.getSystem());
return myPredicateFactory.match().field(SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".system").matching(token.getSystem());
} else {
// system + value
return myPredicateFactory.match().field(getTokenSystemCodeFieldPath(theSearchParamName)).matching(token.getValueAsQueryToken(this.myFhirContext));
@ -167,12 +167,12 @@ public class ExtendedLuceneClauseBuilder {
@Nonnull
public static String getTokenCodeFieldPath(String theSearchParamName) {
return "sp." + theSearchParamName + ".token" + ".code";
return SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".code";
}
@Nonnull
public static String getTokenSystemCodeFieldPath(@Nonnull String theSearchParamName) {
return "sp." + theSearchParamName + ".token" + ".code-system";
return SEARCH_PARAM_ROOT + "." + theSearchParamName + ".token" + ".code-system";
}
public void addStringTextSearch(String theSearchParamName, List<List<IQueryParameterType>> stringAndOrTerms) {
@ -190,7 +190,7 @@ public class ExtendedLuceneClauseBuilder {
fieldName = "myNarrativeText";
break;
default:
fieldName = "sp." + theSearchParamName + ".string." + IDX_STRING_TEXT;
fieldName = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_TEXT;
break;
}
@ -213,7 +213,7 @@ public class ExtendedLuceneClauseBuilder {
}
public void addStringExactSearch(String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_EXACT;
String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_EXACT;
for (List<? extends IQueryParameterType> nextAnd : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
@ -227,7 +227,7 @@ public class ExtendedLuceneClauseBuilder {
}
public void addStringContainsSearch(String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED;
String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED;
for (List<? extends IQueryParameterType> nextAnd : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
ourLog.debug("addStringContainsSearch {} {}", theSearchParamName, terms);
@ -241,7 +241,7 @@ public class ExtendedLuceneClauseBuilder {
}
public void addStringUnmodifiedSearch(String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
String fieldPath = "sp." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED;
String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".string." + IDX_STRING_NORMALIZED;
for (List<? extends IQueryParameterType> nextAnd : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
ourLog.debug("addStringUnmodifiedSearch {} {}", theSearchParamName, terms);
@ -255,7 +255,7 @@ public class ExtendedLuceneClauseBuilder {
}
public void addReferenceUnchainedSearch(String theSearchParamName, List<List<IQueryParameterType>> theReferenceAndOrTerms) {
String fieldPath = "sp." + theSearchParamName + ".reference.value";
String fieldPath = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".reference.value";
for (List<? extends IQueryParameterType> nextAnd : theReferenceAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
ourLog.trace("reference unchained search {}", terms);
@ -362,8 +362,8 @@ public class ExtendedLuceneClauseBuilder {
}
private PredicateFinalStep generateDateOrdinalSearchTerms(String theSearchParamName, DateParam theDateParam) {
String lowerOrdinalField = "sp." + theSearchParamName + ".dt.lower-ord";
String upperOrdinalField = "sp." + theSearchParamName + ".dt.upper-ord";
String lowerOrdinalField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.lower-ord";
String upperOrdinalField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.upper-ord";
int lowerBoundAsOrdinal;
int upperBoundAsOrdinal;
ParamPrefixEnum prefix = theDateParam.getPrefix();
@ -411,8 +411,8 @@ public class ExtendedLuceneClauseBuilder {
}
private PredicateFinalStep generateDateInstantSearchTerms(String theSearchParamName, DateParam theDateParam) {
String lowerInstantField = "sp." + theSearchParamName + ".dt.lower";
String upperInstantField = "sp." + theSearchParamName + ".dt.upper";
String lowerInstantField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.lower";
String upperInstantField = SEARCH_PARAM_ROOT + "." + theSearchParamName + ".dt.upper";
ParamPrefixEnum prefix = theDateParam.getPrefix();
if (ParamPrefixEnum.NOT_EQUAL == prefix) {

View File

@ -24,7 +24,12 @@ import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeSearchParam;
import ca.uhn.fhir.jpa.model.entity.ResourceLink;
import ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData;
import ca.uhn.fhir.jpa.searchparam.extractor.ISearchParamExtractor;
import ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams;
import ca.uhn.fhir.rest.api.RestSearchParameterTypeEnum;
import org.hl7.fhir.instance.model.api.IBase;
import org.hl7.fhir.instance.model.api.IBaseCoding;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.jetbrains.annotations.NotNull;
import java.util.ArrayList;
@ -43,21 +48,25 @@ public class ExtendedLuceneIndexExtractor {
private final FhirContext myContext;
private final Map<String, RuntimeSearchParam> myParams;
private final ISearchParamExtractor mySearchParamExtractor;
public ExtendedLuceneIndexExtractor(FhirContext theContext, Map<String, RuntimeSearchParam> theActiveParams) {
public ExtendedLuceneIndexExtractor(FhirContext theContext, Map<String, RuntimeSearchParam> theActiveParams, ISearchParamExtractor theSearchParamExtractor) {
myContext = theContext;
myParams = theActiveParams;
mySearchParamExtractor = theSearchParamExtractor;
}
@NotNull
public ExtendedLuceneIndexData extract(ResourceIndexedSearchParams theNewParams) {
public ExtendedLuceneIndexData extract(IBaseResource theResource, ResourceIndexedSearchParams theNewParams) {
ExtendedLuceneIndexData retVal = new ExtendedLuceneIndexData(myContext);
extractAutocompleteTokens(theResource, retVal);
theNewParams.myStringParams.forEach(nextParam ->
retVal.addStringIndexData(nextParam.getParamName(), nextParam.getValueExact()));
theNewParams.myTokenParams.forEach(nextParam ->
retVal.addTokenIndexData(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue()));
retVal.addTokenIndexDataIfNotPresent(nextParam.getParamName(), nextParam.getSystem(), nextParam.getValue()));
theNewParams.myDateParams.forEach(nextParam ->
retVal.addDateIndexData(nextParam.getParamName(), nextParam.getValueLow(), nextParam.getValueLowDateOrdinal(),
@ -93,4 +102,53 @@ public class ExtendedLuceneIndexExtractor {
return retVal;
}
/**
* Re-extract token parameters so we can distinguish
*/
private void extractAutocompleteTokens(IBaseResource theResource, ExtendedLuceneIndexData theRetVal) {
// we need to re-index token params to match up display with codes.
myParams.values().stream()
.filter(p->p.getParamType() == RestSearchParameterTypeEnum.TOKEN)
// TODO it would be nice to reuse TokenExtractor
.forEach(p-> mySearchParamExtractor.extractValues(p.getPath(), theResource)
.stream()
.forEach(nextValue->indexTokenValue(theRetVal, p, nextValue)
));
}
private void indexTokenValue(ExtendedLuceneIndexData theRetVal, RuntimeSearchParam p, IBase nextValue) {
String nextType = mySearchParamExtractor.toRootTypeName(nextValue);
String spName = p.getName();
switch (nextType) {
case "CodeableConcept":
addToken_CodeableConcept(theRetVal, spName, nextValue);
break;
case "Coding":
addToken_Coding(theRetVal, spName, (IBaseCoding) nextValue);
break;
// TODO share this with TokenExtractor and introduce a ITokenIndexer interface.
// Ignore unknown types for now.
// This is just for autocomplete, and we are focused on Observation.code, category, combo-code, etc.
// case "Identifier":
// mySearchParamExtractor.addToken_Identifier(myResourceTypeName, params, searchParam, value);
// break;
// case "ContactPoint":
// mySearchParamExtractor.addToken_ContactPoint(myResourceTypeName, params, searchParam, value);
// break;
default:
break;
}
}
private void addToken_CodeableConcept(ExtendedLuceneIndexData theRetVal, String theSpName, IBase theValue) {
List<IBase> codings = mySearchParamExtractor.getCodingsFromCodeableConcept(theValue);
for (IBase nextCoding : codings) {
addToken_Coding(theRetVal, theSpName, (IBaseCoding) nextCoding);
}
}
private void addToken_Coding(ExtendedLuceneIndexData theRetVal, String theSpName, IBaseCoding theNextValue) {
theRetVal.addTokenIndexData(theSpName, theNextValue);
}
}

View File

@ -23,7 +23,6 @@ package ca.uhn.fhir.jpa.dao.search;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import org.hibernate.search.engine.search.aggregation.AggregationKey;
import javax.annotation.Nonnull;
import java.util.List;
@ -31,13 +30,15 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.SEARCH_PARAM_ROOT;
/**
* Builds lastN aggregation, and parse the results
*/
public class LastNAggregation {
static final String SP_SUBJECT = "sp.subject.reference.value";
private static final String SP_CODE_TOKEN_CODE_AND_SYSTEM = "sp.code.token.code-system";
private static final String SP_DATE_DT_UPPER = "sp.date.dt.upper";
static final String SP_SUBJECT = SEARCH_PARAM_ROOT + ".subject.reference.value";
private static final String SP_CODE_TOKEN_CODE_AND_SYSTEM = SEARCH_PARAM_ROOT + ".code.token.code-system";
private static final String SP_DATE_DT_UPPER = SEARCH_PARAM_ROOT + ".date.dt.upper";
private static final String GROUP_BY_CODE_SYSTEM_SUB_AGGREGATION = "group_by_code_system";
private static final String MOST_RECENT_EFFECTIVE_SUB_AGGREGATION = "most_recent_effective";

View File

@ -7,7 +7,7 @@
* This package extends this search to support token, string, and reference parameters via {@link ca.uhn.fhir.jpa.model.entity.ResourceTable#myLuceneIndexData}.
* When active, the extracted search parameters which are written to the HFJ_SPIDX_* tables are also written to the Lucene index document.
* For now, we use the existing JPA index entities to populate the {@link ca.uhn.fhir.jpa.model.search.ExtendedLuceneIndexData}
* in {@link ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor#extract(ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams)} ()}
* in {@link ca.uhn.fhir.jpa.dao.search.ExtendedLuceneIndexExtractor#extract(org.hl7.fhir.instance.model.api.IBaseResource, ca.uhn.fhir.jpa.searchparam.extractor.ResourceIndexedSearchParams)} ()}
*
* <h2>Implementation</h2>
* Both {@link ca.uhn.fhir.jpa.search.builder.SearchBuilder} and {@link ca.uhn.fhir.jpa.dao.LegacySearchBuilder} delegate the

View File

@ -0,0 +1,40 @@
package ca.uhn.fhir.jpa.search.autocomplete;
import com.google.gson.JsonObject;
import org.apache.commons.lang3.Validate;
import javax.annotation.Nonnull;
public class RawElasticJsonBuilder {
@Nonnull
static JsonObject makeMatchBoolPrefixPredicate(String theFieldName, String queryText) {
JsonObject matchBoolBody = new JsonObject();
matchBoolBody.addProperty(theFieldName, queryText);
JsonObject predicate = new JsonObject();
predicate.add("match_bool_prefix", matchBoolBody);
return predicate;
}
public static JsonObject makeWildcardPredicate(String theFieldName, String theQueryText) {
Validate.notEmpty(theQueryText);
JsonObject params = new JsonObject();
params.addProperty("value", theQueryText);
JsonObject wildcardBody = new JsonObject();
wildcardBody.add(theFieldName, params);
JsonObject predicate = new JsonObject();
predicate.add("wildcard", wildcardBody);
return predicate;
}
@Nonnull
public static JsonObject makeMatchAllPredicate() {
JsonObject o = new JsonObject();
o.add("match_all", new JsonObject());
return o;
}
}

View File

@ -20,10 +20,8 @@ package ca.uhn.fhir.jpa.search.autocomplete;
* #L%
*/
import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
@ -31,6 +29,7 @@ import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.ParseContext;
import com.jayway.jsonpath.spi.json.GsonJsonProvider;
import com.jayway.jsonpath.spi.mapper.GsonMappingProvider;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import javax.annotation.Nonnull;
@ -38,33 +37,59 @@ import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT;
/**
* Compose the autocomplete aggregation, and parse the results.
*/
class TokenAutocompleteAggregation {
static final String NESTED_AGG_NAME = "nestedTopNAgg";
/**
* Aggregation template json.
*
* https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
*/
static final JsonObject AGGREGATION_TEMPLATE =
new Gson().fromJson("{\n" +
" \"terms\": {\n" +
" \"field\": \"sp.TEMPLATE_DUMMY.token.code-system\",\n" +
" \"size\": 30,\n" +
" \"min_doc_count\": 1\n" +
" },\n" +
" \"aggs\": {\n" +
" \"" + NESTED_AGG_NAME + "\": {\n" +
" \"top_hits\": {\n" +
" \"_source\": {\n" +
" \"includes\": [ \"sp.TEMPLATE_DUMMY\" ]\n" +
" },\n" +
" \"size\": 1\n" +
" }\n" +
" }\n" +
" }}", JsonObject.class);
new Gson().fromJson("" +
" {" +
" \"nested\": { \"path\": \"nsp.PLACEHOLDER\" }," +
" \"aggs\": {" +
" \"search\": {" +
" \"filter\": {" +
" \"bool\": {" +
" \"must\": [" +
" { \"match_bool_prefix\":" +
" { \"nsp.PLACEHOLDER.string.text\": {" +
" \"query\": \"Mors\"}" +
" }" +
" }" +
" ]" +
" }" +
" }," +
" \"aggs\": {" +
" \"group_by_token\": {" +
" \"terms\": {" +
" \"field\": \"nsp.PLACEHOLDER.token.code-system\"," +
" \"size\": 30," +
" \"min_doc_count\": 1," +
" \"shard_min_doc_count\": 0," +
" \"show_term_doc_count_error\": false" +
" }," +
" \"aggs\": {" +
" \"top_tags_hits\": {" +
" \"top_hits\": {" +
" \"_source\": {" +
" \"includes\": [ \"nsp.PLACEHOLDER\" ]" +
" }," +
" \"size\": 1" +
" }" +
" }" +
" }" +
" }" +
" }" +
" }" +
" }" +
" }", JsonObject.class);
static final Configuration configuration = Configuration
.builder()
@ -75,12 +100,28 @@ class TokenAutocompleteAggregation {
private final String mySpName;
private final int myCount;
private final JsonObject mySearch;
public TokenAutocompleteAggregation(String theSpName, int theCount) {
public TokenAutocompleteAggregation(String theSpName, int theCount, String theSearchText, String theSearchModifier) {
Validate.notEmpty(theSpName);
Validate.isTrue(theCount>0, "count must be positive");
Validate.isTrue("text".equalsIgnoreCase(theSearchModifier) || "".equals(theSearchModifier) || theSearchModifier == null, "Unsupported search modifier " + theSearchModifier);
mySpName = theSpName;
myCount = theCount;
mySearch = makeSearch(theSearchText, theSearchModifier);
}
private JsonObject makeSearch(String theSearchText, String theSearchModifier) {
theSearchText = StringUtils.defaultString(theSearchText);
theSearchModifier = StringUtils.defaultString(theSearchModifier);
if (StringUtils.isEmpty(theSearchText)) {
return RawElasticJsonBuilder.makeMatchAllPredicate();
} else if ("text".equalsIgnoreCase(theSearchModifier)) {
return RawElasticJsonBuilder.makeMatchBoolPrefixPredicate(NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".string." + IDX_STRING_TEXT, theSearchText);
} else {
return RawElasticJsonBuilder.makeWildcardPredicate(NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".token.code", theSearchText + "*");
}
}
/**
@ -92,9 +133,12 @@ class TokenAutocompleteAggregation {
// clone and modify the template with the actual field names.
JsonObject result = AGGREGATION_TEMPLATE.deepCopy();
DocumentContext documentContext = parseContext.parse(result);
documentContext.set("terms.field", ExtendedLuceneClauseBuilder.getTokenSystemCodeFieldPath(mySpName));
documentContext.set("terms.size", myCount);
documentContext.set("aggs." + NESTED_AGG_NAME + ".top_hits._source.includes[0]","sp." + mySpName);
String nestedSearchParamPath = NESTED_SEARCH_PARAM_ROOT + "." + mySpName;
documentContext.set("nested.path", nestedSearchParamPath);
documentContext.set("aggs.search.filter.bool.must[0]", mySearch);
documentContext.set("aggs.search.aggs.group_by_token.terms.field", NESTED_SEARCH_PARAM_ROOT + "." + mySpName + ".token" + ".code-system");
documentContext.set("aggs.search.aggs.group_by_token.terms.size", myCount);
documentContext.set("aggs.search.aggs.group_by_token.aggs.top_tags_hits.top_hits._source.includes[0]", nestedSearchParamPath);
return result;
}
@ -108,7 +152,11 @@ class TokenAutocompleteAggregation {
List<TokenAutocompleteHit> extractResults(@Nonnull JsonObject theAggregationResult) {
Validate.notNull(theAggregationResult);
JsonArray buckets = theAggregationResult.getAsJsonArray("buckets");
JsonArray buckets = theAggregationResult
.getAsJsonObject("search")
.getAsJsonObject("group_by_token")
.getAsJsonArray("buckets");
List<TokenAutocompleteHit> result = StreamSupport.stream(buckets.spliterator(), false)
.map(b-> bucketToEntry((JsonObject) b))
.collect(Collectors.toList());
@ -129,16 +177,7 @@ class TokenAutocompleteAggregation {
String bucketKey = documentContext.read("key", String.class);
// The inner bucket has a hits array, and we only need the first.
JsonObject spRootNode = documentContext.read(NESTED_AGG_NAME + ".hits.hits[0]._source.sp");
// MB - JsonPath doesn't have placeholders, and I don't want to screw-up quoting mySpName, so read the JsonObject explicitly
JsonObject spNode = spRootNode.getAsJsonObject(mySpName);
JsonElement exactNode = spNode.get("string").getAsJsonObject().get("exact");
String displayText;
if (exactNode.isJsonArray()) {
displayText = exactNode.getAsJsonArray().get(0).getAsString();
} else {
displayText = exactNode.getAsString();
}
String displayText = documentContext.read("top_tags_hits.hits.hits[0]._source.string.text", String.class);
return new TokenAutocompleteHit(bucketKey,displayText);
}

View File

@ -21,6 +21,7 @@ package ca.uhn.fhir.jpa.search.autocomplete;
*/
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.builder.ToStringBuilder;
import javax.annotation.Nonnull;
@ -42,4 +43,12 @@ class TokenAutocompleteHit {
public String getSystemCode() {
return mySystemCode;
}
@Override
public String toString() {
return new ToStringBuilder(this)
.append("mySystemCode", mySystemCode)
.append("myDisplayText", myDisplayText)
.toString();
}
}

View File

@ -21,18 +21,12 @@ package ca.uhn.fhir.jpa.search.autocomplete;
*/
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.jpa.dao.search.ExtendedLuceneClauseBuilder;
import ca.uhn.fhir.jpa.model.entity.ResourceTable;
import ca.uhn.fhir.model.api.IQueryParameterType;
import ca.uhn.fhir.rest.param.StringParam;
import com.google.gson.JsonObject;
import org.apache.commons.lang3.StringUtils;
import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
import org.hibernate.search.engine.search.aggregation.AggregationKey;
import org.hibernate.search.engine.search.aggregation.SearchAggregation;
import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep;
import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory;
import org.hibernate.search.engine.search.query.SearchResult;
import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep;
import org.hibernate.search.mapper.orm.search.loading.dsl.SearchLoadingOptionsStep;
@ -41,7 +35,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
@ -63,23 +56,28 @@ class TokenAutocompleteSearch {
/**
* Search for tokens indexed by theSPName on theResourceType matching theSearchText.
* @param theResourceType The resource type (e.g. Observation)
* Search for tokens indexed by theSPName on theResourceName matching theSearchText.
* @param theResourceName The resource type (e.g. Observation)
* @param theSPName The search param code (e.g. combo-code)
* @param theSearchText The search test (e.g. "bloo")
* @return A collection of Coding elements
*/
@Nonnull
public List<TokenAutocompleteHit> search(String theResourceType, String theSPName, String theSearchText, String theSearchModifier, int theCount) {
public List<TokenAutocompleteHit> search(String theResourceName, String theSPName, String theSearchText, String theSearchModifier, int theCount) {
ourLog.trace("search: {}?{}:{}={}", theResourceType,theSPName, theSearchModifier, theSearchText);
TokenAutocompleteAggregation tokenAutocompleteAggregation = new TokenAutocompleteAggregation(theSPName, theCount);
TokenAutocompleteAggregation tokenAutocompleteAggregation =
new TokenAutocompleteAggregation(theSPName, theCount, theSearchText, theSearchModifier);
// compose the query json
SearchQueryOptionsStep<?, ?, SearchLoadingOptionsStep, ?, ?> query = mySession.search(ResourceTable.class)
.where(f -> f.bool(b ->
buildQueryPredicate(b, f, theResourceType, theSPName, theSearchModifier, theSearchText)))
.where(predFactory -> predFactory.bool(boolBuilder -> {
ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, boolBuilder, predFactory);
// we apply resource-level predicates here, at the top level
if (isNotBlank(theResourceName)) {
clauseBuilder.addResourceTypeClause(theResourceName);
}
}))
.aggregation(AGGREGATION_KEY, buildAggregation(tokenAutocompleteAggregation));
// run the query, but with 0 results. We only care about the aggregations.
@ -92,39 +90,6 @@ class TokenAutocompleteSearch {
return aggEntries;
}
void buildQueryPredicate(BooleanPredicateClausesStep<?> b, SearchPredicateFactory f, String theResourceType, String theSPName, String theSearchModifier, String theSearchText) {
ExtendedLuceneClauseBuilder clauseBuilder = new ExtendedLuceneClauseBuilder(myFhirContext, b, f);
if (isNotBlank(theResourceType)) {
clauseBuilder.addResourceTypeClause(theResourceType);
}
String queryText = StringUtils.defaultString(theSearchText, "");
if (StringUtils.isNotEmpty(queryText)) {
switch (StringUtils.defaultString(theSearchModifier)) {
case "text":
// Add a wildcard to act like match_bool_prefix
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-bool-prefix-query.html
queryText = queryText + "*";
StringParam stringParam = new StringParam(queryText);
List<List<IQueryParameterType>> andOrTerms = Collections.singletonList(Collections.singletonList(stringParam));
clauseBuilder.addStringTextSearch(theSPName, andOrTerms);
break;
case "":
b.must(
// use wildcard to allow matching prefix of keyword indexed field.
f.wildcard()
.field(ExtendedLuceneClauseBuilder.getTokenCodeFieldPath(theSPName))
.matching(queryText + "*")
.toPredicate());
break;
default:
throw new IllegalArgumentException(Msg.code(2034) + "Autocomplete only accepts text search for now.");
}
}
}
/**
* Hibernate-search doesn't support nested aggregations, so we use an extension to build what we need from raw JSON.
*/

View File

@ -5,13 +5,8 @@
*
* This work depends on on the Hibernate Search infrastructure in {@link ca.uhn.fhir.jpa.dao.search}.
*
* NIH sponsored this work to provide an interactive-autocomplete when browsing codes in a research dataset.
* Provides an interactive-autocomplete when browsing codes in a dataset.
*
* https://gitlab.com/simpatico.ai/cdr/-/issues/2452
* wipmb TODO-LIST
* wipmb - docs - no partition support
* wipmb - link to docs
* wipmb what if the sp isn't of type token? do we check, or discard results without tokens?
*
*/
package ca.uhn.fhir.jpa.search.autocomplete;

View File

@ -0,0 +1,23 @@
package ca.uhn.fhir.jpa.config;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.jpa.api.dao.DaoRegistry;
import ca.uhn.fhir.jpa.dao.DaoTestDataBuilder;
import ca.uhn.fhir.jpa.partition.SystemRequestDetails;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class TestDataBuilderConfig {
@Autowired
FhirContext myFhirContext;
@Autowired
DaoRegistry myDaoRegistry;
@Bean
DaoTestDataBuilder testDataBuilder() {
return new DaoTestDataBuilder(myFhirContext, myDaoRegistry, new SystemRequestDetails());
}
}

View File

@ -0,0 +1,47 @@
package ca.uhn.fhir.jpa.search.autocomplete;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
class RawElasticJsonBuilderTest {
JsonObject myJson;
@Test
public void matchAll() {
myJson = RawElasticJsonBuilder.makeMatchAllPredicate();
assertJson("""
{"match_all": {}}""");
}
@Test
public void wildcard() {
myJson = RawElasticJsonBuilder.makeWildcardPredicate("a.field", "pattern_text");
assertJson("""
{ "wildcard": {
"a.field": {
"value": "pattern_text"
}}}""");
}
@Test
public void matchBoolPrefix() {
myJson = RawElasticJsonBuilder.makeMatchBoolPrefixPredicate("a.field", "query_text");
assertJson("""
{ "match_bool_prefix" : {
"a.field" : "query_text"
}}""");
}
private void assertJson(String expected) {
assertEquals(JsonParser.parseString(expected).getAsJsonObject(), myJson);
}
}

View File

@ -6,7 +6,6 @@ import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.Optional;
import static com.jayway.jsonpath.matchers.JsonPathMatchers.isJson;
import static com.jayway.jsonpath.matchers.JsonPathMatchers.withJsonPath;
@ -30,8 +29,10 @@ class TokenAutocompleteAggregationTest {
myCode = "combo-code";
buildAggregation();
assertThat("terms field is sp", myAggJson, isJson(withJsonPath("terms.field", equalTo("sp.combo-code.token.code-system"))));
assertThat("fetched piece is sp", myAggJson, isJson(withJsonPath("aggs.nestedTopNAgg.top_hits._source.includes[0]", equalTo("sp.combo-code"))));
assertThat("nested clause includes sp", myAggJson, isJson(withJsonPath("nested.path", equalTo("nsp.combo-code"))));
assertThat("terms field is sp", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.terms.field", equalTo("nsp.combo-code.token.code-system"))));
assertThat("fetched piece is sp", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.aggs.top_tags_hits.top_hits._source.includes[0]", equalTo("nsp.combo-code"))));
}
@Test
@ -40,36 +41,103 @@ class TokenAutocompleteAggregationTest {
myCount = 77;
buildAggregation();
assertThat("terms field is sp", myAggJson, isJson(withJsonPath("terms.size", equalTo(77))));
assertThat("count for top n", myAggJson, isJson(withJsonPath("aggs.search.aggs.group_by_token.terms.size", equalTo(77))));
}
private void buildAggregation() {
myAggJson = new TokenAutocompleteAggregation(myCode, myCount).toJsonAggregation().toString();
myAggJson = new TokenAutocompleteAggregation(myCode, myCount, null, null).toJsonAggregation().toString();
}
}
@Nested
public class ResultExtraction {
// Sample result from elastic for Observation.code
String resultJson = "" +
"{ \"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0," +
" \"buckets\": [" +
" { \"key\": \"http://loinc.org|88262-1\"," +
" \"doc_count\":3," +
" \"nestedTopNAgg\": " +
" { \"hits\":" +
" { \"total\":{\"value\":3,\"relation\":\"eq\"}, \"max_score\":1.0," +
" \"hits\":[" +
" { \"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"13\",\"_score\":1.0," +
" \"_source\":{\"sp\":{\"code\":" +
" { \"string\":{\"exact\":\"Gram positive blood culture panel by Probe in Positive blood culture\",\"text\":\"Gram positive blood culture panel by Probe in Positive blood culture\",\"norm\":\"Gram positive blood culture panel by Probe in Positive blood culture\"}," +
" \"token\":{\"code\":\"88262-1\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|88262-1\"}}}}}]}}}," +
// a second result
"{\"key\":\"http://loinc.org|4544-3\",\"doc_count\":1,\"nestedTopNAgg\":{\"hits\":{\"total\":{\"value\":1,\"relation\":\"eq\"},\"max_score\":1.0,\"hits\":[{\"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"12\",\"_score\":1.0,\"_source\":{\"sp\":{\"code\":{\"string\":{\"exact\":\"Hematocrit [Volume Fraction] of Blood by Automated count\",\"text\":\"Hematocrit [Volume Fraction] of Blood by Automated count\",\"norm\":\"Hematocrit [Volume Fraction] of Blood by Automated count\"},\"token\":{\"code\":\"4544-3\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|4544-3\"}}}}}]}}}," +
"{\"key\":\"http://loinc.org|4548-4\",\"doc_count\":1,\"nestedTopNAgg\":{\"hits\":{\"total\":{\"value\":1,\"relation\":\"eq\"},\"max_score\":1.0,\"hits\":[{\"_index\":\"resourcetable-000001\",\"_type\":\"_doc\",\"_id\":\"11\",\"_score\":1.0,\"_source\":{\"sp\":{\"code\":{\"string\":{\"exact\":\"Hemoglobin A1c/Hemoglobin.total in Blood\",\"text\":\"Hemoglobin A1c/Hemoglobin.total in Blood\",\"norm\":\"Hemoglobin A1c/Hemoglobin.total in Blood\"},\"token\":{\"code\":\"4548-4\",\"system\":\"http://loinc.org\",\"code-system\":\"http://loinc.org|4548-4\"}}}}}]}}}" +
"]}";
String resultJson = """
{
"doc_count": 22770,
"search": {
"doc_count": 4,
"group_by_token": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "http://loinc.org|59460-6",
"doc_count": 2,
"top_tags_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 4.9845064e-05,
"hits": [
{
"_index": "resourcetable-000001",
"_type": "_doc",
"_id": "1405280",
"_nested": {
"field": "nsp.code",
"offset": 0
},
"_score": 4.9845064e-05,
"_source": {
"string": {
"text": "Fall risk total [Morse Fall Scale]"
},
"token": {
"code": "59460-6",
"system": "http://loinc.org",
"code-system": "http://loinc.org|59460-6"
}
}
}
]
}
}
},
{
"key": "http://loinc.org|59461-4",
"doc_count": 2,
"top_tags_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 4.9845064e-05,
"hits": [
{
"_index": "resourcetable-000001",
"_type": "_doc",
"_id": "1405281",
"_nested": {
"field": "nsp.code",
"offset": 0
},
"_score": 4.9845064e-05,
"_source": {
"string": {
"text": "Fall risk level [Morse Fall Scale]"
},
"token": {
"code": "59461-4",
"system": "http://loinc.org",
"code-system": "http://loinc.org|59461-4"
}
}
}
]
}
}
}
]
}
}
}""";
JsonObject parsedResult = new Gson().fromJson(resultJson, JsonObject.class);
TokenAutocompleteAggregation myAutocompleteAggregation = new TokenAutocompleteAggregation("code", 22);
TokenAutocompleteAggregation myAutocompleteAggregation = new TokenAutocompleteAggregation("code", 22, null, null);
@Test
public void testResultExtraction() {
@ -77,69 +145,24 @@ class TokenAutocompleteAggregationTest {
List<TokenAutocompleteHit> hits = myAutocompleteAggregation.extractResults(parsedResult);
assertThat(hits, is(not(empty())));
assertThat(hits, (hasSize(3)));
assertThat(hits, (hasSize(2)));
}
@Test
public void testBucketExtraction() {
JsonObject bucket = (JsonObject) parsedResult.getAsJsonArray("buckets").get(0);
JsonObject bucket = parsedResult
.getAsJsonObject("search")
.getAsJsonObject("group_by_token")
.getAsJsonArray("buckets")
.get(0)
.getAsJsonObject();
TokenAutocompleteHit entry = myAutocompleteAggregation.bucketToEntry(bucket);
assertThat(entry.mySystemCode, equalTo("http://loinc.org|88262-1"));
assertThat(entry.myDisplayText, equalTo("Gram positive blood culture panel by Probe in Positive blood culture"));
assertThat(entry.mySystemCode, equalTo("http://loinc.org|59460-6"));
assertThat(entry.myDisplayText, equalTo("Fall risk total [Morse Fall Scale]"));
}
/**
* Until we move to nested, we may have multiple Coding in a code. This is broken.
*/
@Test
public void testMultiValuedBucketExtraction() {
JsonObject bucket = new Gson().fromJson("{" +
" \"key\": \"http://loinc.org|2708-6\"," +
" \"doc_count\": 14," +
" \"nestedTopNAgg\": {" +
" \"hits\": {" +
" \"total\": {" +
" \"value\": 14," +
" \"relation\": \"eq\"" +
" }," +
" \"max_score\": 1.0000025," +
" \"hits\": [" +
" {" +
" \"_index\": \"resourcetable-000001\"," +
" \"_type\": \"_doc\"," +
" \"_id\": \"1393284\"," +
" \"_score\": 1.0000025," +
" \"_source\": {" +
" \"sp\": {" +
" \"code\": {" +
" \"string\": {" +
" \"exact\": [" +
" \"Oxygen saturation in Arterial blood by Pulse oximetry\"," +
" \"Oxygen saturation in Arterial blood\"" +
" ]" +
" }," +
" \"token\": {" +
" \"code-system\": [" +
" \"http://loinc.org|2708-6\"," +
" \"http://loinc.org|59408-5\"" +
" ]" +
" }" +
" }" +
" }" +
" }" +
" }" +
" ]" +
" }" +
" }" +
"}", JsonObject.class);
TokenAutocompleteHit entry = myAutocompleteAggregation.bucketToEntry(bucket);
assertThat(entry.mySystemCode, equalTo("http://loinc.org|2708-6"));
assertThat(entry.myDisplayText, equalTo("Oxygen saturation in Arterial blood by Pulse oximetry"));
}
}

View File

@ -6,6 +6,7 @@ import ca.uhn.fhir.jpa.api.dao.IFhirResourceDao;
import ca.uhn.fhir.jpa.api.dao.IFhirSystemDao;
import ca.uhn.fhir.jpa.api.svc.ISearchCoordinatorSvc;
import ca.uhn.fhir.jpa.bulk.export.api.IBulkDataExportSvc;
import ca.uhn.fhir.jpa.config.TestDataBuilderConfig;
import ca.uhn.fhir.jpa.config.TestHibernateSearchAddInConfig;
import ca.uhn.fhir.jpa.config.TestR4Config;
import ca.uhn.fhir.jpa.dao.BaseJpaTest;
@ -13,6 +14,7 @@ import ca.uhn.fhir.jpa.search.reindex.IResourceReindexingSvc;
import ca.uhn.fhir.jpa.sp.ISearchParamPresenceSvc;
import ca.uhn.fhir.rest.server.servlet.ServletRequestDetails;
import ca.uhn.fhir.rest.server.util.ISearchParamRegistry;
import ca.uhn.fhir.test.utilities.ITestDataBuilder;
import ca.uhn.fhir.test.utilities.docker.RequiresDocker;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
@ -35,9 +37,9 @@ import javax.annotation.Nonnull;
import javax.persistence.EntityManager;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.hasSize;
@ -46,8 +48,11 @@ import static org.hamcrest.Matchers.not;
@ExtendWith(SpringExtension.class)
@RequiresDocker
@ContextConfiguration(classes = {TestR4Config.class, TestHibernateSearchAddInConfig.Elasticsearch.class})
@ContextConfiguration(classes = {
TestR4Config.class, TestHibernateSearchAddInConfig.Elasticsearch.class, TestDataBuilderConfig.class
})
public class TokenAutocompleteElasticsearchIT extends BaseJpaTest{
public static final Coding erythrocyte_by_volume = new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count");
@Autowired
protected PlatformTransactionManager myTxManager;
protected ServletRequestDetails mySrd = new ServletRequestDetails();
@ -72,6 +77,12 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
IResourceReindexingSvc myResourceReindexingSvc;
@Autowired
IBulkDataExportSvc myBulkDataExportSvc;
@Autowired
ITestDataBuilder myDataBuilder;
// a few different codes
static final Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure");
static final Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture");
@BeforeEach
public void beforePurgeDatabase() {
@ -88,14 +99,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
protected PlatformTransactionManager getTxManager() {
return myTxManager;
}
@Test
public void testAutocompleteByCodeDisplay() {
// a few different codes
Coding mean_blood_pressure = new Coding("http://loinc.org", "8478-0", "Mean blood pressure");
Coding gram_positive_culture = new Coding("http://loinc.org", "88262-1", "Gram positive blood culture panel by Probe in Positive blood culture");
createObservationWithCode(new Coding("http://loinc.org", "789-8", "Erythrocytes [#/volume] in Blood by Automated count"));
createObservationWithCode(erythrocyte_by_volume);
createObservationWithCode(mean_blood_pressure);
createObservationWithCode(mean_blood_pressure);
createObservationWithCode(new Coding("http://loinc.org", "788-0", "Erythrocyte distribution width [Ratio] by Automated count"));
@ -146,6 +154,29 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
}
/**
* If an observation has multiple codes, make sure searching by text only matches the right code.
*/
@Test
public void testAutocompleteDistinguishesMultipleCodes() {
createObservationWithCode(erythrocyte_by_volume,mean_blood_pressure,gram_positive_culture);
createObservationWithCode(gram_positive_culture);
createObservationWithCode(mean_blood_pressure);
List<TokenAutocompleteHit> codes = autocompleteSearch("Observation", "code", null, null);
assertThat("null finds all three codes", codes, hasSize(3));
codes = autocompleteSearch("Observation", "code", null, "789");
assertThat("token prefix finds the matching code", codes, hasItem(matchingSystemAndCode(erythrocyte_by_volume)));
assertThat("token prefix finds only the matching code, not all codes on the resource", codes, contains(matchingSystemAndCode(erythrocyte_by_volume)));
codes = autocompleteSearch("Observation", "code", "text", "erythrocyte");
assertThat("text finds the matching code", codes, hasItem(matchingSystemAndCode(erythrocyte_by_volume)));
assertThat("text finds only the matching code, not all codes on the resource", codes, contains(matchingSystemAndCode(erythrocyte_by_volume)));
}
List<TokenAutocompleteHit> autocompleteSearch(String theResourceType, String theSPName, String theModifier, String theSearchText) {
return new TransactionTemplate(myTxManager).execute(s -> {
TokenAutocompleteSearch tokenAutocompleteSearch = new TokenAutocompleteSearch(myFhirCtx, Search.session(myEntityManager));
@ -153,9 +184,11 @@ public class TokenAutocompleteElasticsearchIT extends BaseJpaTest {
});
}
private IIdType createObservationWithCode(Coding c) {
private IIdType createObservationWithCode(Coding... theCodings) {
Observation obs1 = new Observation();
obs1.getCode().addCoding(c);
for (Coding coding : theCodings) {
obs1.getCode().addCoding(coding);
}
return myObservationDao.create(obs1, mySrd).getId().toUnqualifiedVersionless();
}

View File

@ -21,16 +21,17 @@ package ca.uhn.fhir.jpa.model.search;
*/
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.rest.param.TokenParam;
import ca.uhn.fhir.model.dstu2.composite.CodingDt;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.SetMultimap;
import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hl7.fhir.instance.model.api.IBaseCoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.function.BiConsumer;
import java.util.Date;
import java.util.Objects;
import java.util.function.BiConsumer;
/**
* Collects our lucene extended indexing data.
@ -41,7 +42,7 @@ public class ExtendedLuceneIndexData {
final FhirContext myFhirContext;
final SetMultimap<String, String> mySearchParamStrings = HashMultimap.create();
final SetMultimap<String, TokenParam> mySearchParamTokens = HashMultimap.create();
final SetMultimap<String, IBaseCoding> mySearchParamTokens = HashMultimap.create();
final SetMultimap<String, String> mySearchParamLinks = HashMultimap.create();
final SetMultimap<String, DateSearchIndexData> mySearchParamDates = HashMultimap.create();
@ -73,8 +74,19 @@ public class ExtendedLuceneIndexData {
mySearchParamStrings.put(theSpName, theText);
}
public void addTokenIndexData(String theSpName, String theSystem, String theValue) {
mySearchParamTokens.put(theSpName, new TokenParam(theSystem, theValue));
/**
* Add if not already present.
*/
public void addTokenIndexDataIfNotPresent(String theSpName, String theSystem, String theValue) {
boolean isPresent = mySearchParamTokens.get(theSpName).stream()
.anyMatch(c -> Objects.equals(c.getSystem(), theSystem) && Objects.equals(c.getCode(), theValue));
if (!isPresent) {
addTokenIndexData(theSpName, new CodingDt(theSystem, theValue));
}
}
public void addTokenIndexData(String theSpName, IBaseCoding theNextValue) {
mySearchParamTokens.put(theSpName, theNextValue);
}
public void addResourceLinkIndexData(String theSpName, String theTargetResourceId) {
@ -84,4 +96,5 @@ public class ExtendedLuceneIndexData {
public void addDateIndexData(String theSpName, Date theLowerBound, int theLowerBoundOrdinal, Date theUpperBound, int theUpperBoundOrdinal) {
mySearchParamDates.put(theSpName, new DateSearchIndexData(theLowerBound, theLowerBoundOrdinal, theUpperBound, theUpperBoundOrdinal));
}
}

View File

@ -21,8 +21,9 @@ package ca.uhn.fhir.jpa.model.search;
*/
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.rest.param.TokenParam;
import org.apache.commons.lang3.StringUtils;
import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hl7.fhir.instance.model.api.IBaseCoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -31,6 +32,8 @@ public class HibernateSearchIndexWriter {
public static final String IDX_STRING_NORMALIZED = "norm";
public static final String IDX_STRING_EXACT = "exact";
public static final String IDX_STRING_TEXT = "text";
public static final String NESTED_SEARCH_PARAM_ROOT = "nsp";
public static final String SEARCH_PARAM_ROOT = "sp";
final HibernateSearchElementCache myNodeCache;
final FhirContext myFhirContext;
@ -40,7 +43,7 @@ public class HibernateSearchIndexWriter {
}
public DocumentElement getSearchParamIndexNode(String theSearchParamName, String theIndexType) {
return myNodeCache.getObjectElement("sp", theSearchParamName, theIndexType);
return myNodeCache.getObjectElement(SEARCH_PARAM_ROOT, theSearchParamName, theIndexType);
}
@ -57,14 +60,25 @@ public class HibernateSearchIndexWriter {
ourLog.debug("Adding Search Param Text: {} -- {}", theSearchParam, theValue);
}
public void writeTokenIndex(String theSearchParam, TokenParam theValue) {
public void writeTokenIndex(String theSearchParam, IBaseCoding theValue) {
DocumentElement nestedRoot = myNodeCache.getObjectElement(NESTED_SEARCH_PARAM_ROOT);
DocumentElement nestedSpNode = nestedRoot.addObject(theSearchParam);
DocumentElement nestedTokenNode = nestedSpNode.addObject("token");
nestedTokenNode.addValue("code", theValue.getCode());
nestedTokenNode.addValue("system", theValue.getSystem());
nestedTokenNode.addValue("code-system", theValue.getSystem() + "|" + theValue.getCode());
if (StringUtils.isNotEmpty(theValue.getDisplay())) {
DocumentElement nestedStringNode = nestedSpNode.addObject("string");
nestedStringNode.addValue(IDX_STRING_TEXT, theValue.getDisplay());
}
DocumentElement tokenIndexNode = getSearchParamIndexNode(theSearchParam, "token");
// TODO mb we can use a token_filter with pattern_capture to generate all three off a single value. Do this next, after merge.
tokenIndexNode.addValue("code", theValue.getValue());
tokenIndexNode.addValue("code", theValue.getCode());
tokenIndexNode.addValue("system", theValue.getSystem());
//This next one returns as system|value
tokenIndexNode.addValue("code-system", theValue.getValueAsQueryToken(myFhirContext));
tokenIndexNode.addValue("code-system", theValue.getSystem() + "|" + theValue.getCode());
ourLog.debug("Adding Search Param Token: {} -- {}", theSearchParam, theValue);
// TODO mb should we write the strings here too? Or leave it to the old spidx indexing?
}
public void writeReferenceIndex(String theSearchParam, String theValue) {

View File

@ -38,7 +38,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Instant;
import java.time.LocalDateTime;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT;
import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED;
@ -108,17 +107,16 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
// The following section is a bit ugly. We need to enforce order and dependency or the object matches will be too big.
{
IndexSchemaObjectField spfield = indexSchemaElement.objectField("sp", ObjectStructure.FLATTENED);
IndexSchemaObjectField spfield = indexSchemaElement.objectField(HibernateSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
spfield.toReference();
IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField(HibernateSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
nestedSpField.toReference();
// TODO MB: the lucene/elastic independent api is hurting a bit here.
// For lucene, we need a separate field for each analyzer. So we'll add string (for :exact), and text (for :text).
// They aren't marked stored, so there's no space cost beyond the index for each.
// But for elastic, I'd rather have a single field defined, with multi-field sub-fields. The index cost is the same,
// but elastic will actually store all fields in the source document.
// Something like this. But we'll need two index writers (lucene vs hibernate).
// ElasticsearchNativeIndexFieldTypeMappingStep nativeStep = indexFieldTypeFactory.extension(ElasticsearchExtension.get()).asNative();
// nativeStep.mapping()
// So triplicate the storage for now. :-(
String stringPathGlob = "*.string";
@ -127,6 +125,9 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued();
spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
nestedSpField.objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
nestedSpField.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
// token
// Ideally, we'd store a single code-system string and use a custom tokenizer to
// generate "system|" "|code" and "system|code" tokens to support all three.
@ -139,6 +140,11 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
nestedSpField.objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
nestedSpField.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
nestedSpField.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
nestedSpField.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
// reference
// reference
@ -154,6 +160,9 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
// last, since the globs are matched in declaration order, and * matches even nested nodes.
spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");
// we use nested search params for the autocomplete search.
nestedSpField.objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED).matchingPathGlob("*").multiValued();
}
}

View File

@ -730,7 +730,7 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor
if ("Coding".equals(nextType)) {
String system = extractValueAsString(myCodingSystemValueChild, theValue);
String code = extractValueAsString(myCodingCodeValueChild, theValue);
return createTokenIndexIfNotBlank(theResourceType, theSearchParam, system, code);
return createTokenIndexIfNotBlank(theResourceType, system, code, theSearchParam.getName());
} else {
return null;
}
@ -1079,7 +1079,7 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor
}
private void createTokenIndexIfNotBlankAndAdd(String theResourceType, Set<BaseResourceIndexedSearchParam> theParams, RuntimeSearchParam theSearchParam, String theSystem, String theValue) {
ResourceIndexedSearchParamToken nextEntity = createTokenIndexIfNotBlank(theResourceType, theSearchParam, theSystem, theValue);
ResourceIndexedSearchParamToken nextEntity = createTokenIndexIfNotBlank(theResourceType, theSystem, theValue, theSearchParam.getName());
if (nextEntity != null) {
theParams.add(nextEntity);
}
@ -1090,11 +1090,6 @@ public abstract class BaseSearchParamExtractor implements ISearchParamExtractor
myPartitionSettings = thePartitionSettings;
}
private ResourceIndexedSearchParamToken createTokenIndexIfNotBlank(String theResourceType, RuntimeSearchParam theSearchParam, String theSystem, String theValue) {
String searchParamName = theSearchParam.getName();
return createTokenIndexIfNotBlank(theResourceType, theSystem, theValue, searchParamName);
}
private ResourceIndexedSearchParamToken createTokenIndexIfNotBlank(String theResourceType, String theSystem, String theValue, String searchParamName) {
String system = theSystem;
String value = theValue;

View File

@ -24,6 +24,7 @@ import ca.uhn.fhir.context.BaseRuntimeChildDefinition;
import ca.uhn.fhir.context.BaseRuntimeElementCompositeDefinition;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.util.FhirTerser;
import org.hl7.fhir.instance.model.api.IBase;
import org.hl7.fhir.instance.model.api.IBaseReference;
import org.hl7.fhir.instance.model.api.IBaseResource;
@ -185,13 +186,10 @@ public interface ITestDataBuilder {
default Consumer<IBaseResource> withObservationCode(@Nullable String theSystem, @Nullable String theCode) {
return t -> {
ICompositeType codeableConcept = (ICompositeType) getFhirContext().getElementDefinition("CodeableConcept").newInstance();
IBase coding = getFhirContext().newTerser().addElement(codeableConcept, "coding");
getFhirContext().newTerser().addElement(coding, "system", theSystem);
getFhirContext().newTerser().addElement(coding, "code", theCode);
RuntimeResourceDefinition resourceDef = getFhirContext().getResourceDefinition(t.getClass());
resourceDef.getChildByName("code").getMutator().addValue(t, codeableConcept);
FhirTerser terser = getFhirContext().newTerser();
IBase coding = terser.addElement(t, "code.coding");
terser.addElement(coding, "system", theSystem);
terser.addElement(coding, "code", theCode);
};
}

View File

@ -4,5 +4,5 @@ extraction:
index:
java_version: 17
maven:
version: 3.8.1
version: 3.8.4