cherry-picked pr 6051

This commit is contained in:
Tadgh 2024-06-26 16:20:15 -07:00 committed by Long Ma
parent 50f6344289
commit c044c4d4aa
10 changed files with 654 additions and 204 deletions

View File

@ -21,15 +21,22 @@ package ca.uhn.fhir.rest.param;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.model.primitive.UriDt;
import ca.uhn.fhir.rest.api.Constants;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.commons.lang3.StringUtils.defaultString;
public class SpecialParam extends BaseParam /*implements IQueryParameterType*/ {
private static final Logger ourLog = LoggerFactory.getLogger(StringParam.class);
private String myValue;
private boolean myContains;
/**
* Constructor
@ -40,8 +47,12 @@ public class SpecialParam extends BaseParam /*implements IQueryParameterType*/ {
@Override
String doGetQueryParameterQualifier() {
if (myContains) {
return Constants.PARAMQUALIFIER_STRING_CONTAINS;
} else {
return null;
}
}
/**
* {@inheritDoc}
@ -56,6 +67,15 @@ public class SpecialParam extends BaseParam /*implements IQueryParameterType*/ {
*/
@Override
void doSetValueAsQueryToken(FhirContext theContext, String theParamName, String theQualifier, String theParameter) {
if (Constants.PARAMQUALIFIER_STRING_CONTAINS.equals(theQualifier)) {
if (theParamName.equalsIgnoreCase(Constants.PARAM_TEXT)
|| theParamName.equalsIgnoreCase(Constants.PARAM_CONTENT)) {
setContains(true);
} else {
ourLog.debug(
"Attempted to set the :contains modifier on a special search parameter that was not `_text` or `_content`. This is not supported.");
}
}
setValue(ParameterUtil.unescape(theParameter));
}
@ -93,4 +113,52 @@ public class SpecialParam extends BaseParam /*implements IQueryParameterType*/ {
private static String toSystemValue(UriDt theSystem) {
return theSystem.getValueAsString();
}
/**
* Special parameter modifier <code>:contains</code> for _text and _content
*/
public boolean isContains() {
return myContains;
}
/**
* Special parameter modifier <code>:contains</code> for _text and _content
*/
public SpecialParam setContains(boolean theContains) {
myContains = theContains;
if (myContains) {
setMissing(null);
}
return this;
}
@Override
public int hashCode() {
return new HashCodeBuilder(17, 37)
.append(isContains())
.append(getValue())
.append(getMissing())
.toHashCode();
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof SpecialParam)) {
return false;
}
SpecialParam other = (SpecialParam) obj;
EqualsBuilder eb = new EqualsBuilder();
eb.append(myContains, other.myContains);
eb.append(myValue, other.myValue);
eb.append(getMissing(), other.getMissing());
return eb.isEquals();
}
}

View File

@ -0,0 +1,67 @@
package ca.uhn.fhir.rest.param;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.rest.api.Constants;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.LoggerFactory;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ExtendWith(MockitoExtension.class)
public class SpecialParamTest {
private static final Logger ourLog = (Logger) LoggerFactory.getLogger(StringParam.class);
private ListAppender<ILoggingEvent> myListAppender = new ListAppender<>();
@Mock
private FhirContext myContext;
@BeforeEach
public void beforeEach(){
myListAppender = new ListAppender<>();
myListAppender.start();
ourLog.addAppender(myListAppender);
}
@AfterEach
public void afterEach(){
myListAppender.stop();
}
@Test
public void testEquals() {
SpecialParam specialParam = new SpecialParam();
specialParam.setValueAsQueryToken(myContext, Constants.PARAM_TEXT, Constants.PARAMQUALIFIER_STRING_CONTAINS, "my-test-value");
SpecialParam specialParam2 = new SpecialParam();
specialParam2.setValueAsQueryToken(myContext, Constants.PARAM_TEXT, Constants.PARAMQUALIFIER_STRING_CONTAINS, "my-test-value");
assertThat(specialParam).isEqualTo(specialParam2);
}
@Test
public void testContainsOnlyWorksForSpecificParams() {
SpecialParam specialParamText = new SpecialParam();
specialParamText.setValueAsQueryToken(myContext, Constants.PARAM_TEXT, Constants.PARAMQUALIFIER_STRING_CONTAINS, "my-test-value");
assertTrue(specialParamText.isContains());
SpecialParam specialParamContent = new SpecialParam();
specialParamContent.setValueAsQueryToken(myContext, Constants.PARAM_CONTENT, Constants.PARAMQUALIFIER_STRING_CONTAINS, "my-test-value");
assertTrue(specialParamContent.isContains());
SpecialParam nonTextSpecialParam = new SpecialParam();
nonTextSpecialParam.setValueAsQueryToken(myContext, "name", Constants.PARAMQUALIFIER_STRING_CONTAINS, "my-test-value");
assertFalse(nonTextSpecialParam.isContains());
}
}

View File

@ -22,10 +22,8 @@ import java.util.List;
import java.util.stream.Collectors;
import static ca.uhn.fhir.rest.api.Constants.PARAMQUALIFIER_STRING_TEXT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;
@ExtendWith(MockitoExtension.class)
public class StringParamTest {
@ -177,9 +175,9 @@ public class StringParamTest {
.collect(Collectors.toList());
if (theWasLogged) {
assertEquals(1, warningLogs.size());
assertThat(warningLogs).hasSize(1);
} else {
assertTrue(warningLogs.isEmpty());
assertThat(warningLogs).isEmpty();
}
}

View File

@ -0,0 +1,4 @@
---
type: fix
issue: 6046
title: "Previously, using `_text` and `_content` searches in Hibernate Search in R5 was not supported. This issue has been fixed."

View File

@ -0,0 +1,5 @@
---
type: add
issue: 6046
title: "Added support for `:contains` parameter qualifier on the `_text` and `_content` Search Parameters. When using Hibernate Search, this will cause
the search to perform an substring match on the provided value. Documentation can be found [here](/hapi-fhir/docs/server_jpa/elastic.html#performing-fulltext-search-in-luceneelasticsearch)."

View File

@ -3,6 +3,55 @@
The HAPI JPA Server supports optional indexing via Hibernate Search when configured to use Lucene or Elasticsearch.
This is required to support the `_content`, or `_text` search parameters.
# Performing Fulltext Search in Lucene/Elasticsearch
When enabled, searches for `_text` and `_content` are forwarded to the underlying Hibernate Search engine, which can be backed by either Elasticsearch or Lucene.
By default, search is supported in the way indicated in the [FHIR Specification on _text/_content Search](https://www.hl7.org/fhir/search.html#_text). This means that
queries like the following can be evaluated:
```http request
GET [base]/Observation?_content=cancer OR metastases OR tumor
```
To understand how this works, look at the following example. During ingestion, the fields required for `_content` and `_text` searches are stored in the backing engine, after undergoing normalization and analysis. For example consider this Observation:
```json
{
"resourceType" : "Observation",
"code" : {
"coding" : [{
"system" : "http://loinc.org",
"code" : "15074-8",
"display" : "Glucose [Moles/volume] in Blood Found during patient's visit!"
}]
}
"valueQuantity" : {
"value" : 6.3,
"unit" : "mmol/l",
"system" : "http://unitsofmeasure.org",
"code" : "mmol/L"
}
}
```
In the display section, once parsed and analyzed, will result in the followings tokens being generated to be able to be searched on:
```json
["glucose", "mole", "volume", "blood", "found", "during", "patient", "visit"]
```
You will notice that plurality is removed, and the text has been normalized, and special characters removed. When searched for, the search terms will be normalized in the same fashion.
However, the default implementation will not allow you to search for an exact match over a long string that contains special characters or other characters which could be broken apart during tokenization. E.g. an exact match for `_content=[Moles/volume]` would not return this result.
In order to perform such an exact string match in Lucene/Elasticsearch, you should modify the `_text` or `_content` Search Parameter with the `:contains` modifier, as follows:
```http request
GET [base]/Observation?_content:contains=[Moles/volume]
```
Using `:contains` on the `_text` or `_content` modifies the search engine to perform a direct substring match anywhere within the field.
# Experimental Extended Lucene/Elasticsearch Indexing
Additional indexing is implemented for simple search parameters of type token, string, and reference.
@ -69,14 +118,14 @@ See https://www.hl7.org/fhir/search.html#token.
## Supported Common and Special Search Parameters
| Parameter | Supported | type |
|--------------|-----------|--------|
|--------------|-----------|------------------------|
| _id | no | |
| _lastUpdated | yes | date |
| _tag | yes | token |
| _profile | yes | URI |
| _security | yes | token |
| _text | yes | string |
| _content | yes | string |
| _text | yes | string(R4) special(R5) |
| _content | yes | string(R4) special(R5) |
| _list | no | |
| _has | no | |
| _type | no | |

View File

@ -36,6 +36,7 @@ import ca.uhn.fhir.rest.param.NumberParam;
import ca.uhn.fhir.rest.param.ParamPrefixEnum;
import ca.uhn.fhir.rest.param.QuantityParam;
import ca.uhn.fhir.rest.param.ReferenceParam;
import ca.uhn.fhir.rest.param.SpecialParam;
import ca.uhn.fhir.rest.param.StringParam;
import ca.uhn.fhir.rest.param.TokenParam;
import ca.uhn.fhir.rest.param.UriParam;
@ -122,14 +123,12 @@ public class ExtendedHSearchClauseBuilder {
}
@Nonnull
private Set<String> extractOrStringParams(List<? extends IQueryParameterType> nextAnd) {
private Set<String> extractOrStringParams(String theSearchParamName, List<? extends IQueryParameterType> nextAnd) {
Set<String> terms = new HashSet<>();
for (IQueryParameterType nextOr : nextAnd) {
String nextValueTrimmed;
if (nextOr instanceof StringParam) {
StringParam nextOrString = (StringParam) nextOr;
nextValueTrimmed =
StringUtils.defaultString(nextOrString.getValue()).trim();
if (isStringParamOrEquivalent(theSearchParamName, nextOr)) {
nextValueTrimmed = getTrimmedStringValue(nextOr);
} else if (nextOr instanceof TokenParam) {
TokenParam nextOrToken = (TokenParam) nextOr;
nextValueTrimmed = nextOrToken.getValue();
@ -150,6 +149,34 @@ public class ExtendedHSearchClauseBuilder {
return terms;
}
private String getTrimmedStringValue(IQueryParameterType nextOr) {
String value;
if (nextOr instanceof StringParam) {
value = ((StringParam) nextOr).getValue();
} else if (nextOr instanceof SpecialParam) {
value = ((SpecialParam) nextOr).getValue();
} else {
throw new IllegalArgumentException(Msg.code(2535)
+ "Failed to extract value for fulltext search from parameter. Needs to be a `string` parameter, or `_text` or `_content` special parameter."
+ nextOr);
}
return StringUtils.defaultString(value).trim();
}
/**
* String Search params are valid, so are two special params, _content and _text.
*
* @param theSearchParamName The name of the SP
* @param nextOr the or values of the query parameter.
*
* @return a boolean indicating whether we can treat this as a string.
*/
private static boolean isStringParamOrEquivalent(String theSearchParamName, IQueryParameterType nextOr) {
List<String> specialSearchParamsToTreatAsStrings = List.of(Constants.PARAM_TEXT, Constants.PARAM_CONTENT);
return (nextOr instanceof StringParam)
|| (nextOr instanceof SpecialParam && specialSearchParamsToTreatAsStrings.contains(theSearchParamName));
}
public void addTokenUnmodifiedSearch(String theSearchParamName, List<List<IQueryParameterType>> theAndOrTerms) {
if (CollectionUtils.isEmpty(theAndOrTerms)) {
return;
@ -229,8 +256,27 @@ public class ExtendedHSearchClauseBuilder {
break;
}
if (isContainsSearch(theSearchParamName, stringAndOrTerms)) {
for (List<? extends IQueryParameterType> nextOrList : stringAndOrTerms) {
Set<String> orTerms = TermHelper.makePrefixSearchTerm(extractOrStringParams(nextOrList));
addPreciseMatchClauses(theSearchParamName, nextOrList, fieldName);
}
} else {
for (List<? extends IQueryParameterType> nextOrList : stringAndOrTerms) {
addSimpleQueryMatchClauses(theSearchParamName, nextOrList, fieldName);
}
}
}
/**
* This route is used for standard string searches, or `_text` or `_content`. For each term, we build a `simpleQueryString `element which allows hibernate search to search on normalized, analyzed, indexed fields.
*
* @param theSearchParamName The name of the search parameter
* @param nextOrList the list of query parameters
* @param fieldName the field name in the index document to compare with.
*/
private void addSimpleQueryMatchClauses(
String theSearchParamName, List<? extends IQueryParameterType> nextOrList, String fieldName) {
Set<String> orTerms = TermHelper.makePrefixSearchTerm(extractOrStringParams(theSearchParamName, nextOrList));
ourLog.debug("addStringTextSearch {}, {}", theSearchParamName, orTerms);
if (!orTerms.isEmpty()) {
String query = orTerms.stream().map(s -> "( " + s + " )").collect(Collectors.joining(" | "));
@ -239,20 +285,36 @@ public class ExtendedHSearchClauseBuilder {
.field(fieldName)
.matching(query)
.defaultOperator(
BooleanOperator
.AND)); // term value may contain multiple tokens. Require all of them to be
BooleanOperator.AND)); // term value may contain multiple tokens. Require all of them to
// be
// present.
} else {
ourLog.warn("No Terms found in query parameter {}", nextOrList);
}
}
/**
* Note that this `match()` operation is different from out standard behaviour, which uses simpleQueryString(). This `match()` forces a precise string match, Whereas `simpleQueryString()` uses a more nebulous
* and loose check against a collection of terms. We only use this when we see ` _text:contains=` or `_content:contains=` search.
*
* @param theSearchParamName the Name of the search parameter
* @param nextOrList the list of query parameters
* @param fieldName the field name in the index document to compare with.
*/
private void addPreciseMatchClauses(
String theSearchParamName, List<? extends IQueryParameterType> nextOrList, String fieldName) {
Set<String> orTerms = TermHelper.makePrefixSearchTerm(extractOrStringParams(theSearchParamName, nextOrList));
for (String orTerm : orTerms) {
myRootClause.must(myRootContext.match().field(fieldName).matching(orTerm));
}
}
public void addStringExactSearch(String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
String fieldPath = joinPath(SEARCH_PARAM_ROOT, theSearchParamName, INDEX_TYPE_STRING, IDX_STRING_EXACT);
for (List<? extends IQueryParameterType> nextAnd : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
Set<String> terms = extractOrStringParams(theSearchParamName, nextAnd);
ourLog.debug("addStringExactSearch {} {}", theSearchParamName, terms);
List<? extends PredicateFinalStep> orTerms = terms.stream()
.map(s -> myRootContext.match().field(fieldPath).matching(s))
@ -266,7 +328,7 @@ public class ExtendedHSearchClauseBuilder {
String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
String fieldPath = joinPath(SEARCH_PARAM_ROOT, theSearchParamName, INDEX_TYPE_STRING, IDX_STRING_NORMALIZED);
for (List<? extends IQueryParameterType> nextAnd : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
Set<String> terms = extractOrStringParams(theSearchParamName, nextAnd);
ourLog.debug("addStringContainsSearch {} {}", theSearchParamName, terms);
List<? extends PredicateFinalStep> orTerms = terms.stream()
// wildcard is a term-level query, so queries aren't analyzed. Do our own normalization first.
@ -294,7 +356,7 @@ public class ExtendedHSearchClauseBuilder {
String theSearchParamName, List<List<IQueryParameterType>> theStringAndOrTerms) {
PathContext context = contextForFlatSP(theSearchParamName);
for (List<? extends IQueryParameterType> nextOrList : theStringAndOrTerms) {
Set<String> terms = extractOrStringParams(nextOrList);
Set<String> terms = extractOrStringParams(theSearchParamName, nextOrList);
ourLog.debug("addStringUnmodifiedSearch {} {}", theSearchParamName, terms);
List<PredicateFinalStep> orTerms = terms.stream()
.map(s -> buildStringUnmodifiedClause(s, context))
@ -317,7 +379,7 @@ public class ExtendedHSearchClauseBuilder {
String theSearchParamName, List<List<IQueryParameterType>> theReferenceAndOrTerms) {
String fieldPath = joinPath(SEARCH_PARAM_ROOT, theSearchParamName, "reference", "value");
for (List<? extends IQueryParameterType> nextAnd : theReferenceAndOrTerms) {
Set<String> terms = extractOrStringParams(nextAnd);
Set<String> terms = extractOrStringParams(theSearchParamName, nextAnd);
ourLog.trace("reference unchained search {}", terms);
List<? extends PredicateFinalStep> orTerms = terms.stream()
@ -832,4 +894,17 @@ public class ExtendedHSearchClauseBuilder {
return compositeClause;
}
private boolean hasAContainsModifier(List<List<IQueryParameterType>> stringAndOrTerms) {
return stringAndOrTerms.stream()
.flatMap(List::stream)
.anyMatch(next ->
Constants.PARAMQUALIFIER_STRING_CONTAINS.equalsIgnoreCase(next.getQueryParameterQualifier()));
}
private boolean isContainsSearch(String theSearchParamName, List<List<IQueryParameterType>> stringAndOrTerms) {
return (Constants.PARAM_TEXT.equalsIgnoreCase(theSearchParamName)
|| Constants.PARAM_CONTENT.equalsIgnoreCase(theSearchParamName))
&& hasAContainsModifier(stringAndOrTerms);
}
}

View File

@ -8,5 +8,17 @@
<root level="info">
<appender-ref ref="STDOUT" />
</root>
<!--Uncomment the below if you are doing Elasticsearch debugging -->
<!-- <logger name="org.hibernate.search.elasticsearch.request" additivity="false" level="trace">-->
<!-- <appender-ref ref="STDOUT" />-->
<!-- </logger>-->
<!-- <logger name="org.elasticsearch.client" level="debug" additivity="false">-->
<!-- <appender-ref ref="STDOUT" />-->
<!-- </logger>-->
<!-- <logger name="tracer" level="TRACE" additivity="false">-->
<!-- <appender-ref ref="STDOUT" />-->
<!-- </logger>-->
</configuration>

10
pom.xml
View File

@ -129,6 +129,11 @@
</modules>
<dependencies>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
@ -170,6 +175,11 @@
<groupId>io.opentelemetry.instrumentation</groupId>
<artifactId>opentelemetry-instrumentation-annotations</artifactId>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>3.22.0</version>
</dependency>
</dependencies>
<developers>