Issue 3992 elastic search mapper parsing exception when uploading loinc terminology (#4141)

* Revert TermConceptProperty indexing to not-nested type to avoid hitting elastic default maximums and
fix the regexp issue which was the main reason for changing the indexing approach in the first place.

* Add changelog

* Document that terminology reindexing is required.

* Implement revision suggestions. Fix file name typo.

* Adjust and enable test created to reproduce bug

Co-authored-by: juan.marchionatto <juan.marchionatto@smilecdr.com>
This commit is contained in:
jmarchionatto 2022-10-21 16:19:45 -04:00 committed by GitHub
parent 1dba9392ef
commit 2830792c4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 296 additions and 321 deletions

View File

@ -0,0 +1,7 @@
---
type: fix
issue: 3992
title: "Previously when ValueSets were pre-expanded after LOINC terminology upload, expansion was failing with an exception for each ValueSet
with more than 10,000 properties. This problem has been fixed.
This fix changed some freetext mappings (definitions about how resources are freetext indexed) for terminology resources, which requires
reindexing those resources. To do this use the `reindex-terminology` command."

View File

@ -0,0 +1,4 @@
This release has breaking changes.
* Hibernate Search mappings for Terminology entities have been upgraded, which requires terminology freetext reindexing.
To recreate Terminology freetext indexes use CLI command: `reindex-terminology`

View File

@ -30,14 +30,14 @@ import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.hibernate.search.engine.backend.types.ObjectStructure;
import org.hibernate.search.engine.backend.types.Projectable;
import org.hibernate.search.engine.backend.types.Searchable;
import org.hibernate.search.mapper.pojo.bridge.mapping.annotation.PropertyBinderRef;
import org.hibernate.search.mapper.pojo.bridge.mapping.annotation.RoutingBinderRef;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.PropertyBinding;
import org.hl7.fhir.r4.model.Coding;
import javax.annotation.Nonnull;
@ -114,12 +114,8 @@ public class TermConcept implements Serializable {
@FullTextField(name = "myDisplayPhonetic", searchable= Searchable.YES, projectable= Projectable.NO, analyzer = "autocompletePhoneticAnalyzer")
private String myDisplay;
/**
* IndexedEmbedded uses ObjectStructure.NESTED to be able to use hibernate search nested predicate queries.
* @see "https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#search-dsl-predicate-nested"
*/
@OneToMany(mappedBy = "myConcept", orphanRemoval = false, fetch = FetchType.LAZY)
@IndexedEmbedded(structure = ObjectStructure.NESTED)
@PropertyBinding(binder = @PropertyBinderRef(type = TermConceptPropertyBinder.class))
private Collection<TermConceptProperty> myProperties;
@OneToMany(mappedBy = "myConcept", orphanRemoval = false, fetch = FetchType.LAZY)
@ -458,7 +454,7 @@ public class TermConcept implements Serializable {
* Returns a view of {@link #getChildren()} but containing the actual child codes
*/
public List<TermConcept> getChildCodes() {
	// Map each parent-child link to its child concept (method reference kept from the
	// post-change side of the diff; the duplicated pre-change lambda line was removed,
	// since two consecutive return statements cannot compile).
	return getChildren().stream().map(TermConceptParentChildLink::getChild).collect(Collectors.toList());
}
}

View File

@ -0,0 +1,83 @@
package ca.uhn.fhir.jpa.entity;
/*-
* #%L
* HAPI FHIR JPA Server
* %%
* Copyright (C) 2014 - 2022 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
import org.hibernate.search.mapper.pojo.bridge.PropertyBridge;
import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext;
import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder;
import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
/**
 * Hibernate Search binder that makes each concept property searchable as an
 * individual dynamic index field named with the "P:" prefix plus the property key.
 */
public class TermConceptPropertyBinder implements PropertyBinder {

	public static final String CONCEPT_PROPERTY_PREFIX_NAME = "P:";

	private static final Logger ourLog = LoggerFactory.getLogger(TermConceptPropertyBinder.class);

	@Override
	public void bind(PropertyBindingContext thePropertyBindingContext) {
		// Declare which entity properties this bridge reads, so reindexing triggers correctly.
		thePropertyBindingContext.dependencies().use("myKey").use("myValue");

		IndexSchemaElement schemaElement = thePropertyBindingContext.indexSchemaElement();

		// Dynamic fields require a field template registered at bootstrap time; they cannot
		// be created ad hoc at write time.
		// https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
		schemaElement.fieldTemplate("propTemplate", IndexFieldTypeFactory::asString)
			.matchingPathGlob(CONCEPT_PROPERTY_PREFIX_NAME + "*")
			.multiValued();

		thePropertyBindingContext.bridge(Collection.class, new TermConceptPropertyBridge());
	}

	@SuppressWarnings("rawtypes")
	private static class TermConceptPropertyBridge implements PropertyBridge<Collection> {

		@Override
		public void write(DocumentElement theDocument, Collection theObject, PropertyBridgeWriteContext theContext) {
			if (theObject == null) {
				return;
			}

			@SuppressWarnings("unchecked")
			Collection<TermConceptProperty> properties = (Collection<TermConceptProperty>) theObject;

			for (TermConceptProperty property : properties) {
				// Index the raw property value under its prefixed field name.
				theDocument.addValue(CONCEPT_PROPERTY_PREFIX_NAME + property.getKey(), property.getValue());
				ourLog.trace("Adding Prop: {}{} -- {}", CONCEPT_PROPERTY_PREFIX_NAME, property.getKey(), property.getValue());

				// CODING properties additionally index their display text under the same
				// (multivalued) field, so searches can match either code or display.
				boolean hasCodingDisplay =
					property.getType() == TermConceptPropertyTypeEnum.CODING && isNotBlank(property.getDisplay());
				if (hasCodingDisplay) {
					theDocument.addValue(CONCEPT_PROPERTY_PREFIX_NAME + property.getKey(), property.getDisplay());
					ourLog.trace("Adding multivalue Prop: {}{} -- {}", CONCEPT_PROPERTY_PREFIX_NAME, property.getKey(), property.getDisplay());
				}
			}
		}
	}
}

View File

@ -104,9 +104,6 @@ public class HapiHSearchAnalysisConfigurers {
theLuceneCtx.analyzer("conceptParentPidsAnalyzer").custom()
.tokenizer(WhitespaceTokenizerFactory.class);
theLuceneCtx.analyzer("termConceptPropertyAnalyzer").custom()
.tokenizer(WhitespaceTokenizerFactory.class);
theLuceneCtx.normalizer(LOWERCASE_ASCIIFOLDING_NORMALIZER).custom()
.tokenFilter(LowerCaseFilterFactory.class)
.tokenFilter(ASCIIFoldingFilterFactory.class);
@ -178,9 +175,6 @@ public class HapiHSearchAnalysisConfigurers {
theConfigCtx.analyzer("conceptParentPidsAnalyzer").custom()
.tokenizer("whitespace");
theConfigCtx.analyzer("termConceptPropertyAnalyzer").custom()
.tokenizer("whitespace");
theConfigCtx.normalizer( LOWERCASE_ASCIIFOLDING_NORMALIZER ).custom()
.tokenFilters( "lowercase", "asciifolding" );

View File

@ -65,7 +65,6 @@ import ca.uhn.fhir.jpa.model.sched.HapiJob;
import ca.uhn.fhir.jpa.model.sched.ISchedulerService;
import ca.uhn.fhir.jpa.model.sched.ScheduledJobDefinition;
import ca.uhn.fhir.jpa.model.util.JpaConstants;
import ca.uhn.fhir.jpa.search.ElasticsearchNestedQueryBuilderUtil;
import ca.uhn.fhir.jpa.search.builder.SearchBuilder;
import ca.uhn.fhir.jpa.term.api.ITermDeferredStorageSvc;
import ca.uhn.fhir.jpa.term.api.ITermReadSvc;
@ -91,7 +90,6 @@ import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.gson.JsonObject;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
@ -99,12 +97,7 @@ import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.hibernate.CacheMode;
import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
import org.hibernate.search.backend.lucene.LuceneExtension;
import org.hibernate.search.engine.search.predicate.dsl.BooleanPredicateClausesStep;
import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep;
import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory;
@ -193,6 +186,7 @@ import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import static ca.uhn.fhir.jpa.entity.TermConceptPropertyBinder.CONCEPT_PROPERTY_PREFIX_NAME;
import static ca.uhn.fhir.jpa.term.api.ITermLoaderSvc.LOINC_URI;
import static java.lang.String.join;
import static java.util.stream.Collectors.joining;
@ -284,10 +278,6 @@ public class TermReadSvcImpl implements ITermReadSvc {
private HibernatePropertiesProvider myHibernatePropertiesProvider;
private boolean isFullTextSetToUseElastic() {
return "elasticsearch".equalsIgnoreCase(myHibernatePropertiesProvider.getHibernateSearchBackend());
}
@Override
public boolean isCodeSystemSupported(ValidationSupportContext theValidationSupportContext, String theSystem) {
TermCodeSystemVersion cs = getCurrentCodeSystemVersion(theSystem);
@ -509,7 +499,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
/*
* ValueSet doesn't exist in pre-expansion database, so perform in-memory expansion
*/
if (!optionalTermValueSet.isPresent()) {
if (optionalTermValueSet.isEmpty()) {
ourLog.debug("ValueSet is not present in terminology tables. Will perform in-memory expansion without parameters. {}", getValueSetInfo(theValueSetToExpand));
String msg = myContext.getLocalizer().getMessage(TermReadSvcImpl.class, "valueSetExpandedUsingInMemoryExpansion", getValueSetInfo(theValueSetToExpand));
theAccumulator.addMessage(msg);
@ -815,7 +805,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
}
/**
* @return Returns true if there are potentially more results to process.
* Returns true if there are potentially more results to process.
*/
private void expandValueSetHandleIncludeOrExclude(@Nullable ValueSetExpansionOptions theExpansionOptions,
IValueSetConceptAccumulator theValueSetCodeAccumulator,
@ -853,9 +843,8 @@ public class TermReadSvcImpl implements ITermReadSvc {
}
}
Consumer<FhirVersionIndependentConcept> consumer = c -> {
Consumer<FhirVersionIndependentConcept> consumer = c ->
addOrRemoveCode(theValueSetCodeAccumulator, theAddedCodes, theAdd, system, c.getCode(), c.getDisplay(), c.getSystemVersion());
};
try {
ConversionContext40_50.INSTANCE.init(new VersionConvertor_40_50(new BaseAdvisor_40_50()), "ValueSet");
@ -901,7 +890,6 @@ public class TermReadSvcImpl implements ITermReadSvc {
return myFulltextSearchSvc != null && !ourForceDisableHibernateSearchForUnitTest;
}
@Nonnull
private void expandValueSetHandleIncludeOrExcludeUsingDatabase(
ValueSetExpansionOptions theExpansionOptions,
IValueSetConceptAccumulator theValueSetCodeAccumulator,
@ -1141,11 +1129,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
private @Nonnull
ValueSetExpansionOptions provideExpansionOptions(@Nullable ValueSetExpansionOptions theExpansionOptions) {
if (theExpansionOptions != null) {
return theExpansionOptions;
} else {
return DEFAULT_EXPANSION_OPTIONS;
}
return Objects.requireNonNullElse(theExpansionOptions, DEFAULT_EXPANSION_OPTIONS);
}
private void addOrRemoveCode(IValueSetConceptAccumulator theValueSetCodeAccumulator, Set<String> theAddedCodes, boolean theAdd, String theSystem, String theCode, String theDisplay, String theSystemVersion) {
@ -1205,7 +1189,9 @@ public class TermReadSvcImpl implements ITermReadSvc {
private void handleFilterPropertyDefault(SearchPredicateFactory theF,
BooleanPredicateClausesStep<?> theB, ValueSet.ConceptSetFilterComponent theFilter) {
theB.must(getPropertyNameValueNestedPredicate(theF, theFilter.getProperty(), theFilter.getValue()));
String value = theFilter.getValue();
Term term = new Term(CONCEPT_PROPERTY_PREFIX_NAME + theFilter.getProperty(), value);
theB.must(theF.match().field(term.field()).matching(term.text()));
}
@ -1213,7 +1199,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
/*
* We treat the regex filter as a match on the regex
* anywhere in the property string. The spec does not
* say whether or not this is the right behaviour, but
* say whether this is the right behaviour or not, but
* there are examples that seem to suggest that it is.
*/
String value = theFilter.getValue();
@ -1228,43 +1214,26 @@ public class TermReadSvcImpl implements ITermReadSvc {
value = value.substring(1);
}
if (isFullTextSetToUseElastic()) {
ElasticsearchNestedQueryBuilderUtil nestedQueryBuildUtil = new ElasticsearchNestedQueryBuilderUtil(
"myProperties", "myKey", theFilter.getProperty(),
"myValueString", value);
JsonObject nestedQueryJO = nestedQueryBuildUtil.toGson();
ourLog.debug("Build nested Elasticsearch query: {}", nestedQueryJO);
theB.must(theF.extension(ElasticsearchExtension.get()).fromJson(nestedQueryJO));
return;
}
// native Lucene configured
Query termPropKeyQuery = new TermQuery(new Term(IDX_PROP_KEY, theFilter.getProperty()));
Query regexpValueQuery = new RegexpQuery(new Term(IDX_PROP_VALUE_STRING, value));
theB.must(theF.nested().objectField("myProperties").nest(
theF.bool()
.must(theF.extension(LuceneExtension.get()).fromLuceneQuery(termPropKeyQuery))
.must(theF.extension(LuceneExtension.get()).fromLuceneQuery(regexpValueQuery))
));
theB.must(theF.regexp()
.field(CONCEPT_PROPERTY_PREFIX_NAME + theFilter.getProperty())
.matching(value) );
}
private void handleFilterLoincCopyright(SearchPredicateFactory theF, BooleanPredicateClausesStep<?> theB, ValueSet.ConceptSetFilterComponent theFilter) {
private void handleFilterLoincCopyright(SearchPredicateFactory theF, BooleanPredicateClausesStep<?> theB,
ValueSet.ConceptSetFilterComponent theFilter) {
if (theFilter.getOp() == ValueSet.FilterOperator.EQUAL) {
String copyrightFilterValue = defaultString(theFilter.getValue()).toLowerCase();
switch (copyrightFilterValue) {
case "3rdparty":
logFilteringValueOnProperty(theFilter.getValue(), theFilter.getProperty());
addFilterLoincCopyright3rdParty(theF, theB, theFilter);
addFilterLoincCopyright3rdParty(theF, theB);
break;
case "loinc":
logFilteringValueOnProperty(theFilter.getValue(), theFilter.getProperty());
addFilterLoincCopyrightLoinc(theF, theB, theFilter);
addFilterLoincCopyrightLoinc(theF, theB);
break;
default:
throwInvalidRequestForValueOnProperty(theFilter.getValue(), theFilter.getProperty());
@ -1275,21 +1244,13 @@ public class TermReadSvcImpl implements ITermReadSvc {
}
}
private void addFilterLoincCopyrightLoinc(SearchPredicateFactory theF,
BooleanPredicateClausesStep<?> theB, ValueSet.ConceptSetFilterComponent theFilter) {
theB.mustNot(theF.match().field(IDX_PROP_KEY).matching("EXTERNAL_COPYRIGHT_NOTICE"));
private void addFilterLoincCopyrightLoinc(SearchPredicateFactory theF, BooleanPredicateClausesStep<?> theB) {
theB.mustNot(theF.exists().field(CONCEPT_PROPERTY_PREFIX_NAME + "EXTERNAL_COPYRIGHT_NOTICE"));
}
private void addFilterLoincCopyright3rdParty(SearchPredicateFactory thePredicateFactory,
BooleanPredicateClausesStep<?> theBooleanClause, ValueSet.ConceptSetFilterComponent theFilter) {
//TODO GGG HS These used to be Term term = new Term(TermConceptPropertyBinder.CONCEPT_FIELD_PROPERTY_PREFIX + "EXTERNAL_COPYRIGHT_NOTICE", ".*");, which was lucene-specific.
//TODO GGG HS ask diederik if this is equivalent.
//This old .* regex is the same as an existence check on a field, which I've implemented here.
// theBooleanClause.must(thePredicateFactory.exists().field("EXTERNAL_COPYRIGHT_NOTICE"));
theBooleanClause.must(thePredicateFactory.match().field(IDX_PROP_KEY).matching("EXTERNAL_COPYRIGHT_NOTICE"));
private void addFilterLoincCopyright3rdParty(SearchPredicateFactory theF, BooleanPredicateClausesStep<?> theB) {
theB.must(theF.exists().field(CONCEPT_PROPERTY_PREFIX_NAME + "EXTERNAL_COPYRIGHT_NOTICE"));
}
@SuppressWarnings("EnumSwitchStatementWhichMissesCases")
@ -1308,21 +1269,21 @@ public class TermReadSvcImpl implements ITermReadSvc {
}
private void addLoincFilterAncestorEqual(String theSystem, SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, ValueSet.ConceptSetFilterComponent theFilter) {
long parentPid = getAncestorCodePid(theSystem, theFilter.getProperty(), theFilter.getValue());
b.must(f.match().field("myParentPids").matching(String.valueOf(parentPid)));
addLoincFilterAncestorEqual(theSystem, f, b, theFilter.getProperty(), theFilter.getValue());
}
private void addLoincFilterAncestorEqual(String theSystem, SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, String theProperty, String theValue) {
List<Term> terms = getAncestorTerms(theSystem, theProperty, theValue);
b.must(f.bool(innerB -> terms.forEach(term -> innerB.should(f.match().field(term.field()).matching(term.text())))));
}
private void addLoincFilterAncestorIn(String theSystem, SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, ValueSet.ConceptSetFilterComponent theFilter) {
String[] values = theFilter.getValue().split(",");
List<Long> ancestorCodePidList = new ArrayList<>();
List<Term> terms = new ArrayList<>();
for (String value : values) {
ancestorCodePidList.add(getAncestorCodePid(theSystem, theFilter.getProperty(), value));
terms.addAll(getAncestorTerms(theSystem, theFilter.getProperty(), value));
}
b.must(f.bool(innerB -> ancestorCodePidList.forEach(
ancestorPid -> innerB.should(f.match().field("myParentPids").matching(String.valueOf(ancestorPid)))
)));
b.must(f.bool(innerB -> terms.forEach(term -> innerB.should(f.match().field(term.field()).matching(term.text())))));
}
@ -1342,32 +1303,20 @@ public class TermReadSvcImpl implements ITermReadSvc {
private void addLoincFilterParentChildIn(SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, ValueSet.ConceptSetFilterComponent theFilter) {
String[] values = theFilter.getValue().split(",");
b.minimumShouldMatchNumber(1);
List<Term> terms = new ArrayList<>();
for (String value : values) {
logFilteringValueOnProperty(value, theFilter.getProperty());
b.should(getPropertyNameValueNestedPredicate(f, theFilter.getProperty(), value));
terms.add(getPropertyTerm(theFilter.getProperty(), value));
}
b.must(f.bool(innerB -> terms.forEach(term -> innerB.should(f.match().field(term.field()).matching(term.text())))));
}
private void addLoincFilterParentChildEqual(SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, String theProperty, String theValue) {
logFilteringValueOnProperty(theValue, theProperty);
b.must(getPropertyNameValueNestedPredicate(f, theProperty, theValue));
b.must(f.match().field(CONCEPT_PROPERTY_PREFIX_NAME + theProperty).matching(theValue));
}
/**
* A nested predicate is required for both predicates to be applied to same property, otherwise if properties
* propAA:valueAA and propBB:valueBB are defined, a search for propAA:valueBB would be a match
* @see "https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#search-dsl-predicate-nested"
*/
private PredicateFinalStep getPropertyNameValueNestedPredicate(SearchPredicateFactory f, String theProperty, String theValue) {
return f.nested().objectField(IDX_PROPERTIES).nest(
f.bool()
.must(f.match().field(IDX_PROP_KEY).matching(theProperty))
.must(f.match().field(IDX_PROP_VALUE_STRING).field(IDX_PROP_DISPLAY_STRING).matching(theValue))
);
}
private void handleFilterConceptAndCode(String theSystem, SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, ValueSet.ConceptSetFilterComponent theFilter) {
TermConcept code = findCodeForFilterCriteria(theSystem, theFilter);
@ -1427,14 +1376,24 @@ public class TermReadSvcImpl implements ITermReadSvc {
);
}
private long getAncestorCodePid(String theSystem, String theProperty, String theValue) {
private Term getPropertyTerm(String theProperty, String theValue) {
return new Term(CONCEPT_PROPERTY_PREFIX_NAME + theProperty, theValue);
}
private List<Term> getAncestorTerms(String theSystem, String theProperty, String theValue) {
List<Term> retVal = new ArrayList<>();
TermConcept code = findCode(theSystem, theValue)
.orElseThrow(() -> new InvalidRequestException("Invalid filter criteria - code does not exist: {" + Constants.codeSystemWithDefaultDescription(theSystem) + "}" + theValue));
retVal.add(new Term("myParentPids", "" + code.getId()));
logFilteringValueOnProperty(theValue, theProperty);
return code.getId();
return retVal;
}
@SuppressWarnings("EnumSwitchStatementWhichMissesCases")
private void handleFilterLoincDescendant(String theSystem, SearchPredicateFactory f, BooleanPredicateClausesStep<?> b, ValueSet.ConceptSetFilterComponent theFilter) {
switch (theFilter.getOp()) {
@ -1640,7 +1599,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
IBaseResource valueSet = myDaoRegistry.getResourceDao("ValueSet").read(theValueSetId, theRequestDetails);
ValueSet canonicalValueSet = myVersionCanonicalizer.valueSetToCanonical(valueSet);
Optional<TermValueSet> optionalTermValueSet = fetchValueSetEntity(canonicalValueSet);
if (!optionalTermValueSet.isPresent()) {
if (optionalTermValueSet.isEmpty()) {
return myContext.getLocalizer().getMessage(TermReadSvcImpl.class, "valueSetNotFoundInTerminologyDatabase", theValueSetId);
}
@ -1666,7 +1625,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
public boolean isValueSetPreExpandedForCodeValidation(ValueSet theValueSet) {
Optional<TermValueSet> optionalTermValueSet = fetchValueSetEntity(theValueSet);
if (!optionalTermValueSet.isPresent()) {
if (optionalTermValueSet.isEmpty()) {
ourLog.warn("ValueSet is not present in terminology tables. Will perform in-memory code validation. {}", getValueSetInfo(theValueSet));
return false;
}
@ -1684,13 +1643,11 @@ public class TermReadSvcImpl implements ITermReadSvc {
private Optional<TermValueSet> fetchValueSetEntity(ValueSet theValueSet) {
ResourcePersistentId valueSetResourcePid = getValueSetResourcePersistentId(theValueSet);
Optional<TermValueSet> optionalTermValueSet = myTermValueSetDao.findByResourcePid(valueSetResourcePid.getIdAsLong());
return optionalTermValueSet;
return myTermValueSetDao.findByResourcePid(valueSetResourcePid.getIdAsLong());
}
private ResourcePersistentId getValueSetResourcePersistentId(ValueSet theValueSet) {
ResourcePersistentId valueSetResourcePid = myIdHelperService.resolveResourcePersistentIds(RequestPartitionId.allPartitions(), theValueSet.getIdElement().getResourceType(), theValueSet.getIdElement().getIdPart());
return valueSetResourcePid;
return myIdHelperService.resolveResourcePersistentIds(RequestPartitionId.allPartitions(), theValueSet.getIdElement().getResourceType(), theValueSet.getIdElement().getIdPart());
}
protected IValidationSupport.CodeValidationResult validateCodeIsInPreExpandedValueSet(
@ -1726,7 +1683,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
return null;
}
TermValueSet valueSetEntity = myTermValueSetDao.findByResourcePid(valueSetResourcePid.getIdAsLong()).orElseThrow(() -> new IllegalStateException());
TermValueSet valueSetEntity = myTermValueSetDao.findByResourcePid(valueSetResourcePid.getIdAsLong()).orElseThrow(IllegalStateException::new);
String timingDescription = toHumanReadableExpansionTimestamp(valueSetEntity);
String msg = myContext.getLocalizer().getMessage(TermReadSvcImpl.class, "validationPerformedAgainstPreExpansion", timingDescription);
@ -1813,18 +1770,6 @@ public class TermReadSvcImpl implements ITermReadSvc {
}
}
private CodeSystem.ConceptDefinitionComponent findCode(List<CodeSystem.ConceptDefinitionComponent> theConcepts, String theCode) {
for (CodeSystem.ConceptDefinitionComponent next : theConcepts) {
if (theCode.equals(next.getCode())) {
return next;
}
CodeSystem.ConceptDefinitionComponent val = findCode(next.getConcept(), theCode);
if (val != null) {
return val;
}
}
return null;
}
@Override
public Optional<TermConcept> findCode(String theCodeSystem, String theCode) {
@ -1909,7 +1854,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
StopWatch stopwatch = new StopWatch();
Optional<TermConcept> concept = fetchLoadedCode(theCodeSystemResourcePid, theCode);
if (!concept.isPresent()) {
if (concept.isEmpty()) {
return Collections.emptySet();
}
@ -1941,7 +1886,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
Stopwatch stopwatch = Stopwatch.createStarted();
Optional<TermConcept> concept = fetchLoadedCode(theCodeSystemResourcePid, theCode);
if (!concept.isPresent()) {
if (concept.isEmpty()) {
return Collections.emptySet();
}
@ -2003,7 +1948,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
StopWatch sw = new StopWatch();
TermValueSet valueSetToExpand = txTemplate.execute(t -> {
Optional<TermValueSet> optionalTermValueSet = getNextTermValueSetNotExpanded();
if (!optionalTermValueSet.isPresent()) {
if (optionalTermValueSet.isEmpty()) {
return null;
}
@ -2071,9 +2016,9 @@ public class TermReadSvcImpl implements ITermReadSvc {
boolean haveCodeableConcept = codeableConcept != null && codeableConcept.getCoding().size() > 0;
Coding canonicalCodingToValidate = myVersionCanonicalizer.codingToCanonical((IBaseCoding) theCodingToValidate);
boolean haveCoding = canonicalCodingToValidate != null && canonicalCodingToValidate.isEmpty() == false;
boolean haveCoding = canonicalCodingToValidate != null && !canonicalCodingToValidate.isEmpty();
boolean haveCode = theCodeToValidate != null && theCodeToValidate.isEmpty() == false;
boolean haveCode = theCodeToValidate != null && !theCodeToValidate.isEmpty();
if (!haveCodeableConcept && !haveCoding && !haveCode) {
throw new InvalidRequestException(Msg.code(899) + "No code, coding, or codeableConcept provided to validate");
@ -2183,7 +2128,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
} else {
optionalExistingTermValueSetByUrl = myTermValueSetDao.findTermValueSetByUrlAndNullVersion(url);
}
if (!optionalExistingTermValueSetByUrl.isPresent()) {
if (optionalExistingTermValueSetByUrl.isEmpty()) {
myTermValueSetDao.save(termValueSet);
@ -2557,9 +2502,9 @@ public class TermReadSvcImpl implements ITermReadSvc {
boolean haveCodeableConcept = codeableConcept != null && codeableConcept.getCoding().size() > 0;
Coding coding = myVersionCanonicalizer.codingToCanonical((IBaseCoding) theCoding);
boolean haveCoding = coding != null && coding.isEmpty() == false;
boolean haveCoding = coding != null && !coding.isEmpty();
boolean haveCode = theCode != null && theCode.isEmpty() == false;
boolean haveCode = theCode != null && !theCode.isEmpty();
if (!haveCodeableConcept && !haveCoding && !haveCode) {
throw new InvalidRequestException(Msg.code(906) + "No code, coding, or codeableConcept provided to validate.");
@ -2622,7 +2567,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
public Optional<TermValueSet> findCurrentTermValueSet(String theUrl) {
if (TermReadSvcUtil.isLoincUnversionedValueSet(theUrl)) {
Optional<String> vsIdOpt = TermReadSvcUtil.getValueSetId(theUrl);
if (!vsIdOpt.isPresent()) {
if (vsIdOpt.isEmpty()) {
return Optional.empty();
}
@ -2763,7 +2708,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
IConnectionPoolInfoProvider connectionPoolInfoProvider =
new ConnectionPoolInfoProvider(myHibernatePropertiesProvider.getDataSource());
Optional<Integer> maxConnectionsOpt = connectionPoolInfoProvider.getTotalConnectionSize();
if ( ! maxConnectionsOpt.isPresent() ) {
if (maxConnectionsOpt.isEmpty()) {
return DEFAULT_MASS_INDEXER_OBJECT_LOADING_THREADS;
}
@ -2894,7 +2839,7 @@ public class TermReadSvcImpl implements ITermReadSvc {
/**
* Properties returned from method buildSearchScroll
*/
private final class SearchProperties {
private static final class SearchProperties {
private SearchScroll<EntityReference> mySearchScroll;
private Optional<PredicateFinalStep> myExpansionStepOpt;
private List<String> myIncludeOrExcludeCodes;

View File

@ -0,0 +1,56 @@
package ca.uhn.fhir.jpa.util;
/*-
* #%L
* HAPI FHIR Storage api
* %%
* Copyright (C) 2014 - 2022 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
/**
 * The whole purpose of this class is to ease construction of a non-trivial gson.JsonObject,
 * which can't be done the easy way in this case (using a JSON string), because there are
 * valid regex strings which break gson, as this: ".*\\^Donor$"
 */
public class RegexpGsonBuilderUtil {

	/** Utility class: no instances. */
	private RegexpGsonBuilderUtil() { }

	/**
	 * Builds a json object as this sample:
	 * {"regexp":{" + thePropName + ":{"value":" + theValue + "}}}
	 *
	 * @param thePropName the index field name the regexp applies to
	 * @param theValue    the regular expression to match against the field value
	 * @return the assembled "regexp" query JsonObject
	 */
	public static JsonObject toGson(String thePropName, String theValue) {
		JsonObject valueJO = new JsonObject();
		valueJO.add("value", new JsonPrimitive(theValue));

		JsonObject systemValueJO = new JsonObject();
		systemValueJO.add(thePropName, valueJO);

		JsonObject regexpJO = new JsonObject();
		regexpJO.add("regexp", systemValueJO);
		// Removed an unused "JsonArray a = new JsonArray();" local that served no purpose.
		return regexpJO;
	}
}

View File

@ -0,0 +1,57 @@
package ca.uhn.fhir.jpa.util;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonSyntaxException;
import com.google.gson.stream.MalformedJsonException;
import org.junit.jupiter.api.Test;
import static ca.uhn.fhir.jpa.entity.TermConceptPropertyBinder.CONCEPT_PROPERTY_PREFIX_NAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
class RegexpGsonBuilderUtilTest {

	private static final String PROP_NAME = "myValueString";

	// a valid regex string which breaks gson
	private static final String GSON_FAILING_REGEX = ".*\\^Donor$";

	@Test
	void testBuildUsingTestedClass() {
		String propertyValue = "propAAA";
		// Build the expectation from the same prefix constant the production code uses,
		// instead of hard-coding "P:", so the test does not silently diverge if the
		// prefix ever changes.
		String expectedGson = "{\"regexp\":{\"" + CONCEPT_PROPERTY_PREFIX_NAME + PROP_NAME
			+ "\":{\"value\":\"" + propertyValue + "\"}}}";

		assertEquals(expectedGson, RegexpGsonBuilderUtil.toGson(
			CONCEPT_PROPERTY_PREFIX_NAME + PROP_NAME, propertyValue).toString());
	}

	/**
	 * This test demonstrates that valid regex strings break the gson library.
	 * If one day this test fails, it would mean the library added support for this kind
	 * of string, so the tested class could be removed and the code using it could just
	 * build a gson.JsonObject from a JSON string.
	 */
	@Test
	void testShowGsonLibFailing() {
		String workingRegex = ".*[abc]?jk";
		String workingRegexQuery = "{'regexp':{'P:SYSTEM':{'value':'" + workingRegex + "'}}}";
		JsonObject jsonObj = new Gson().fromJson(workingRegexQuery, JsonObject.class);
		assertFalse(jsonObj.isJsonNull());

		// same json structure fails with some valid regex strings
		String failingRegexQuery = "{'regexp':{'P:SYSTEM':{'value':'" + GSON_FAILING_REGEX + "'}}}";
		JsonSyntaxException thrown = assertThrows(
			JsonSyntaxException.class,
			() -> new Gson().fromJson(failingRegexQuery, JsonObject.class));
		assertTrue(thrown.getCause() instanceof MalformedJsonException);
	}
}

View File

@ -36,7 +36,6 @@ import org.hl7.fhir.r4.model.Enumerations;
import org.hl7.fhir.r4.model.ValueSet;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Answers;
@ -287,7 +286,6 @@ public class ValueSetExpansionR4ElasticsearchIT extends BaseJpaTest {
* Reproduced: https://github.com/hapifhir/hapi-fhir/issues/3992
*/
@Test
@Disabled("until bug gets fixed")
public void testExpandValueSetWithMoreThanElasticDefaultNestedObjectCount() {
CodeSystem codeSystem = new CodeSystem();
codeSystem.setUrl(CS_URL);
@ -301,8 +299,13 @@ public class ValueSetExpansionR4ElasticsearchIT extends BaseJpaTest {
codeSystemVersion.setResource(csResource);
TermConcept tc = new TermConcept(codeSystemVersion, "test-code-1");
// need to be more than elastic [index.mapping.nested_objects.limit] index level setting (default = 10_000)
addTerConceptProperties(tc, 10_100);
// however, the new mapping (not nested, multivalued) groups properties by key, and there can't be more than 1000
// properties (keys), so we test here with a number of properties > 10_000 but grouped in max 200 property keys
// (current loinc version (2.73) has 82 property keys maximum in a TermConcept)
addTermConceptProperties(tc, 10_100, 200);
codeSystemVersion.getConcepts().add(tc);
ValueSet valueSet = new ValueSet();
@ -320,10 +323,21 @@ public class ValueSetExpansionR4ElasticsearchIT extends BaseJpaTest {
}
private void addTerConceptProperties(TermConcept theTermConcept, int theCount) {
for (int i = 0; i < theCount; i++) {
String suff = String.format("%05d", i);
theTermConcept.addPropertyString("prop-" + suff, "value-" + suff);
/**
 * Adds thePropertiesCount string properties to theTermConcept, cycling over
 * thePropertyKeysCount distinct property keys, so the total property count can
 * exceed elastic limits while the number of distinct keys stays bounded.
 *
 * @param theTermConcept       concept receiving the generated properties
 * @param thePropertiesCount   total number of properties to add
 * @param thePropertyKeysCount number of distinct property keys to spread them over
 */
private void addTermConceptProperties(TermConcept theTermConcept, int thePropertiesCount, int thePropertyKeysCount) {
	for (int i = 0; i < thePropertiesCount; i++) {
		// key index cycles 0..thePropertyKeysCount-1; value sequence is unique per property
		String propKey = String.format("%05d", i % thePropertyKeysCount);
		String propSeq = String.format("%05d", i);
		theTermConcept.addPropertyString("prop-key-" + propKey, "value-" + propSeq);
	}
}

View File

@ -108,7 +108,7 @@ import static org.mockito.Mockito.when;
*/
//@ExtendWith(SpringExtension.class)
//@ContextConfiguration(classes = {TestR4Config.class, TestHSearchAddInConfig.DefaultLuceneHeap.class})
//@ContextConfiguration(classes = {TestR4Config.class, TestHSearchAddInConfig.DefaultLuceneHeap.class})
//@ContextConfiguration(classes = {TestR4Config.class, TestHSearchAddInConfig.Elasticsearch.class})
public abstract class AbstractValueSetHSearchExpansionR4Test extends BaseJpaTest {
private static final Logger ourLog = LoggerFactory.getLogger(AbstractValueSetHSearchExpansionR4Test.class);

View File

@ -1,94 +0,0 @@
package ca.uhn.fhir.jpa.search;
/*-
* #%L
* HAPI FHIR Storage api
* %%
* Copyright (C) 2014 - 2022 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
/**
 * The whole purpose of this class is to ease construction of a non-trivial gson.JsonObject,
* which can't be done the easy way in this case (using a JSON string), because there are
* valid regex strings which break gson, as this: ".*\\^Donor$"
*/
public class ElasticsearchNestedQueryBuilderUtil {

	private final String myNestedObjName;
	private final String myNestedKeyPropName;
	private final String myNestedKeyPropValue;
	private final String myNestedValuePropName;
	private final String myNestedValuePropValue;

	private final String myNestedPropertyKeyPath;
	private final String myNestedPropertyValuePath;

	// assembled eagerly in the constructor; exposed read-only via toGson()
	private final JsonObject builtJsonObj = new JsonObject();

	/**
	 * Assembles an elasticsearch "nested" query which exact-matches the nested
	 * key property against theNestedKeyPropValue and applies the regular
	 * expression theNestedValuePropValue to the nested value property.
	 *
	 * @param theNestedObjName        name (path) of the nested object
	 * @param theNestedKeyPropName    name of the key property inside the nested object
	 * @param theNestedKeyPropValue   exact value the key property must match
	 * @param theNestedValuePropName  name of the value property inside the nested object
	 * @param theNestedValuePropValue regular expression the value property must match
	 */
	public ElasticsearchNestedQueryBuilderUtil(String theNestedObjName, String theNestedKeyPropName,
			String theNestedKeyPropValue, String theNestedValuePropName, String theNestedValuePropValue) {
		myNestedObjName = theNestedObjName;
		myNestedKeyPropName = theNestedKeyPropName;
		myNestedKeyPropValue = theNestedKeyPropValue;
		myNestedValuePropName = theNestedValuePropName;
		myNestedValuePropValue = theNestedValuePropValue;

		myNestedPropertyKeyPath = myNestedObjName + "." + myNestedKeyPropName;
		myNestedPropertyValuePath = myNestedObjName + "." + myNestedValuePropName;

		buildJsonObj();
	}

	/**
	 * Builds:
	 * {"nested": {"path": obj, "query": {"bool": {"must": [
	 *   {"match": {obj.key: keyValue}}, {"regexp": {obj.value: valueRegex}} ]}}}}
	 * Assembled programmatically because gson can't parse some valid regex strings.
	 */
	private void buildJsonObj() {
		JsonObject matchPropJO = new JsonObject();
		matchPropJO.addProperty(myNestedPropertyKeyPath, myNestedKeyPropValue);
		JsonObject matchJO = new JsonObject();
		matchJO.add("match", matchPropJO);

		JsonObject regexpPropJO = new JsonObject();
		regexpPropJO.addProperty(myNestedPropertyValuePath, myNestedValuePropValue);
		JsonObject regexpJO = new JsonObject();
		regexpJO.add("regexp", regexpPropJO);

		JsonArray mustPropJA = new JsonArray();
		mustPropJA.add(matchJO);
		mustPropJA.add(regexpJO);

		JsonObject mustPropJO = new JsonObject();
		mustPropJO.add("must", mustPropJA);
		JsonObject boolJO = new JsonObject();
		boolJO.add("bool", mustPropJO);

		JsonObject nestedJO = new JsonObject();
		nestedJO.addProperty("path", myNestedObjName);
		nestedJO.add("query", boolJO);
		builtJsonObj.add("nested", nestedJO);
	}

	public JsonObject toGson() { return builtJsonObj; }

	public String getNestedPropertyKeyPath() { return myNestedPropertyKeyPath; }

	public String getNestedPropertyValuePath() { return myNestedPropertyValuePath; }
}

View File

@ -1,87 +0,0 @@
package ca.uhn.fhir.jpa.search;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonSyntaxException;
import com.google.gson.stream.MalformedJsonException;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
class ElasticsearchNestedQueryBuilderUtilTest {
private static final String NESTED_KEY_PROP_NAME = "myKey";
private static final String NESTED_VALUE_PROP_NAME = "myValueString";
private static final String NESTED_OBJECT_NAME = "myProperties";
// a valid regex string which breaks gson
private static final String GSON_FAILING_REGEX = ".*\\^Donor$";
private ElasticsearchNestedQueryBuilderUtil testedUtil;
@Test
void testBuildUsingTestedClass() {
	String keyValue = "propAAA";
	testedUtil = new ElasticsearchNestedQueryBuilderUtil(
		NESTED_OBJECT_NAME, NESTED_KEY_PROP_NAME, keyValue, NESTED_VALUE_PROP_NAME, GSON_FAILING_REGEX);
	assertFalse(testedUtil.toGson().isJsonNull());

	// drill down the generated structure: nested -> path / query -> bool -> must[match, regexp]
	JsonObject nestedJO = testedUtil.toGson().getAsJsonObject("nested");
	assertEquals(NESTED_OBJECT_NAME, nestedJO.get("path").getAsString());

	JsonArray mustJA = nestedJO.getAsJsonObject("query").getAsJsonObject("bool").getAsJsonArray("must");

	JsonObject matchJO = ((JsonObject) mustJA.get(0)).getAsJsonObject("match");
	assertEquals(keyValue, matchJO.get(testedUtil.getNestedPropertyKeyPath()).getAsString());

	JsonObject regexpJO = ((JsonObject) mustJA.get(1)).getAsJsonObject("regexp");
	assertEquals(GSON_FAILING_REGEX, regexpJO.get(testedUtil.getNestedPropertyValuePath()).getAsString());
}
/**
 * This test demonstrates that some valid regex strings break the gson library.
 * If one day this test fails, it means the library has added support for this kind of string,
 * so the tested class could be removed and the code using it could simply build a gson.JsonObject from a JSON string.
*/
@Test
void testShowGsonLibFailing() {
	// this nested query parses fine with an ordinary regex...
	String parseableRegex = ".*[abc]?jk";
	String parseableQuery =
		"{'nested': { 'path': 'myProperties', 'query': { 'bool': { 'must': [" +
		"{'match': {'myProperties.myKey': 'propAAA' }}," +
		"{'regexp': {'myProperties.myValueString': '" + parseableRegex + "'}}" +
		"]}}}}";
	assertFalse(new Gson().fromJson(parseableQuery, JsonObject.class).isJsonNull());

	// ...while the identical structure fails for some valid regex strings
	String failingQuery =
		"{'nested': { 'path': 'myProperties', 'query': { 'bool': { 'must': [" +
		"{'match': {'myProperties.myKey': 'propAAA' }}," +
		"{'regexp': {'myProperties.myValueString': '" + GSON_FAILING_REGEX + "'}}" +
		"]}}}}";
	JsonSyntaxException thrown = assertThrows(
		JsonSyntaxException.class,
		() -> new Gson().fromJson(failingQuery, JsonObject.class));
	assertTrue(thrown.getCause() instanceof MalformedJsonException);
}
}