Paging queries can generate invalid Oracle SQL (#3123)

* Test fix

* Add docs

* Add changelog
This commit is contained in:
James Agnew 2021-10-28 14:42:06 -04:00 committed by GitHub
parent 2067ba83ee
commit 05f470db18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 149 additions and 17 deletions

View File

@ -0,0 +1,5 @@
---
type: fix
issue: 3123
title: "When performing paging queries over large result sets in the JPA server (> 1000 results), SQL could be
generated that violated the Oracle RDBMS limit of 1000 parameters. This has been resolved."

View File

@ -518,7 +518,11 @@
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-arq</artifactId>
<version>3.17.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-core</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>

View File

@ -39,7 +39,6 @@ import ca.uhn.fhir.jpa.dao.BaseStorageDao;
import ca.uhn.fhir.jpa.dao.IFulltextSearchSvc;
import ca.uhn.fhir.jpa.dao.IResultIterator;
import ca.uhn.fhir.jpa.dao.ISearchBuilder;
import ca.uhn.fhir.jpa.dao.data.IResourceLinkDao;
import ca.uhn.fhir.jpa.dao.data.IResourceSearchViewDao;
import ca.uhn.fhir.jpa.dao.data.IResourceTagDao;
import ca.uhn.fhir.jpa.dao.index.IdHelperService;
@ -179,6 +178,7 @@ public class SearchBuilder implements ISearchBuilder {
private int myFetchSize;
private Integer myMaxResultsToFetch;
private Set<ResourcePersistentId> myPidSet;
private boolean myHasNextIteratorQuery = false;
private RequestPartitionId myRequestPartitionId;
@Autowired
private PartitionSettings myPartitionSettings;
@ -191,7 +191,6 @@ public class SearchBuilder implements ISearchBuilder {
@Autowired
private ModelConfig myModelConfig;
private boolean hasNextIteratorQuery = false;
/**
* Constructor
@ -493,9 +492,26 @@ public class SearchBuilder implements ISearchBuilder {
sqlBuilder.addPredicate(lastUpdatedPredicates);
}
//-- exclude the pids already in the previous iterator
if (hasNextIteratorQuery) {
sqlBuilder.excludeResourceIdsPredicate(myPidSet);
/*
* Exclude the pids already in the previous iterator. This is an optimization, as opposed
* to something needed to guarantee correct results.
*
* Why do we need it? Suppose for example, a query like:
* Observation?category=foo,bar,baz
* And suppose you have many resources that have all 3 of these category codes. In this case
* the SQL query will probably return the same PIDs multiple times, and if this happens enough
* we may exhaust the query results without getting enough distinct results back. When that
* happens we re-run the query with a larger limit. Excluding results we already know about
* tries to ensure that we get new unique results.
*
* The challenge with that though is that lots of DBs have an issue with too many
* parameters in one query. So we only do this optimization if there aren't too
* many results.
*/
if (myHasNextIteratorQuery) {
if (myPidSet.size() + sqlBuilder.countBindVariables() < 900) {
sqlBuilder.excludeResourceIdsPredicate(myPidSet);
}
}
/*
@ -1367,10 +1383,9 @@ public class SearchBuilder implements ISearchBuilder {
if (!myResultsIterator.hasNext()) {
if (myMaxResultsToFetch != null && (mySkipCount + myNonSkipCount == myMaxResultsToFetch)) {
if (mySkipCount > 0 && myNonSkipCount == 0) {
myMaxResultsToFetch += 1000;
StorageProcessingMessage message = new StorageProcessingMessage();
String msg = "Pass completed with no matching results. This indicates an inefficient query! Retrying with new max count of " + myMaxResultsToFetch;
String msg = "Pass completed with no matching results seeking rows " + myPidSet.size() + "-" + mySkipCount + ". This indicates an inefficient query! Retrying with new max count of " + myMaxResultsToFetch;
ourLog.warn(msg);
message.setMessage(msg);
HookParams params = new HookParams()
@ -1379,6 +1394,7 @@ public class SearchBuilder implements ISearchBuilder {
.add(StorageProcessingMessage.class, message);
CompositeInterceptorBroadcaster.doCallHooks(myInterceptorBroadcaster, myRequest, Pointcut.JPA_PERFTRACE_WARNING, params);
myMaxResultsToFetch += 1000;
initializeIteratorQuery(myOffset, myMaxResultsToFetch);
}
}
@ -1461,10 +1477,10 @@ public class SearchBuilder implements ISearchBuilder {
close();
if (myQueryList != null && myQueryList.size() > 0) {
myResultsIterator = myQueryList.remove(0);
hasNextIteratorQuery = true;
myHasNextIteratorQuery = true;
} else {
myResultsIterator = SearchQueryExecutor.emptyExecutor();
hasNextIteratorQuery = false;
myHasNextIteratorQuery = false;
}
}

View File

@ -517,6 +517,10 @@ public class SearchQueryBuilder {
.collect(Collectors.toList());
}
public int countBindVariables() {
return myBindVariableValues.size();
}
public void setMatchNothing() {
myMatchNothing = true;

View File

@ -0,0 +1,103 @@
package ca.uhn.fhir.jpa.dao.dstu3;
import ca.uhn.fhir.jpa.dao.data.ISearchDao;
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import ca.uhn.fhir.jpa.util.SqlQuery;
import ca.uhn.fhir.rest.api.server.IBundleProvider;
import ca.uhn.fhir.rest.param.ReferenceParam;
import ca.uhn.fhir.rest.param.TokenOrListParam;
import org.apache.commons.lang3.StringUtils;
import org.hl7.fhir.dstu3.model.CodeableConcept;
import org.hl7.fhir.dstu3.model.Coding;
import org.hl7.fhir.dstu3.model.Condition;
import org.hl7.fhir.dstu3.model.Patient;
import org.hl7.fhir.dstu3.model.Reference;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.hl7.fhir.instance.model.api.IIdType;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.lessThan;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class FhirResourceDaoDstu3SearchSqlTest extends BaseJpaDstu3Test {
private static final Logger ourLog = LoggerFactory.getLogger(FhirResourceDaoDstu3SearchSqlTest.class);
@Autowired
private ISearchDao mySearchEntityDao;
@Test
public void testSearchCondition_ByPatientAndCategories() {
Patient patient = new Patient();
//patient.addIdentifier().setSystem("http://mydomain.com/patient-identifier").setValue("PID1");
IIdType patientId = myPatientDao.create(patient).getId().toUnqualifiedVersionless();
int numResources = 3000;
for (int i = 1; i <= numResources; i++) {
Condition condition = new Condition();
condition.setSubject(new Reference(patientId));
List<CodeableConcept> categories = new ArrayList<>();
categories.add(new CodeableConcept()
.addCoding(new Coding("acc_condcat_fkc", "ACCEVN", "Access Event")));
categories.add(new CodeableConcept()
.addCoding(new Coding("acc_careplncat_fkc", "ACCMNTRG", "Access Monitoring")));
categories.add(new CodeableConcept()
.addCoding(new Coding("acc_condcat_fkc", "MIDTX", "During Tx")));
condition.setCategory(categories);
IIdType conditionId = myConditionDao.create(condition).getId().toUnqualifiedVersionless();
if (i % 500 == 0) {
ourLog.info("Created Condition with ID: " + conditionId);
}
}
// Search
// http://127.0.0.1:8000/Condition
// ?_count=300
// &category=ACCEVN%2CACCMNTRG%2CMIDTX
// &patient=Patient%2F123
SearchParameterMap map = new SearchParameterMap();
map.setCount(300);
map.add("patient", new ReferenceParam(patientId.toString()));
map.add("category", new TokenOrListParam(null, "ACCEVN", "ACCMNTRG", "MIDTX")); // , = %2C
IBundleProvider outcome = null;
String uuid = null;
for (int i = 0; i < 3000; i += 300) {
ourLog.info("Starting batch {}-{}", i, i+300);
myCaptureQueriesListener.clear();
if (outcome == null) {
outcome = myConditionDao.search(map);
uuid = outcome.getUuid();
} else {
outcome = myPagingProvider.retrieveResultList(mySrd, uuid);
}
List<IBaseResource> resources = outcome.getResources(i, i + 300);
ourLog.info("Batch {}-{} returned {} resources", i, i+300, resources.size());
assertEquals(300, resources.size());
List<SqlQuery> query = myCaptureQueriesListener.getSelectQueries();
for (SqlQuery next : query) {
String sql = next.getSql(false, false);
int paramCount = StringUtils.countMatches(sql, "?");
ourLog.info("SQL has {} params", paramCount);
assertThat("SQL has >1000 params: " + sql, paramCount, lessThan(1000));
if (sql.contains("HASH_VALUE IN")) {
sql = next.getSql(true, false);
ourLog.info("SQL: {}", sql);
}
}
}
}
}

View File

@ -597,21 +597,21 @@ public class ResourceProviderHasParamR4Test extends BaseResourceProviderR4Test {
assertThat(ids, contains(pid0.getValue()));
}
@Test
public void testMultipleHasParameter_NOT_IN() throws Exception {
for (int i=0; i<10; i++) {
createPatientWithObs(10);
}
String uri = ourServerBase + "/Patient?_has:Observation:subject:code-value-quantity=http://" + UrlUtil.escapeUrlParam("loinc.org|2345-7$gt180") + "&_has:Observation:subject:date=gt1950" + "&_has:Observation:subject:status=final&_count=4";
ourLog.info("uri = " + uri);
myCaptureQueriesListener.clear();
searchAndReturnUnqualifiedVersionlessIdValues(uri);
List<String> queries = myCaptureQueriesListener.getSelectQueries().stream().map(t -> t.getSql(true, false)).collect(Collectors.toList());
List<String> notInListQueries = new ArrayList<>();
@ -620,10 +620,10 @@ public class ResourceProviderHasParamR4Test extends BaseResourceProviderR4Test {
if (query.contains("RES_ID NOT IN"))
notInListQueries.add(query);
}
assertNotEquals(0, notInListQueries.size());
}
private List<String> searchAndReturnUnqualifiedVersionlessIdValues(String uri) throws IOException {
List<String> ids;
HttpGet get = new HttpGet(uri);