Implemented a new setting for Bulk Export that controls how many resources are stored in each binary file (#4119)

* Implemented a new setting for Bulk Export that controls how many resources are stored in each binary file

* Addressing suggestions

* Fixing test

* Fixing test
Qingyixia 2022-10-12 10:03:54 -04:00 committed by GitHub
parent 379220fc46
commit 9d7829a404
4 changed files with 46 additions and 5 deletions

View File: changelog entry (new YAML file)

@@ -0,0 +1,5 @@
+---
+type: add
+issue: 4117
+title: "Previously, the number of resources per binary file in bulk export was a static 1000. This is now configurable by a new DaoConfig property called
+  'setBulkExportFileMaximumCapacity()', and the default value is 1000 resources per file."
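
For orientation, here is a minimal sketch of how a downstream project might pick up the new property. Only DaoConfig and setBulkExportFileMaximumCapacity() come from this change; the wrapper class and the value 500 are illustrative assumptions.

import ca.uhn.fhir.jpa.api.config.DaoConfig;

public class BulkExportConfigExample {
	public static DaoConfig newDaoConfig() {
		DaoConfig daoConfig = new DaoConfig();
		// Cap each bulk-export binary file at 500 resources;
		// leaving this untouched keeps the default of 1000.
		daoConfig.setBulkExportFileMaximumCapacity(500);
		return daoConfig;
	}
}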

View File: FetchResourceIdsStep.java

@@ -30,6 +30,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.i18n.Msg;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId;
@@ -52,6 +53,9 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobPa
 	@Autowired
 	private IBulkExportProcessor myBulkExportProcessor;
+	@Autowired
+	private DaoConfig myDaoConfig;
+
 	@Nonnull
 	@Override
 	public RunOutcome run(@Nonnull StepExecutionDetails<BulkExportJobParameters, VoidModel> theStepExecutionDetails,
@@ -98,9 +102,8 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobPa
 				idsToSubmit.add(id);

-				// >= so that we know (with confidence)
-				// that every batch is <= 1000 items
-				if (idsToSubmit.size() >= MAX_IDS_TO_BATCH) {
+				// Make sure resources stored in each batch do not go over the max capacity
+				if (idsToSubmit.size() >= myDaoConfig.getBulkExportFileMaximumCapacity()) {
 					submitWorkChunk(idsToSubmit, resourceType, params, theDataSink);
 					submissionCount++;
 					idsToSubmit = new ArrayList<>();
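
The flush-on-capacity pattern in the loop above guarantees that no work chunk, and therefore no binary file, ever holds more than the configured number of resources. A standalone sketch of the same pattern (a hypothetical helper for illustration, not code from this commit):

import java.util.ArrayList;
import java.util.List;

class ChunkingSketch {
	// Mirrors the loop in FetchResourceIdsStep: a chunk is flushed as soon
	// as its size reaches the capacity, and any remainder is flushed at the end.
	static <T> List<List<T>> chunk(List<T> theIds, int theMaxFileCapacity) {
		List<List<T>> chunks = new ArrayList<>();
		List<T> current = new ArrayList<>();
		for (T id : theIds) {
			current.add(id);
			if (current.size() >= theMaxFileCapacity) {
				chunks.add(current);
				current = new ArrayList<>();
			}
		}
		if (!current.isEmpty()) {
			chunks.add(current); // trailing partial chunk
		}
		return chunks;
	}
}

With a capacity of 5, six IDs split into one chunk of 5 and one of 1, which is exactly the case the renamed test below exercises.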

View File: FetchResourceIdsStepTest.java

@@ -8,6 +8,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.batch2.model.JobInstance;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.bulk.BulkDataExportOptions;
@@ -54,6 +55,8 @@ public class FetchResourceIdsStepTest {
 	@InjectMocks
 	private FetchResourceIdsStep myFirstStep;
+	@Mock
+	private DaoConfig myDaoConfig;

 	@BeforeEach
 	public void init() {
@@ -115,6 +118,8 @@ public class FetchResourceIdsStepTest {
 			any(ExportPIDIteratorParameters.class)
 		)).thenReturn(patientIds.iterator())
 			.thenReturn(observationIds.iterator());
+		int maxFileCapacity = 1000;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);

 		// test
 		RunOutcome outcome = myFirstStep.run(input, sink);
@@ -158,7 +163,7 @@ public class FetchResourceIdsStepTest {
 	}

 	@Test
-	public void run_moreThanAThousandPatients_hasAtLeastTwoJobs() {
+	public void run_moreThanTheMaxFileCapacityPatients_hasAtLeastTwoJobs() {
 		// setup
 		IJobDataSink<BulkExportIdList> sink = mock(IJobDataSink.class);
 		JobInstance instance = new JobInstance();
@@ -169,7 +174,11 @@ public class FetchResourceIdsStepTest {
 		ourLog.setLevel(Level.INFO);
 		List<ResourcePersistentId> patientIds = new ArrayList<>();
-		for (int i = 0; i < FetchResourceIdsStep.MAX_IDS_TO_BATCH + 1; i++) {
+
+		// when
+		int maxFileCapacity = 5;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);
+		for (int i = 0; i <= maxFileCapacity; i++) {
 			ResourcePersistentId id = new ResourcePersistentId("Patient/RED" + i);
 			patientIds.add(id);
 		}
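
Given the mocked capacity of 5 and the six patient IDs queued above, the step should emit exactly two work chunks. A hedged sketch of the kind of verification the test can then make (the test body is not shown in this hunk; the captor-based check and the getIds() accessor on BulkExportIdList are assumptions):

		// Presumed continuation of the test method above. verify(), times()
		// and ArgumentCaptor are standard Mockito; sink is the mocked
		// IJobDataSink from the setup.
		ArgumentCaptor<BulkExportIdList> chunkCaptor = ArgumentCaptor.forClass(BulkExportIdList.class);
		verify(sink, times(2)).accept(chunkCaptor.capture());
		// First chunk fills to the capacity of 5; the second holds the remainder.
		// getIds() is assumed to expose the submitted Id list.
		assertEquals(5, chunkCaptor.getAllValues().get(0).getIds().size());
		assertEquals(1, chunkCaptor.getAllValues().get(1).getIds().size());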

View File: DaoConfig.java

@@ -324,6 +324,11 @@ public class DaoConfig {
 	 */
 	private boolean myPreserveRequestIdInResourceBody = false;

+	/**
+	 * Since 6.2.0
+	 */
+	private int myBulkExportFileMaximumCapacity = 1_000;
+
 	/**
 	 * Constructor
 	 */
@@ -2937,6 +2942,25 @@
 		myPreserveRequestIdInResourceBody = thePreserveRequestIdInResourceBody;
 	}

+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public int getBulkExportFileMaximumCapacity() {
+		return myBulkExportFileMaximumCapacity;
+	}
+
+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public void setBulkExportFileMaximumCapacity(int theBulkExportFileMaximumCapacity) {
+		myBulkExportFileMaximumCapacity = theBulkExportFileMaximumCapacity;
+	}

 	public enum StoreMetaSourceInformationEnum {
 		NONE(false, false),