Implemented a new setting for Bulk Export that changes how many resources are stored in each binary file (#4119)
* Implemented a new setting for Bulk Export that changes how many resources are stored in each binary file
* Addressing suggestions
* Fixing test
parent 379220fc46
commit 9d7829a404
@@ -0,0 +1,5 @@
+---
+type: add
+issue: 4117
+title: "Previously, the number of resources per binary file in bulk export was a static 1000. This is now configurable by a new DaoConfig property called
+  'setBulkExportFileMaximumCapacity()', and the default value is 1000 resources per file."
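For readers coming from the changelog, a minimal sketch of how an application might use the new property (BulkExportCapacityExample and customizedDaoConfig are hypothetical names; only DaoConfig and its new accessor come from this commit):

```java
import ca.uhn.fhir.jpa.api.config.DaoConfig;

public class BulkExportCapacityExample {
	// Hypothetical helper; real applications would apply this to the
	// DaoConfig bean they already register with the JPA server.
	public static DaoConfig customizedDaoConfig() {
		DaoConfig daoConfig = new DaoConfig();
		// Default is 1000 resources per binary file; lower it to produce
		// smaller (but more numerous) output files per resource type.
		daoConfig.setBulkExportFileMaximumCapacity(500);
		return daoConfig;
	}
}
```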
@@ -30,6 +30,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.i18n.Msg;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId;
@@ -52,6 +53,9 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobPa
 	@Autowired
 	private IBulkExportProcessor myBulkExportProcessor;
 
+	@Autowired
+	private DaoConfig myDaoConfig;
+
 	@Nonnull
 	@Override
 	public RunOutcome run(@Nonnull StepExecutionDetails<BulkExportJobParameters, VoidModel> theStepExecutionDetails,
@@ -98,9 +102,8 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobPa
 
 				idsToSubmit.add(id);
 
-				// >= so that we know (with confidence)
-				// that every batch is <= 1000 items
-				if (idsToSubmit.size() >= MAX_IDS_TO_BATCH) {
+				// Make sure the number of resources stored in each batch does not exceed the max capacity
+				if (idsToSubmit.size() >= myDaoConfig.getBulkExportFileMaximumCapacity()) {
 					submitWorkChunk(idsToSubmit, resourceType, params, theDataSink);
 					submissionCount++;
 					idsToSubmit = new ArrayList<>();
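To make the control-flow change easier to follow outside the diff, here is a self-contained sketch of the same chunking pattern; ChunkingSketch, submitInChunks, and the String ids are illustrative stand-ins rather than the real step, data sink, or ResourcePersistentId:

```java
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class ChunkingSketch {

	// Splits a stream of resource ids into work chunks of at most
	// theMaxFileCapacity entries, mirroring the loop in FetchResourceIdsStep.
	static int submitInChunks(Iterator<String> theIds, int theMaxFileCapacity) {
		int submissionCount = 0;
		List<String> idsToSubmit = new ArrayList<>();
		while (theIds.hasNext()) {
			idsToSubmit.add(theIds.next());
			// >= guarantees no chunk ever exceeds the configured capacity
			if (idsToSubmit.size() >= theMaxFileCapacity) {
				submitWorkChunk(idsToSubmit);
				submissionCount++;
				idsToSubmit = new ArrayList<>();
			}
		}
		// flush whatever is left as a final, partial chunk
		if (!idsToSubmit.isEmpty()) {
			submitWorkChunk(idsToSubmit);
			submissionCount++;
		}
		return submissionCount;
	}

	// Stand-in for handing a chunk to the batch2 data sink
	static void submitWorkChunk(List<String> theChunk) {
		System.out.println("submitted chunk of " + theChunk.size() + " ids");
	}

	public static void main(String[] args) {
		List<String> ids = new ArrayList<>();
		for (int i = 0; i <= 5; i++) {
			ids.add("Patient/RED" + i);
		}
		// With capacity 5 and six ids this prints two chunks: 5 then 1
		submitInChunks(ids.iterator(), 5);
	}
}
```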
@@ -8,6 +8,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.batch2.model.JobInstance;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.bulk.BulkDataExportOptions;
@@ -54,6 +55,8 @@ public class FetchResourceIdsStepTest {
 
 	@InjectMocks
 	private FetchResourceIdsStep myFirstStep;
+	@Mock
+	private DaoConfig myDaoConfig;
 
 	@BeforeEach
 	public void init() {
@@ -115,6 +118,8 @@ public class FetchResourceIdsStepTest {
 			any(ExportPIDIteratorParameters.class)
 		)).thenReturn(patientIds.iterator())
 			.thenReturn(observationIds.iterator());
+		int maxFileCapacity = 1000;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);
 
 		// test
 		RunOutcome outcome = myFirstStep.run(input, sink);
@@ -158,7 +163,7 @@ public class FetchResourceIdsStepTest {
 	}
 
 	@Test
-	public void run_moreThanAThousandPatients_hasAtLeastTwoJobs() {
+	public void run_moreThanTheMaxFileCapacityPatients_hasAtLeastTwoJobs() {
 		// setup
 		IJobDataSink<BulkExportIdList> sink = mock(IJobDataSink.class);
 		JobInstance instance = new JobInstance();
@@ -169,7 +174,11 @@ public class FetchResourceIdsStepTest {
 		ourLog.setLevel(Level.INFO);
 		List<ResourcePersistentId> patientIds = new ArrayList<>();
 
-		for (int i = 0; i < FetchResourceIdsStep.MAX_IDS_TO_BATCH + 1; i++) {
+		// when
+		int maxFileCapacity = 5;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);
+
+		for (int i = 0; i <= maxFileCapacity; i++) {
 			ResourcePersistentId id = new ResourcePersistentId("Patient/RED" + i);
 			patientIds.add(id);
 		}
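Design note on the reworked test: stubbing getBulkExportFileMaximumCapacity() to return 5 keeps the fixture small, and the loop's <= bound deliberately creates maxFileCapacity + 1 ids (six), forcing one full chunk of five plus a second chunk holding the remainder, which matches the renamed test's hasAtLeastTwoJobs expectation.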
@@ -324,6 +324,11 @@ public class DaoConfig {
 	 */
 	private boolean myPreserveRequestIdInResourceBody = false;
 
+	/**
+	 * Since 6.2.0
+	 */
+	private int myBulkExportFileMaximumCapacity = 1_000;
+
 	/**
 	 * Constructor
 	 */
@@ -2937,6 +2942,25 @@ public class DaoConfig {
 		myPreserveRequestIdInResourceBody = thePreserveRequestIdInResourceBody;
 	}
 
+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public int getBulkExportFileMaximumCapacity() {
+		return myBulkExportFileMaximumCapacity;
+	}
+
+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public void setBulkExportFileMaximumCapacity(int theBulkExportFileMaximumCapacity) {
+		myBulkExportFileMaximumCapacity = theBulkExportFileMaximumCapacity;
+	}
+
 	public enum StoreMetaSourceInformationEnum {
 		NONE(false, false),
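And a minimal round-trip check of the new accessors, as a usage sketch (JUnit 5 assumed; the test class is hypothetical and not part of this commit):

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import ca.uhn.fhir.jpa.api.config.DaoConfig;
import org.junit.jupiter.api.Test;

public class BulkExportFileMaximumCapacityTest {

	@Test
	public void defaultIsOneThousandAndOverrideSticks() {
		DaoConfig config = new DaoConfig();
		// The new field initializes to 1_000
		assertEquals(1000, config.getBulkExportFileMaximumCapacity());

		config.setBulkExportFileMaximumCapacity(250);
		assertEquals(250, config.getBulkExportFileMaximumCapacity());
	}
}
```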