diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_2_0/4117-bulk-export-file-size-setting.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_2_0/4117-bulk-export-file-size-setting.yaml
new file mode 100644
index 00000000000..657fa132bc4
--- /dev/null
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_2_0/4117-bulk-export-file-size-setting.yaml
@@ -0,0 +1,5 @@
+---
+type: add
+issue: 4117
+title: "Previously, the number of resources per binary file in bulk export was a static 1000. This is now configurable by a new DaoConfig property called
+'setBulkExportFileMaximumCapacity()', and the default value is 1000 resources per file."
diff --git a/hapi-fhir-storage-batch2-jobs/src/main/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStep.java b/hapi-fhir-storage-batch2-jobs/src/main/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStep.java
index 153d318ca39..db308719271 100644
--- a/hapi-fhir-storage-batch2-jobs/src/main/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStep.java
+++ b/hapi-fhir-storage-batch2-jobs/src/main/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStep.java
@@ -30,6 +30,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.i18n.Msg;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.storage.ResourcePersistentId;
@@ -52,6 +53,9 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobParameters, BulkExportIdList> {
 	@Autowired
 	private IBulkExportProcessor myBulkExportProcessor;
 
+	@Autowired
+	private DaoConfig myDaoConfig;
+
 	@Nonnull
 	@Override
 	public RunOutcome run(@Nonnull StepExecutionDetails<BulkExportJobParameters, VoidModel> theStepExecutionDetails,
@@ -98,9 +102,8 @@ public class FetchResourceIdsStep implements IFirstJobStepWorker<BulkExportJobParameters, BulkExportIdList>
 			while (pidIterator.hasNext()) {
 				ResourcePersistentId pid = pidIterator.next();
 				idsToSubmit.add(Id.getIdFromPID(pid, resourceType));
-				// >= so that we know (with confidence)
-				// that every batch is <= 1000 items
-				if (idsToSubmit.size() >= MAX_IDS_TO_BATCH) {
+				// Make sure the resources stored in each batch do not exceed the max file capacity
+				if (idsToSubmit.size() >= myDaoConfig.getBulkExportFileMaximumCapacity()) {
 					submitWorkChunk(idsToSubmit, resourceType, params, theDataSink);
 					submissionCount++;
 					idsToSubmit = new ArrayList<>();
diff --git a/hapi-fhir-storage-batch2-jobs/src/test/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStepTest.java b/hapi-fhir-storage-batch2-jobs/src/test/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStepTest.java
index da8137e7745..0947d8b219a 100644
--- a/hapi-fhir-storage-batch2-jobs/src/test/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStepTest.java
+++ b/hapi-fhir-storage-batch2-jobs/src/test/java/ca/uhn/fhir/batch2/jobs/export/FetchResourceIdsStepTest.java
@@ -8,6 +8,7 @@ import ca.uhn.fhir.batch2.jobs.export.models.BulkExportIdList;
 import ca.uhn.fhir.batch2.jobs.export.models.BulkExportJobParameters;
 import ca.uhn.fhir.batch2.jobs.models.Id;
 import ca.uhn.fhir.batch2.model.JobInstance;
+import ca.uhn.fhir.jpa.api.config.DaoConfig;
 import ca.uhn.fhir.jpa.bulk.export.api.IBulkExportProcessor;
 import ca.uhn.fhir.jpa.bulk.export.model.ExportPIDIteratorParameters;
 import ca.uhn.fhir.rest.api.server.bulk.BulkDataExportOptions;
@@ -54,6 +55,8 @@ public class FetchResourceIdsStepTest {
 
 	@InjectMocks
 	private FetchResourceIdsStep myFirstStep;
+	@Mock
+	private DaoConfig myDaoConfig;
 
 	@BeforeEach
 	public void init() {
@@ -115,6 +118,8 @@ public class FetchResourceIdsStepTest {
 			any(ExportPIDIteratorParameters.class)
 		)).thenReturn(patientIds.iterator())
 			.thenReturn(observationIds.iterator());
+		int maxFileCapacity = 1000;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);
 
 		// test
 		RunOutcome outcome = myFirstStep.run(input, sink);
@@ -158,7 +163,7 @@ public class FetchResourceIdsStepTest {
 	}
 
 	@Test
-	public void run_moreThanAThousandPatients_hasAtLeastTwoJobs() {
+	public void run_moreThanTheMaxFileCapacityPatients_hasAtLeastTwoJobs() {
 		// setup
 		IJobDataSink<BulkExportIdList> sink = mock(IJobDataSink.class);
 		JobInstance instance = new JobInstance();
@@ -169,7 +174,11 @@ public class FetchResourceIdsStepTest {
 		ourLog.setLevel(Level.INFO);
 
 		List<ResourcePersistentId> patientIds = new ArrayList<>();
-		for (int i = 0; i < FetchResourceIdsStep.MAX_IDS_TO_BATCH + 1; i++) {
+		// when
+		int maxFileCapacity = 5;
+		when(myDaoConfig.getBulkExportFileMaximumCapacity()).thenReturn(maxFileCapacity);
+
+		for (int i = 0; i <= maxFileCapacity; i++) {
 			ResourcePersistentId id = new ResourcePersistentId("Patient/RED" + i);
 			patientIds.add(id);
 		}
diff --git a/hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/api/config/DaoConfig.java b/hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/api/config/DaoConfig.java
index 6da8e161e27..01525776504 100644
--- a/hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/api/config/DaoConfig.java
+++ b/hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/api/config/DaoConfig.java
@@ -324,6 +324,11 @@ public class DaoConfig {
 	 */
 	private boolean myPreserveRequestIdInResourceBody = false;
 
+	/**
+	 * Since 6.2.0
+	 */
+	private int myBulkExportFileMaximumCapacity = 1_000;
+
 	/**
 	 * Constructor
 	 */
@@ -2937,6 +2942,25 @@ public class DaoConfig {
 		myPreserveRequestIdInResourceBody = thePreserveRequestIdInResourceBody;
 	}
 
+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public int getBulkExportFileMaximumCapacity() {
+		return myBulkExportFileMaximumCapacity;
+	}
+
+	/**
+	 * This setting controls how many resources will be stored in each binary file created by a bulk export.
+	 * Default is 1000 resources per file.
+	 *
+	 * @since 6.2.0
+	 */
+	public void setBulkExportFileMaximumCapacity(int theBulkExportFileMaximumCapacity) {
+		myBulkExportFileMaximumCapacity = theBulkExportFileMaximumCapacity;
+	}
 
 	public enum StoreMetaSourceInformationEnum {
 		NONE(false, false),
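
Usage note (not part of the patch): a minimal sketch of how a deployment could override the new setting, assuming a Spring configuration class of your own. The class and bean method names below are illustrative; only DaoConfig and setBulkExportFileMaximumCapacity() come from this change.

import ca.uhn.fhir.jpa.api.config.DaoConfig;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class MyFhirServerConfig {

	// Hypothetical bean method: builds the DaoConfig used by the JPA server.
	@Bean
	public DaoConfig daoConfig() {
		DaoConfig daoConfig = new DaoConfig();
		// Cap each bulk export binary file at 500 resources instead of the default 1000.
		daoConfig.setBulkExportFileMaximumCapacity(500);
		return daoConfig;
	}
}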