From 10af1434fca92e29e2daea6eeb14c99e88307d73 Mon Sep 17 00:00:00 2001
From: Steve Corbett <137920358+steve-corbett-smilecdr@users.noreply.github.com>
Date: Thu, 6 Jul 2023 09:59:56 -0600
Subject: [PATCH] Delete expunge fails with identical update timestamps (#5047)

* 4759 - WIP unit test for this issue

* WIP updating unit test for ResourceIdListStep

* WIP candidate fix for ResourceIdListStep batch size problem

* Enhance unit test with ArgumentCaptor.

* WIP Parameterized the ResourceIdListStepTest and added assertions

* Adding changelog for this issue

* Updating changelog message for 5055

* Code formatting and import cleanup

---------

Co-authored-by: Luke deGruchy
---
 ...5-bulk-delete-on-2100-resources-fails.yaml |   8 ++
 .../batch2/jobs/step/ResourceIdListStep.java  |  14 +--
 .../jobs/step/ResourceIdListStepTest.java     | 109 ++++++++++++++++++
 3 files changed, 124 insertions(+), 7 deletions(-)
 create mode 100644 hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/5055-bulk-delete-on-2100-resources-fails.yaml
 create mode 100644 hapi-fhir-storage-batch2/src/test/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStepTest.java

diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/5055-bulk-delete-on-2100-resources-fails.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/5055-bulk-delete-on-2100-resources-fails.yaml
new file mode 100644
index 00000000000..c5c4b9c83b6
--- /dev/null
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/5055-bulk-delete-on-2100-resources-fails.yaml
@@ -0,0 +1,8 @@
+---
+type: fix
+issue: 5055
+title: "
+  Delete expunge (and possibly other batch 2 jobs) would previously fail when given more than
+  2100 resources with identical timestamps when using an MSSQL database.
+  This has been fixed.
+"
diff --git a/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStep.java b/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStep.java
index 0876ab67e9f..36bbdd86981 100644
--- a/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStep.java
+++ b/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStep.java
@@ -48,7 +48,7 @@ public class ResourceIdListStep
@@ -91,6 +91,12 @@ public class ResourceIdListStep
diff --git a/hapi-fhir-storage-batch2/src/test/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStepTest.java b/hapi-fhir-storage-batch2/src/test/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStepTest.java
new file mode 100644
--- /dev/null
+++ b/hapi-fhir-storage-batch2/src/test/java/ca/uhn/fhir/batch2/jobs/step/ResourceIdListStepTest.java
@@ -0,0 +1,109 @@
+package ca.uhn.fhir.batch2.jobs.step;
+
+import ca.uhn.fhir.batch2.api.IJobDataSink;
+import ca.uhn.fhir.batch2.api.RunOutcome;
+import ca.uhn.fhir.batch2.api.StepExecutionDetails;
+import ca.uhn.fhir.batch2.jobs.chunk.PartitionedUrlChunkRangeJson;
+import ca.uhn.fhir.batch2.jobs.chunk.ResourceIdListWorkChunkJson;
+import ca.uhn.fhir.batch2.jobs.parameters.PartitionedUrlListJobParameters;
+import ca.uhn.fhir.jpa.api.pid.HomogeneousResourcePidList;
+import ca.uhn.fhir.jpa.api.pid.TypedResourcePid;
+import ca.uhn.fhir.rest.api.server.storage.IResourcePersistentId;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class ResourceIdListStepTest {
+    @Mock
+    private IIdChunkProducer<PartitionedUrlChunkRangeJson> myIdChunkProducer;
+    @Mock
+    private StepExecutionDetails<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myStepExecutionDetails;
+    @Mock
+    private IJobDataSink<ResourceIdListWorkChunkJson> myDataSink;
+    @Mock
+    private PartitionedUrlChunkRangeJson myData;
+    @Mock
+    private PartitionedUrlListJobParameters myParameters;
+
+    @Captor
+    private ArgumentCaptor<ResourceIdListWorkChunkJson> myDataCaptor;
+
+    private ResourceIdListStep<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myResourceIdListStep;
+
+    @BeforeEach
+    void beforeEach() {
+        myResourceIdListStep = new ResourceIdListStep<>(myIdChunkProducer);
+    }
+
+    @ParameterizedTest
+    @ValueSource(ints = {1, 100, 500, 501, 2345})
+    void testResourceIdListBatchSizeLimit(int theListSize) {
+        List<TypedResourcePid> idList = generateIdList(theListSize);
+        when(myStepExecutionDetails.getData()).thenReturn(myData);
+        when(myParameters.getBatchSize()).thenReturn(theListSize);
+        when(myStepExecutionDetails.getParameters()).thenReturn(myParameters);
+        HomogeneousResourcePidList homogeneousResourcePidList = mock(HomogeneousResourcePidList.class);
+        when(homogeneousResourcePidList.getTypedResourcePids()).thenReturn(idList);
+        when(homogeneousResourcePidList.getLastDate()).thenReturn(new Date());
+        when(myIdChunkProducer.fetchResourceIdsPage(any(), any(), any(), any(), any()))
+            .thenReturn(homogeneousResourcePidList);
+
+        // Ensure none of the work chunks exceed MAX_BATCH_OF_IDS in size:
+        doAnswer(i -> {
+            ResourceIdListWorkChunkJson list = i.getArgument(0);
+            Assertions.assertTrue(list.size() <= ResourceIdListStep.MAX_BATCH_OF_IDS,
+                "Id batch size should never exceed " + ResourceIdListStep.MAX_BATCH_OF_IDS);
+            return null;
+        }).when(myDataSink).accept(any(ResourceIdListWorkChunkJson.class));
+
+        final RunOutcome run = myResourceIdListStep.run(myStepExecutionDetails, myDataSink);
+        assertNotEquals(null, run);
+
+        // The work should be divided into chunks of MAX_BATCH_OF_IDS in size (or less, but never more):
+        int expectedBatchCount = (int) Math.ceil((float) theListSize / ResourceIdListStep.MAX_BATCH_OF_IDS);
+        verify(myDataSink, times(expectedBatchCount)).accept(myDataCaptor.capture());
+        final List<ResourceIdListWorkChunkJson> allDataChunks = myDataCaptor.getAllValues();
+        assertEquals(expectedBatchCount, allDataChunks.size());
+
+        // Ensure that all chunks except the very last one are MAX_BATCH_OF_IDS in length
+        for (int i = 0; i < expectedBatchCount - 1; i++) {
+            assertEquals(ResourceIdListStep.MAX_BATCH_OF_IDS, allDataChunks.get(i).size());
+        }
+
+        // The very last chunk should be whatever is left over (if there is a remainder):
+        int expectedLastBatchSize = theListSize % ResourceIdListStep.MAX_BATCH_OF_IDS;
+        expectedLastBatchSize = (expectedLastBatchSize == 0) ? ResourceIdListStep.MAX_BATCH_OF_IDS : expectedLastBatchSize;
+        assertEquals(expectedLastBatchSize, allDataChunks.get(allDataChunks.size() - 1).size());
+    }
+
+    private List<TypedResourcePid> generateIdList(int theListSize) {
+        List<TypedResourcePid> idList = new ArrayList<>();
+        for (int id = 0; id < theListSize; id++) {
+            IResourcePersistentId theId = mock(IResourcePersistentId.class);
+            when(theId.toString()).thenReturn(Integer.toString(id + 1));
+            TypedResourcePid typedId = new TypedResourcePid("Patient", theId);
+            idList.add(typedId);
+        }
+        return idList;
+    }
+}
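
Note on the batching rule the test above asserts: ResourceIdListStep must never hand the data sink a work chunk carrying more than ResourceIdListStep.MAX_BATCH_OF_IDS ids, which is what keeps any single downstream SQL statement (for example an IN (...) list on MSSQL) below the roughly 2100-parameter ceiling mentioned in the changelog. The following is a minimal, self-contained sketch of that chunking idea, not the HAPI FHIR implementation; the constant value 500 and the helper name partitionIds are assumptions made only for illustration.

    import java.util.ArrayList;
    import java.util.List;

    class IdBatchingSketch {
        // Assumed to mirror ResourceIdListStep.MAX_BATCH_OF_IDS; the ValueSource
        // boundary values 500/501 in the test suggest 500, but this is an assumption.
        static final int MAX_BATCH_OF_IDS = 500;

        // Split the fetched ids into chunks of at most MAX_BATCH_OF_IDS each,
        // so no single chunk can produce an oversized parameter list downstream.
        static List<List<Long>> partitionIds(List<Long> theAllIds) {
            List<List<Long>> batches = new ArrayList<>();
            for (int from = 0; from < theAllIds.size(); from += MAX_BATCH_OF_IDS) {
                int to = Math.min(from + MAX_BATCH_OF_IDS, theAllIds.size());
                batches.add(new ArrayList<>(theAllIds.subList(from, to)));
            }
            return batches;
        }

        public static void main(String[] args) {
            List<Long> ids = new ArrayList<>();
            for (long i = 0; i < 2345; i++) {
                ids.add(i);
            }
            List<List<Long>> batches = partitionIds(ids);
            // 2345 ids -> 5 batches: four of 500 and a final one of 345,
            // matching the expectations in testResourceIdListBatchSizeLimit.
            System.out.println(batches.size() + " batches, last has "
                + batches.get(batches.size() - 1).size());
        }
    }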