Delete expunge fails with identical update timestamps (#5047)

* 4759 - WIP unit test for this issue

* WIP updating unit test for ResourceIdListStep

* WIP candidate fix for ResourceIdListStep batch size problem

* Enhance unit test with ArgumentCaptor.

* WIP Parameterized the ResourceIdListStepTest and added assertions

* Adding changelog for this issue

* Updating changelog message for 5055

* Code formatting and import cleanup

---------

Co-authored-by: Luke deGruchy <luke.degruchy@smilecdr.com>
Steve Corbett authored 2023-07-06 09:59:56 -06:00, committed by GitHub
parent a87321a227
commit 10af1434fc
3 changed files with 124 additions and 7 deletions


@@ -0,0 +1,8 @@
---
type: fix
issue: 5055
title: "
Delete expunge (and possibly other batch 2 jobs) will fail when given more than
2100 resources with identical timestamps when using an mssql database.
This has been fixed.
"


@@ -48,7 +48,7 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
 	private static final Logger ourLog = Logs.getBatchTroubleshootingLog();
 	public static final int DEFAULT_PAGE_SIZE = 20000;
-	private static final int MAX_BATCH_OF_IDS = 500;
+	protected static final int MAX_BATCH_OF_IDS = 500;
 	private final IIdChunkProducer<IT> myIdChunkProducer;
@@ -91,6 +91,12 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
 				break;
 			}
+			// If we get the same last time twice in a row, we've clearly reached the end
+			if (nextChunk.getLastDate().getTime() == previousLastTime) {
+				ourLog.info("Matching final timestamp of {}, loading is completed", new Date(previousLastTime));
+				break;
+			}
 			ourLog.info("Found {} IDs from {} to {}", nextChunk.size(), nextStart, nextChunk.getLastDate());
 			if (nextChunk.size() < 10 && HapiSystemProperties.isTestModeEnabled()) {
 				// TODO: I've added this in order to troubleshoot MultitenantBatchOperationR4Test
@@ -103,12 +109,6 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
 				idBuffer.add(nextId);
 			}
-			// If we get the same last time twice in a row, we've clearly reached the end
-			if (nextChunk.getLastDate().getTime() == previousLastTime) {
-				ourLog.info("Matching final timestamp of {}, loading is completed", new Date(previousLastTime));
-				break;
-			}
 			previousLastTime = nextChunk.getLastDate().getTime();
 			nextStart = nextChunk.getLastDate();
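
The two hunks above move the repeated-timestamp check so it runs before a chunk's IDs are buffered instead of after. Below is a minimal, self-contained sketch of why that ordering matters; it is not the real ResourceIdListStep code, and the Chunk record and fetchNextPage() helper are stand-ins for the IIdChunkProducer call. It assumes a date cursor that cannot advance because every resource shares one update timestamp.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

// Simplified stand-in for the paging loop in ResourceIdListStep. When every resource has
// the same lastUpdated value, the date cursor never moves, so each fetch returns the same
// window. Breaking BEFORE buffering the chunk's IDs (as in the new hunk) stops the loop
// without re-adding the duplicate page; the old ordering buffered the duplicates first and
// only then noticed the repeated date.
public class StalledCursorSketch {

	record Chunk(List<String> ids, Date lastDate) {}

	static final Date SHARED_TIMESTAMP = new Date(1_688_600_000_000L);

	// Hypothetical producer: always returns the same window because the cursor cannot advance.
	static Chunk fetchNextPage(Date cursor) {
		return new Chunk(Arrays.asList("Patient/1", "Patient/2", "Patient/3"), SHARED_TIMESTAMP);
	}

	public static void main(String[] args) {
		long previousLastTime = -1L;
		Date nextStart = new Date(0L);
		List<String> idBuffer = new ArrayList<>();
		while (true) {
			Chunk nextChunk = fetchNextPage(nextStart);
			if (nextChunk.ids().isEmpty()) {
				break;
			}
			// New ordering from the hunk above: detect the stalled cursor before buffering,
			// so the identical page is never added a second time.
			if (nextChunk.lastDate().getTime() == previousLastTime) {
				break;
			}
			idBuffer.addAll(nextChunk.ids());
			previousLastTime = nextChunk.lastDate().getTime();
			nextStart = nextChunk.lastDate();
		}
		System.out.println("Buffered IDs: " + idBuffer); // 3 IDs, not 6: the duplicate page was skipped
	}
}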


@@ -0,0 +1,109 @@
package ca.uhn.fhir.batch2.jobs.step;

import ca.uhn.fhir.batch2.api.IJobDataSink;
import ca.uhn.fhir.batch2.api.RunOutcome;
import ca.uhn.fhir.batch2.api.StepExecutionDetails;
import ca.uhn.fhir.batch2.jobs.chunk.PartitionedUrlChunkRangeJson;
import ca.uhn.fhir.batch2.jobs.chunk.ResourceIdListWorkChunkJson;
import ca.uhn.fhir.batch2.jobs.parameters.PartitionedUrlListJobParameters;
import ca.uhn.fhir.jpa.api.pid.HomogeneousResourcePidList;
import ca.uhn.fhir.jpa.api.pid.TypedResourcePid;
import ca.uhn.fhir.rest.api.server.storage.IResourcePersistentId;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

@ExtendWith(MockitoExtension.class)
class ResourceIdListStepTest {
	@Mock
	private IIdChunkProducer<PartitionedUrlChunkRangeJson> myIdChunkProducer;
	@Mock
	private StepExecutionDetails<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myStepExecutionDetails;
	@Mock
	private IJobDataSink<ResourceIdListWorkChunkJson> myDataSink;
	@Mock
	private PartitionedUrlChunkRangeJson myData;
	@Mock
	private PartitionedUrlListJobParameters myParameters;
	@Captor
	private ArgumentCaptor<ResourceIdListWorkChunkJson> myDataCaptor;

	private ResourceIdListStep<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myResourceIdListStep;

	@BeforeEach
	void beforeEach() {
		myResourceIdListStep = new ResourceIdListStep<>(myIdChunkProducer);
	}

	@ParameterizedTest
	@ValueSource(ints = {1, 100, 500, 501, 2345})
	void testResourceIdListBatchSizeLimit(int theListSize) {
		List<TypedResourcePid> idList = generateIdList(theListSize);
		when(myStepExecutionDetails.getData()).thenReturn(myData);
		when(myParameters.getBatchSize()).thenReturn(theListSize);
		when(myStepExecutionDetails.getParameters()).thenReturn(myParameters);
		HomogeneousResourcePidList homogeneousResourcePidList = mock(HomogeneousResourcePidList.class);
		when(homogeneousResourcePidList.getTypedResourcePids()).thenReturn(idList);
		when(homogeneousResourcePidList.getLastDate()).thenReturn(new Date());
		when(myIdChunkProducer.fetchResourceIdsPage(any(), any(), any(), any(), any()))
				.thenReturn(homogeneousResourcePidList);

		// Ensure none of the work chunks exceed MAX_BATCH_OF_IDS in size:
		doAnswer(i -> {
			ResourceIdListWorkChunkJson list = i.getArgument(0);
			Assertions.assertTrue(list.size() <= ResourceIdListStep.MAX_BATCH_OF_IDS,
					"Id batch size should never exceed " + ResourceIdListStep.MAX_BATCH_OF_IDS);
			return null;
		}).when(myDataSink).accept(any(ResourceIdListWorkChunkJson.class));

		final RunOutcome run = myResourceIdListStep.run(myStepExecutionDetails, myDataSink);
		assertNotEquals(null, run);

		// The work should be divided into chunks of MAX_BATCH_OF_IDS in size (or less, but never more):
		int expectedBatchCount = (int) Math.ceil((float) theListSize / ResourceIdListStep.MAX_BATCH_OF_IDS);
		verify(myDataSink, times(expectedBatchCount)).accept(myDataCaptor.capture());
		final List<ResourceIdListWorkChunkJson> allDataChunks = myDataCaptor.getAllValues();
		assertEquals(expectedBatchCount, allDataChunks.size());

		// Ensure that all chunks except the very last one are MAX_BATCH_OF_IDS in length
		for (int i = 0; i < expectedBatchCount - 1; i++) {
			assertEquals(ResourceIdListStep.MAX_BATCH_OF_IDS, allDataChunks.get(i).size());
		}

		// The very last chunk should be whatever is left over (if there is a remainder):
		int expectedLastBatchSize = theListSize % ResourceIdListStep.MAX_BATCH_OF_IDS;
		expectedLastBatchSize = (expectedLastBatchSize == 0) ? ResourceIdListStep.MAX_BATCH_OF_IDS : expectedLastBatchSize;
		assertEquals(expectedLastBatchSize, allDataChunks.get(allDataChunks.size() - 1).size());
	}

	private List<TypedResourcePid> generateIdList(int theListSize) {
		List<TypedResourcePid> idList = new ArrayList<>();
		for (int id = 0; id < theListSize; id++) {
			IResourcePersistentId theId = mock(IResourcePersistentId.class);
			when(theId.toString()).thenReturn(Integer.toString(id + 1));
			TypedResourcePid typedId = new TypedResourcePid("Patient", theId);
			idList.add(typedId);
		}
		return idList;
	}
}
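
The assertions in testResourceIdListBatchSizeLimit reduce to simple ceiling-and-remainder arithmetic. Here is a small worked example for the largest @ValueSource value, using the same formulas as the test; the wrapper class is only for illustration.

// Worked example of the test's batching expectations for theListSize = 2345, cap of 500.
public class BatchMathExample {
	public static void main(String[] args) {
		int theListSize = 2345;
		int maxBatch = 500; // ResourceIdListStep.MAX_BATCH_OF_IDS
		int expectedBatchCount = (int) Math.ceil((float) theListSize / maxBatch); // 5
		int remainder = theListSize % maxBatch; // 345
		int expectedLastBatchSize = (remainder == 0) ? maxBatch : remainder; // 345
		System.out.println(expectedBatchCount + " chunks: 500, 500, 500, 500, " + expectedLastBatchSize);
	}
}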