Delete expunge fails with identical update timestamps (#5047)
* 4759 - WIP unit test for this issue
* WIP updating unit test for ResourceIdListStep
* WIP candidate fix for ResourceIdListStep batch size problem
* Enhance unit test with ArgumentCaptor.
* WIP Parameterized the ResourceIdListStepTest and added assertions
* Adding changelog for this issue
* Updating changelog message for 5055
* Code formatting and import cleanup

Co-authored-by: Luke deGruchy <luke.degruchy@smilecdr.com>
parent a87321a227
commit 10af1434fc
Changelog (new file):
@@ -0,0 +1,8 @@
+---
+type: fix
+issue: 5055
+title: "
+    Delete expunge (and possibly other batch 2 jobs) will fail when given more than
+    2100 resources with identical timestamps when using an mssql database.
+    This has been fixed.
+    "
ResourceIdListStep.java:
@@ -48,7 +48,7 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
     private static final Logger ourLog = Logs.getBatchTroubleshootingLog();
     public static final int DEFAULT_PAGE_SIZE = 20000;

-    private static final int MAX_BATCH_OF_IDS = 500;
+    protected static final int MAX_BATCH_OF_IDS = 500;

     private final IIdChunkProducer<IT> myIdChunkProducer;
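The only change in this hunk widens MAX_BATCH_OF_IDS from private to protected so that the new ResourceIdListStepTest, which sits in the same ca.uhn.fhir.batch2.jobs.step package, can assert against it directly.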
@@ -91,6 +91,12 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
                 break;
             }

+            // If we get the same last time twice in a row, we've clearly reached the end
+            if (nextChunk.getLastDate().getTime() == previousLastTime) {
+                ourLog.info("Matching final timestamp of {}, loading is completed", new Date(previousLastTime));
+                break;
+            }
+
             ourLog.info("Found {} IDs from {} to {}", nextChunk.size(), nextStart, nextChunk.getLastDate());
             if (nextChunk.size() < 10 && HapiSystemProperties.isTestModeEnabled()) {
                 // TODO: I've added this in order to troubleshoot MultitenantBatchOperationR4Test
@@ -103,12 +109,6 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
                 idBuffer.add(nextId);
             }

-            // If we get the same last time twice in a row, we've clearly reached the end
-            if (nextChunk.getLastDate().getTime() == previousLastTime) {
-                ourLog.info("Matching final timestamp of {}, loading is completed", new Date(previousLastTime));
-                break;
-            }
-
             previousLastTime = nextChunk.getLastDate().getTime();
             nextStart = nextChunk.getLastDate();
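Read together, these two hunks relocate the duplicate-timestamp guard from after the ID-buffering loop to before it, so a page whose last-updated date has not advanced is never buffered a second time. A condensed, self-contained sketch of the resulting loop shape (Chunk and ChunkProducer are simplified stand-ins for the real HAPI types, not the actual method):

    import java.util.Date;
    import java.util.List;

    // Simplified stand-ins for the real chunk/producer types (illustration only)
    record Chunk(List<String> ids, Date lastDate) {
        boolean isEmpty() {
            return ids.isEmpty();
        }
    }

    interface ChunkProducer {
        Chunk fetchPage(Date theStart);
    }

    class FetchLoopSketch {
        static void drain(ChunkProducer theProducer, Date theStart) {
            long previousLastTime = -1L;
            Date nextStart = theStart;
            while (true) {
                Chunk nextChunk = theProducer.fetchPage(nextStart);
                if (nextChunk.isEmpty()) {
                    break; // no more rows at all
                }
                // Moved guard: bail out *before* buffering. When every remaining
                // row shares one timestamp, the next page starts at the same date
                // and returns the same rows again; checking here (as the hunks
                // suggest) avoids buffering that repeated page.
                if (nextChunk.lastDate().getTime() == previousLastTime) {
                    break;
                }
                // ... buffer nextChunk.ids() here, flushing in batches of at most
                // MAX_BATCH_OF_IDS so no downstream statement exceeds 2100 parameters ...
                previousLastTime = nextChunk.lastDate().getTime();
                nextStart = nextChunk.lastDate();
            }
        }
    }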
ResourceIdListStepTest.java (new file):
@@ -0,0 +1,109 @@
+package ca.uhn.fhir.batch2.jobs.step;
+
+import ca.uhn.fhir.batch2.api.IJobDataSink;
+import ca.uhn.fhir.batch2.api.RunOutcome;
+import ca.uhn.fhir.batch2.api.StepExecutionDetails;
+import ca.uhn.fhir.batch2.jobs.chunk.PartitionedUrlChunkRangeJson;
+import ca.uhn.fhir.batch2.jobs.chunk.ResourceIdListWorkChunkJson;
+import ca.uhn.fhir.batch2.jobs.parameters.PartitionedUrlListJobParameters;
+import ca.uhn.fhir.jpa.api.pid.HomogeneousResourcePidList;
+import ca.uhn.fhir.jpa.api.pid.TypedResourcePid;
+import ca.uhn.fhir.rest.api.server.storage.IResourcePersistentId;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class ResourceIdListStepTest {
+    @Mock
+    private IIdChunkProducer<PartitionedUrlChunkRangeJson> myIdChunkProducer;
+    @Mock
+    private StepExecutionDetails<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myStepExecutionDetails;
+    @Mock
+    private IJobDataSink<ResourceIdListWorkChunkJson> myDataSink;
+    @Mock
+    private PartitionedUrlChunkRangeJson myData;
+    @Mock
+    private PartitionedUrlListJobParameters myParameters;
+
+    @Captor
+    private ArgumentCaptor<ResourceIdListWorkChunkJson> myDataCaptor;
+
+    private ResourceIdListStep<PartitionedUrlListJobParameters, PartitionedUrlChunkRangeJson> myResourceIdListStep;
+
+    @BeforeEach
+    void beforeEach() {
+        myResourceIdListStep = new ResourceIdListStep<>(myIdChunkProducer);
+    }
+
+    @ParameterizedTest
+    @ValueSource(ints = {1, 100, 500, 501, 2345})
+    void testResourceIdListBatchSizeLimit(int theListSize) {
+        List<TypedResourcePid> idList = generateIdList(theListSize);
+        when(myStepExecutionDetails.getData()).thenReturn(myData);
+        when(myParameters.getBatchSize()).thenReturn(theListSize);
+        when(myStepExecutionDetails.getParameters()).thenReturn(myParameters);
+        HomogeneousResourcePidList homogeneousResourcePidList = mock(HomogeneousResourcePidList.class);
+        when(homogeneousResourcePidList.getTypedResourcePids()).thenReturn(idList);
+        when(homogeneousResourcePidList.getLastDate()).thenReturn(new Date());
+        when(myIdChunkProducer.fetchResourceIdsPage(any(), any(), any(), any(), any()))
+                .thenReturn(homogeneousResourcePidList);
+
+        // Ensure none of the work chunks exceed MAX_BATCH_OF_IDS in size:
+        doAnswer(i -> {
+            ResourceIdListWorkChunkJson list = i.getArgument(0);
+            Assertions.assertTrue(list.size() <= ResourceIdListStep.MAX_BATCH_OF_IDS,
+                    "Id batch size should never exceed " + ResourceIdListStep.MAX_BATCH_OF_IDS);
+            return null;
+        }).when(myDataSink).accept(any(ResourceIdListWorkChunkJson.class));
+
+        final RunOutcome run = myResourceIdListStep.run(myStepExecutionDetails, myDataSink);
+        assertNotEquals(null, run);
+
+        // The work should be divided into chunks of MAX_BATCH_OF_IDS in size (or less, but never more):
+        int expectedBatchCount = (int) Math.ceil((float) theListSize / ResourceIdListStep.MAX_BATCH_OF_IDS);
+        verify(myDataSink, times(expectedBatchCount)).accept(myDataCaptor.capture());
+        final List<ResourceIdListWorkChunkJson> allDataChunks = myDataCaptor.getAllValues();
+        assertEquals(expectedBatchCount, allDataChunks.size());
+
+        // Ensure that all chunks except the very last one are MAX_BATCH_OF_IDS in length
+        for (int i = 0; i < expectedBatchCount - 1; i++) {
+            assertEquals(ResourceIdListStep.MAX_BATCH_OF_IDS, allDataChunks.get(i).size());
+        }
+
+        // The very last chunk should be whatever is left over (if there is a remainder):
+        int expectedLastBatchSize = theListSize % ResourceIdListStep.MAX_BATCH_OF_IDS;
+        expectedLastBatchSize = (expectedLastBatchSize == 0) ? ResourceIdListStep.MAX_BATCH_OF_IDS : expectedLastBatchSize;
+        assertEquals(expectedLastBatchSize, allDataChunks.get(allDataChunks.size() - 1).size());
+    }
+
+    private List<TypedResourcePid> generateIdList(int theListSize) {
+        List<TypedResourcePid> idList = new ArrayList<>();
+        for (int id = 0; id < theListSize; id++) {
+            IResourcePersistentId theId = mock(IResourcePersistentId.class);
+            when(theId.toString()).thenReturn(Integer.toString(id + 1));
+            TypedResourcePid typedId = new TypedResourcePid("Patient", theId);
+            idList.add(typedId);
+        }
+        return idList;
+    }
+}
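To make the test's chunk arithmetic concrete: the largest @ValueSource input, 2345 IDs, splits into ceil(2345 / 500) = 5 chunks, four full chunks of 500 plus a final chunk of 345, and the modulo guard keeps exact multiples such as 500 from predicting an empty last chunk. A standalone check of that math over all five parameterized inputs:

    public class ChunkMathExample {
        public static void main(String[] args) {
            int max = 500; // ResourceIdListStep.MAX_BATCH_OF_IDS
            for (int listSize : new int[] {1, 100, 500, 501, 2345}) {
                int batches = (int) Math.ceil((float) listSize / max);
                int last = listSize % max == 0 ? max : listSize % max;
                // e.g. "2345 IDs: 5 chunks, last chunk holds 345"
                System.out.println(listSize + " IDs: " + batches + " chunks, last chunk holds " + last);
            }
        }
    }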