Fix batch2 NullPointerException when no results are returned (#5219)

* fix npe error

* spotless

* spotless
This commit is contained in:
Ken Stevens 2023-08-19 20:37:16 -04:00 committed by GitHub
parent ab2a86b0d5
commit b9155721a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 29 deletions

View File

@ -0,0 +1,4 @@
---
type: fix
issue: 5219
title: "Reindex batch job threw a NullPointerException when no results matched the reindex request. This has been corrected."

View File

@ -90,25 +90,26 @@ public class ResourceIdListStep<PT extends PartitionedJobParameters, IT extends
if (nextChunk.isEmpty()) { if (nextChunk.isEmpty()) {
ourLog.info("No data returned"); ourLog.info("No data returned");
} else {
ourLog.debug("Found {} IDs from {} to {}", nextChunk.size(), start, nextChunk.getLastDate());
final Set<TypedPidJson> idBuffer = nextChunk.getTypedResourcePids().stream()
.map(TypedPidJson::new)
.collect(Collectors.toCollection(LinkedHashSet::new));
final UnmodifiableIterator<List<TypedPidJson>> partition =
Iterators.partition(idBuffer.iterator(), maxBatchId);
while (partition.hasNext()) {
final List<TypedPidJson> submissionIds = partition.next();
totalIdsFound += submissionIds.size();
chunkCount++;
submitWorkChunk(submissionIds, nextChunk.getRequestPartitionId(), theDataSink);
}
ourLog.info("Submitted {} chunks with {} resource IDs", chunkCount, totalIdsFound);
} }
ourLog.debug("Found {} IDs from {} to {}", nextChunk.size(), start, nextChunk.getLastDate());
final Set<TypedPidJson> idBuffer = nextChunk.getTypedResourcePids().stream()
.map(TypedPidJson::new)
.collect(Collectors.toCollection(LinkedHashSet::new));
final UnmodifiableIterator<List<TypedPidJson>> partition = Iterators.partition(idBuffer.iterator(), maxBatchId);
while (partition.hasNext()) {
final List<TypedPidJson> submissionIds = partition.next();
totalIdsFound += submissionIds.size();
chunkCount++;
submitWorkChunk(submissionIds, nextChunk.getRequestPartitionId(), theDataSink);
}
ourLog.info("Submitted {} chunks with {} resource IDs", chunkCount, totalIdsFound);
return RunOutcome.SUCCESS; return RunOutcome.SUCCESS;
} }

View File

@ -56,25 +56,30 @@ class ResourceIdListStepTest {
} }
@ParameterizedTest @ParameterizedTest
@ValueSource(ints = {1, 100, 500, 501, 2345, 10500}) @ValueSource(ints = {0, 1, 100, 500, 501, 2345, 10500})
void testResourceIdListBatchSizeLimit(int theListSize) { void testResourceIdListBatchSizeLimit(int theListSize) {
List<TypedResourcePid> idList = generateIdList(theListSize); List<TypedResourcePid> idList = generateIdList(theListSize);
when(myStepExecutionDetails.getData()).thenReturn(myData); when(myStepExecutionDetails.getData()).thenReturn(myData);
when(myParameters.getBatchSize()).thenReturn(theListSize); when(myParameters.getBatchSize()).thenReturn(theListSize);
when(myStepExecutionDetails.getParameters()).thenReturn(myParameters); when(myStepExecutionDetails.getParameters()).thenReturn(myParameters);
HomogeneousResourcePidList homogeneousResourcePidList = mock(HomogeneousResourcePidList.class); HomogeneousResourcePidList homogeneousResourcePidList = mock(HomogeneousResourcePidList.class);
when(homogeneousResourcePidList.getTypedResourcePids()).thenReturn(idList); if (theListSize > 0) {
when(homogeneousResourcePidList.getLastDate()).thenReturn(new Date()); when(homogeneousResourcePidList.getTypedResourcePids()).thenReturn(idList);
when(homogeneousResourcePidList.getLastDate()).thenReturn(new Date());
when(homogeneousResourcePidList.isEmpty()).thenReturn(false);
// Ensure none of the work chunks exceed MAX_BATCH_OF_IDS in size:
doAnswer(i -> {
ResourceIdListWorkChunkJson list = i.getArgument(0);
Assertions.assertTrue(list.size() <= ResourceIdListStep.MAX_BATCH_OF_IDS,
"Id batch size should never exceed " + ResourceIdListStep.MAX_BATCH_OF_IDS);
return null;
}).when(myDataSink).accept(any(ResourceIdListWorkChunkJson.class));
} else {
when(homogeneousResourcePidList.isEmpty()).thenReturn(true);
}
when(myIdChunkProducer.fetchResourceIdsPage(any(), any(), any(), any(), any())) when(myIdChunkProducer.fetchResourceIdsPage(any(), any(), any(), any(), any()))
.thenReturn(homogeneousResourcePidList); .thenReturn(homogeneousResourcePidList);
// Ensure none of the work chunks exceed MAX_BATCH_OF_IDS in size:
doAnswer(i -> {
ResourceIdListWorkChunkJson list = i.getArgument(0);
Assertions.assertTrue(list.size() <= ResourceIdListStep.MAX_BATCH_OF_IDS,
"Id batch size should never exceed " + ResourceIdListStep.MAX_BATCH_OF_IDS);
return null;
}).when(myDataSink).accept(any(ResourceIdListWorkChunkJson.class));
final RunOutcome run = myResourceIdListStep.run(myStepExecutionDetails, myDataSink); final RunOutcome run = myResourceIdListStep.run(myStepExecutionDetails, myDataSink);
assertNotEquals(null, run); assertNotEquals(null, run);
@ -93,7 +98,9 @@ class ResourceIdListStepTest {
// The very last chunk should be whatever is left over (if there is a remainder): // The very last chunk should be whatever is left over (if there is a remainder):
int expectedLastBatchSize = theListSize % ResourceIdListStep.MAX_BATCH_OF_IDS; int expectedLastBatchSize = theListSize % ResourceIdListStep.MAX_BATCH_OF_IDS;
expectedLastBatchSize = (expectedLastBatchSize == 0) ? ResourceIdListStep.MAX_BATCH_OF_IDS : expectedLastBatchSize; expectedLastBatchSize = (expectedLastBatchSize == 0) ? ResourceIdListStep.MAX_BATCH_OF_IDS : expectedLastBatchSize;
assertEquals(expectedLastBatchSize, allDataChunks.get(allDataChunks.size() - 1).size()); if (!allDataChunks.isEmpty()) {
assertEquals(expectedLastBatchSize, allDataChunks.get(allDataChunks.size() - 1).size());
}
} }
private List<TypedResourcePid> generateIdList(int theListSize) { private List<TypedResourcePid> generateIdList(int theListSize) {