Optimize used segment fetching in Kill tasks (#15107)

* Optimize used segment fetching in Kill tasks
This commit is contained in:
AmatyaAvadhanula 2023-10-09 17:54:13 +05:30 committed by GitHub
parent 7a35ce886d
commit 40a6dc4631
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 20 additions and 8 deletions

View File

@@ -222,18 +222,30 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask
toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments)));
// Fetch the load specs of all segments overlapping with the given interval
final Set<Map<String, Object>> usedSegmentLoadSpecs = toolbox
.getTaskActionClient()
.submit(new RetrieveUsedSegmentsAction(getDataSource(), getInterval(), null, Segments.INCLUDING_OVERSHADOWED))
.stream()
.map(DataSegment::getLoadSpec)
.collect(Collectors.toSet());
final Set<Interval> unusedSegmentIntervals = unusedSegments.stream()
.map(DataSegment::getInterval)
.collect(Collectors.toSet());
final Set<Map<String, Object>> usedSegmentLoadSpecs = new HashSet<>();
if (!unusedSegmentIntervals.isEmpty()) {
RetrieveUsedSegmentsAction retrieveUsedSegmentsAction = new RetrieveUsedSegmentsAction(
getDataSource(),
null,
unusedSegmentIntervals,
Segments.INCLUDING_OVERSHADOWED
);
// Fetch the load specs of all segments overlapping with the unused segment intervals
usedSegmentLoadSpecs.addAll(toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction)
.stream()
.map(DataSegment::getLoadSpec)
.collect(Collectors.toSet())
);
}
// Kill segments from the deep storage only if their load specs are not being used by any used segments
final List<DataSegment> segmentsToBeKilled = unusedSegments
.stream()
.filter(unusedSegment -> !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec()))
.filter(unusedSegment -> unusedSegment.getLoadSpec() == null
|| !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec()))
.collect(Collectors.toList());
toolbox.getDataSegmentKiller().kill(segmentsToBeKilled);