From 40a6dc4631364bfce3bb54bb292092575a349c0b Mon Sep 17 00:00:00 2001 From: AmatyaAvadhanula Date: Mon, 9 Oct 2023 17:54:13 +0530 Subject: [PATCH] Optimize used segment fetching in Kill tasks (#15107) * Optimize used segment fetching in Kill tasks --- .../common/task/KillUnusedSegmentsTask.java | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index cc760894603..1726a3e6800 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -222,18 +222,30 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); - // Fetch the load specs of all segments overlapping with the given interval - final Set> usedSegmentLoadSpecs = toolbox - .getTaskActionClient() - .submit(new RetrieveUsedSegmentsAction(getDataSource(), getInterval(), null, Segments.INCLUDING_OVERSHADOWED)) - .stream() - .map(DataSegment::getLoadSpec) - .collect(Collectors.toSet()); + final Set unusedSegmentIntervals = unusedSegments.stream() + .map(DataSegment::getInterval) + .collect(Collectors.toSet()); + final Set> usedSegmentLoadSpecs = new HashSet<>(); + if (!unusedSegmentIntervals.isEmpty()) { + RetrieveUsedSegmentsAction retrieveUsedSegmentsAction = new RetrieveUsedSegmentsAction( + getDataSource(), + null, + unusedSegmentIntervals, + Segments.INCLUDING_OVERSHADOWED + ); + // Fetch the load specs of all segments overlapping with the unused segment intervals + usedSegmentLoadSpecs.addAll(toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction) + .stream() + .map(DataSegment::getLoadSpec) + .collect(Collectors.toSet()) + ); + } // Kill segments from the deep storage only if their load specs are not being used by any used segments final List segmentsToBeKilled = unusedSegments .stream() - .filter(unusedSegment -> !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) + .filter(unusedSegment -> unusedSegment.getLoadSpec() == null + || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) .collect(Collectors.toList()); toolbox.getDataSegmentKiller().kill(segmentsToBeKilled);