Improve run time of coordinator duty MarkAsUnusedOvershadowedSegments (#13287)

In clusters with a large number of segments, the coordinator duty `MarkAsUnusedOvershadowedSegments`
can take a very long time to finish. This is because of the costly invocation of
`timeline.isOvershadowed`, which is done for every used segment in every coordinator run.

Changes
- Use `DataSourcesSnapshot.getOvershadowedSegments` to get all overshadowed segments
- Iterate over this set instead of all used segments to identify segments that can be marked as unused
- Mark segments as unused in the DB in batches rather than one at a time
- Refactor: Add class `SegmentTimeline` for ease of use and readability when working with a
`VersionedIntervalTimeline` of segments.
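
In essence, the duty's main loop changes roughly as follows. This is a simplified sketch of the
new flow (the full change is in the `MarkAsUnusedOvershadowedSegments` diff below, from which the
names `allOvershadowedSegments`, `timelines`, and `datasourceToUnusedSegments` are taken):

  // Sketch of the reworked duty run, assuming `timelines` holds a SegmentTimeline per datasource
  // built from the segments currently served by historicals.
  final Set<DataSegment> allOvershadowedSegments = params.getDataSourcesSnapshot().getOvershadowedSegments();
  if (allOvershadowedSegments.isEmpty()) {
    return params;   // nothing to do in this coordinator run
  }

  // Iterate only over the (small) overshadowed set instead of every used segment.
  final Map<String, Set<SegmentId>> datasourceToUnusedSegments = new HashMap<>();
  for (DataSegment segment : allOvershadowedSegments) {
    SegmentTimeline timeline = timelines.get(segment.getDataSource());
    if (timeline != null && timeline.isOvershadowed(segment)) {
      datasourceToUnusedSegments.computeIfAbsent(segment.getDataSource(), ds -> new HashSet<>())
                                .add(segment.getId());
    }
  }
  // One metadata-store update per datasource instead of one per segment.
  datasourceToUnusedSegments.forEach(coordinator::markSegmentsAsUnused);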
Kashif Faraz 2022-11-01 20:19:52 +05:30 committed by GitHub
parent 0d03ce435f
commit fd7864ae33
34 changed files with 281 additions and 164 deletions

View File

@@ -28,7 +28,7 @@ import org.apache.druid.server.coordinator.duty.CompactionSegmentIterator;
 import org.apache.druid.server.coordinator.duty.CompactionSegmentSearchPolicy;
 import org.apache.druid.server.coordinator.duty.NewestSegmentFirstPolicy;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.apache.druid.timeline.partition.ShardSpec;
 import org.joda.time.DateTime;
@@ -82,7 +82,7 @@ public class NewestSegmentFirstPolicyBenchmark
   private int numCompactionTaskSlots;
   private Map<String, DataSourceCompactionConfig> compactionConfigs;
-  private Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources;
+  private Map<String, SegmentTimeline> dataSources;

   @Setup(Level.Trial)
   public void setup()

View File

@@ -78,7 +78,7 @@ public class VersionedIntervalTimelineBenchmark
   private List<Interval> intervals;
   private List<DataSegment> segments;
-  private VersionedIntervalTimeline<String, DataSegment> timeline;
+  private SegmentTimeline timeline;
   private List<DataSegment> newSegments;

   @Setup
@@ -143,7 +143,7 @@ public class VersionedIntervalTimelineBenchmark
       nextMinorVersions.put(interval, (short) (numNonRootGenerations + 1));
     }

-    timeline = VersionedIntervalTimeline.forSegments(segments);
+    timeline = SegmentTimeline.forSegments(segments);

     newSegments = new ArrayList<>(200);
@@ -206,7 +206,7 @@ public class VersionedIntervalTimelineBenchmark
   @Benchmark
   public void benchAdd(Blackhole blackhole)
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(segments);
     for (DataSegment newSegment : newSegments) {
       timeline.add(
           newSegment.getInterval(),
@@ -220,7 +220,7 @@ public class VersionedIntervalTimelineBenchmark
   public void benchRemove(Blackhole blackhole)
   {
     final List<DataSegment> segmentsCopy = new ArrayList<>(segments);
-    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segmentsCopy);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(segmentsCopy);
     final int numTests = (int) (segmentsCopy.size() * 0.1);
     for (int i = 0; i < numTests; i++) {
       final DataSegment segment = segmentsCopy.remove(ThreadLocalRandom.current().nextInt(segmentsCopy.size()));

View File

@@ -27,7 +27,7 @@ package org.apache.druid.timeline;
  * An Overshadowable overshadows another if its root partition range contains that of another
  * and has a higher minorVersion. For more details, check https://github.com/apache/druid/issues/7491.
  */
-public interface Overshadowable<T extends Overshadowable>
+public interface Overshadowable<T extends Overshadowable<T>>
 {
   /**
    * Returns true if this overshadowable overshadows the other.

View File

@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.timeline;
+
+import java.util.Comparator;
+import java.util.Iterator;
+
+/**
+ * {@link VersionedIntervalTimeline} for {@link DataSegment} objects.
+ */
+public class SegmentTimeline extends VersionedIntervalTimeline<String, DataSegment>
+{
+  public static SegmentTimeline forSegments(Iterable<DataSegment> segments)
+  {
+    return forSegments(segments.iterator());
+  }
+
+  public static SegmentTimeline forSegments(Iterator<DataSegment> segments)
+  {
+    final SegmentTimeline timeline = new SegmentTimeline();
+    VersionedIntervalTimeline.addSegments(timeline, segments);
+    return timeline;
+  }
+
+  public SegmentTimeline()
+  {
+    super(Comparator.naturalOrder());
+  }
+
+  public boolean isOvershadowed(DataSegment segment)
+  {
+    return isOvershadowed(segment.getInterval(), segment.getVersion(), segment);
+  }
+}

View File

@@ -73,19 +73,6 @@ import java.util.stream.StreamSupport;
 public class VersionedIntervalTimeline<VersionType, ObjectType extends Overshadowable<ObjectType>>
     implements TimelineLookup<VersionType, ObjectType>
 {
-  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterable<DataSegment> segments)
-  {
-    return forSegments(segments.iterator());
-  }
-
-  public static VersionedIntervalTimeline<String, DataSegment> forSegments(Iterator<DataSegment> segments)
-  {
-    final VersionedIntervalTimeline<String, DataSegment> timeline =
-        new VersionedIntervalTimeline<>(Comparator.naturalOrder());
-    addSegments(timeline, segments);
-    return timeline;
-  }
-
   private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);

   // Below timelines stores only *visible* timelineEntries
@@ -106,16 +93,16 @@ public class VersionedIntervalTimeline<VersionType, ObjectType extends Overshado
   private final Comparator<? super VersionType> versionComparator;

   // Set this to true if the client needs to skip tombstones upon lookup (like the broker)
-  private boolean skipObjectsWithNoData = false;
+  private final boolean skipObjectsWithNoData;

   public VersionedIntervalTimeline(Comparator<? super VersionType> versionComparator)
   {
-    this.versionComparator = versionComparator;
+    this(versionComparator, false);
   }

   public VersionedIntervalTimeline(Comparator<? super VersionType> versionComparator, boolean skipObjectsWithNoData)
   {
-    this(versionComparator);
+    this.versionComparator = versionComparator;
     this.skipObjectsWithNoData = skipObjectsWithNoData;
   }

View File

@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.timeline;
+
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.timeline.partition.NumberedShardSpec;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+public class SegmentTimelineTest
+{
+  @Test
+  public void testIsOvershadowed()
+  {
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(
+        Arrays.asList(
+            createSegment("2022-01-01/2022-01-02", "v1", 0, 3),
+            createSegment("2022-01-01/2022-01-02", "v1", 1, 3),
+            createSegment("2022-01-01/2022-01-02", "v1", 2, 3),
+            createSegment("2022-01-02/2022-01-03", "v2", 0, 2),
+            createSegment("2022-01-02/2022-01-03", "v2", 1, 2)
+        )
+    );
+
+    Assert.assertFalse(
+        timeline.isOvershadowed(createSegment("2022-01-01/2022-01-02", "v1", 1, 3))
+    );
+    Assert.assertFalse(
+        timeline.isOvershadowed(createSegment("2022-01-01/2022-01-02", "v1", 2, 3))
+    );
+    Assert.assertFalse(
+        timeline.isOvershadowed(createSegment("2022-01-01/2022-01-02", "v1", 1, 4))
+    );
+    Assert.assertFalse(
+        timeline.isOvershadowed(createSegment("2022-01-01T00:00:00/2022-01-01T06:00:00", "v1", 1, 4))
+    );
+    Assert.assertTrue(
+        timeline.isOvershadowed(createSegment("2022-01-02/2022-01-03", "v1", 2, 4))
+    );
+    Assert.assertTrue(
+        timeline.isOvershadowed(createSegment("2022-01-02/2022-01-03", "v1", 0, 1))
+    );
+  }
+
+  private DataSegment createSegment(String interval, String version, int partitionNum, int totalNumPartitions)
+  {
+    return new DataSegment(
+        "wiki",
+        Intervals.of(interval),
+        version,
+        Collections.emptyMap(),
+        Collections.emptyList(),
+        Collections.emptyList(),
+        new NumberedShardSpec(partitionNum, totalNumPartitions),
+        0x9,
+        1L
+    );
+  }
+}

View File

@@ -175,7 +175,7 @@ import org.apache.druid.segment.transform.TransformSpec;
 import org.apache.druid.server.DruidNode;
 import org.apache.druid.sql.calcite.rel.DruidQuery;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
 import org.apache.druid.timeline.partition.NumberedPartialShardSpec;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
@@ -957,7 +957,7 @@ public class ControllerImpl implements Controller
       if (dataSegments.isEmpty()) {
         return Optional.empty();
       } else {
-        return Optional.of(VersionedIntervalTimeline.forSegments(dataSegments));
+        return Optional.of(SegmentTimeline.forSegments(dataSegments));
       }
     };
   }

View File

@@ -27,7 +27,7 @@ import org.apache.druid.msq.querykit.DataSegmentTimelineView;
 import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
 import org.junit.Assert;
 import org.junit.Before;
@@ -79,15 +79,14 @@ public class TableInputSpecSlicerTest extends InitializedNullHandlingTest
       BYTES_PER_SEGMENT
   );

-  private VersionedIntervalTimeline<String, DataSegment> timeline;
-  private DataSegmentTimelineView timelineView;
+  private SegmentTimeline timeline;
   private TableInputSpecSlicer slicer;

   @Before
   public void setUp()
   {
-    timeline = VersionedIntervalTimeline.forSegments(ImmutableList.of(SEGMENT1, SEGMENT2));
-    timelineView = (dataSource, intervals) -> {
+    timeline = SegmentTimeline.forSegments(ImmutableList.of(SEGMENT1, SEGMENT2));
+    DataSegmentTimelineView timelineView = (dataSource, intervals) -> {
       if (DATASOURCE.equals(dataSource)) {
         return Optional.of(timeline);
       } else {

View File

@@ -30,13 +30,12 @@ import org.apache.druid.indexing.overlord.Segments;
 import org.apache.druid.segment.indexing.DataSchema;
 import org.apache.druid.segment.indexing.IngestionSpec;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.PartitionChunk;
 import org.joda.time.Interval;
-
 import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -199,8 +198,7 @@ public class HadoopIngestionSpec extends IngestionSpec<HadoopIOConfig, HadoopTun
       }
     }

-    final VersionedIntervalTimeline<String, DataSegment> timeline =
-        VersionedIntervalTimeline.forSegments(usedVisibleSegments);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(usedVisibleSegments);

     final List<WindowedDataSegment> windowedSegments = new ArrayList<>();
     for (Interval interval : ingestionSpecObj.getIntervals()) {
       final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);

View File

@@ -74,7 +74,7 @@ import org.apache.druid.segment.transform.TransformSpec;
 import org.apache.druid.timeline.CompactionState;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
 import org.apache.druid.timeline.partition.TombstoneShardSpec;
 import org.joda.time.DateTime;
@@ -483,7 +483,7 @@ public abstract class AbstractBatchIndexTask extends AbstractTask
     } else {
       // Use segment lock
       // Create a timeline to find latest segments only
-      final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(
+      final SegmentTimeline timeline = SegmentTimeline.forSegments(
           segments
       );

View File

@@ -96,8 +96,8 @@ import org.apache.druid.segment.transform.TransformSpec;
 import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
 import org.apache.druid.server.coordinator.duty.CompactSegments;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.PartitionChunk;
 import org.apache.druid.timeline.partition.PartitionHolder;
 import org.joda.time.Duration;
@@ -735,7 +735,7 @@ public class CompactionTask extends AbstractBatchIndexTask
         segmentProvider.findSegments(toolbox.getTaskActionClient());
     segmentProvider.checkSegments(lockGranularityInUse, usedSegments);
     final Map<DataSegment, File> segmentFileMap = toolbox.fetchSegments(usedSegments);
-    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = VersionedIntervalTimeline
+    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = SegmentTimeline
         .forSegments(usedSegments)
         .lookup(segmentProvider.interval);
     return new NonnullPair<>(segmentFileMap, timelineSegments);

View File

@@ -71,8 +71,8 @@ import org.apache.druid.segment.realtime.firehose.ChatHandler;
 import org.apache.druid.server.security.Action;
 import org.apache.druid.server.security.AuthorizerMapper;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.PartitionChunk;
 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 import org.joda.time.Interval;
@@ -267,7 +267,7 @@ public class SinglePhaseSubTask extends AbstractBatchSubtask implements ChatHand
       // Find inputSegments overshadowed by pushedSegments
       final Set<DataSegment> allSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
       allSegments.addAll(pushedSegments);
-      final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
+      final SegmentTimeline timeline = SegmentTimeline.forSegments(allSegments);
       final Set<DataSegment> oldSegments = FluentIterable.from(timeline.findFullyOvershadowed())
                                                          .transformAndConcat(TimelineObjectHolder::getObject)
                                                          .transform(PartitionChunk::getObject)

View File

@@ -56,8 +56,8 @@ import org.apache.druid.segment.IndexIO;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.loading.SegmentCacheManager;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.PartitionChunk;
 import org.apache.druid.timeline.partition.PartitionHolder;
 import org.apache.druid.utils.Streams;
@@ -509,7 +509,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
       }
     }

-    return VersionedIntervalTimeline.forSegments(usedSegments).lookup(interval);
+    return SegmentTimeline.forSegments(usedSegments).lookup(interval);
   }

   public static List<TimelineObjectHolder<String, DataSegment>> getTimelineForSegmentIds(

View File

@@ -50,7 +50,7 @@ import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
 import org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.joda.time.Interval;
@@ -241,7 +241,7 @@ public class SinglePhaseParallelIndexingTest extends AbstractParallelIndexSuperv
         : getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", inputInterval, Segments.ONLY_VISIBLE);
     Assert.assertFalse(newSegments.isEmpty());
     allSegments.addAll(newSegments);
-    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(allSegments);
     final Interval timelineInterval = inputInterval == null ? Intervals.ETERNITY : inputInterval;
     final Set<DataSegment> visibles = timeline.findNonOvershadowedObjectsInInterval(
@@ -606,7 +606,7 @@ public class SinglePhaseParallelIndexingTest extends AbstractParallelIndexSuperv
     final Collection<DataSegment> newSegments =
         getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", interval, Segments.ONLY_VISIBLE);
     Assert.assertTrue(newSegments.containsAll(oldSegments));
-    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(newSegments);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(newSegments);
     final Set<DataSegment> visibles = timeline.findNonOvershadowedObjectsInInterval(interval, Partitions.ONLY_COMPLETE);
     Assert.assertEquals(new HashSet<>(newSegments), visibles);
   }
@@ -663,8 +663,7 @@ public class SinglePhaseParallelIndexingTest extends AbstractParallelIndexSuperv
     final Collection<DataSegment> afterAppendSegments =
         getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", interval, Segments.ONLY_VISIBLE);
     Assert.assertTrue(afterAppendSegments.containsAll(beforeAppendSegments));
-    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline
-        .forSegments(afterAppendSegments);
+    final SegmentTimeline timeline = SegmentTimeline.forSegments(afterAppendSegments);
     final Set<DataSegment> visibles = timeline.findNonOvershadowedObjectsInInterval(interval, Partitions.ONLY_COMPLETE);
     Assert.assertEquals(new HashSet<>(afterAppendSegments), visibles);
   }

View File

@@ -42,8 +42,8 @@ import org.apache.druid.testing.clients.ClientInfoResourceTestClient;
 import org.apache.druid.testing.utils.ITRetryUtil;
 import org.apache.druid.testing.utils.SqlTestQueryHelper;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.junit.Assert;

 import java.io.IOException;
@@ -359,7 +359,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
     if (waitForNewVersion) {
       ITRetryUtil.retryUntilTrue(
           () -> {
-            final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(
+            final SegmentTimeline timeline = SegmentTimeline.forSegments(
                 coordinator.getAvailableSegments(dataSourceName)
             );

View File

@@ -42,8 +42,8 @@ import org.apache.druid.testing.clients.ClientInfoResourceTestClient;
 import org.apache.druid.testing.utils.ITRetryUtil;
 import org.apache.druid.testing.utils.SqlTestQueryHelper;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.testng.Assert;

 import java.io.IOException;
@@ -359,7 +359,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
     if (waitForNewVersion) {
       ITRetryUtil.retryUntilTrue(
           () -> {
-            final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(
+            final SegmentTimeline timeline = SegmentTimeline.forSegments(
                 coordinator.getAvailableSegments(dataSourceName)
             );

View File

@@ -24,7 +24,7 @@ import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Maps;
 import org.apache.druid.metadata.SqlSegmentsMetadataManager;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.utils.CollectionUtils;
@@ -56,7 +56,7 @@ public class DataSourcesSnapshot
   }

   public static DataSourcesSnapshot fromUsedSegmentsTimelines(
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> usedSegmentsTimelinesPerDataSource,
+      Map<String, SegmentTimeline> usedSegmentsTimelinesPerDataSource,
       ImmutableMap<String, String> dataSourceProperties
   )
   {
@@ -73,8 +73,8 @@ public class DataSourcesSnapshot
   }

   private final Map<String, ImmutableDruidDataSource> dataSourcesWithAllUsedSegments;
-  private final Map<String, VersionedIntervalTimeline<String, DataSegment>> usedSegmentsTimelinesPerDataSource;
-  private final ImmutableSet<SegmentId> overshadowedSegments;
+  private final Map<String, SegmentTimeline> usedSegmentsTimelinesPerDataSource;
+  private final ImmutableSet<DataSegment> overshadowedSegments;

   public DataSourcesSnapshot(Map<String, ImmutableDruidDataSource> dataSourcesWithAllUsedSegments)
   {
@@ -82,14 +82,14 @@ public class DataSourcesSnapshot
         dataSourcesWithAllUsedSegments,
         CollectionUtils.mapValues(
             dataSourcesWithAllUsedSegments,
-            dataSource -> VersionedIntervalTimeline.forSegments(dataSource.getSegments())
+            dataSource -> SegmentTimeline.forSegments(dataSource.getSegments())
         )
     );
   }

   private DataSourcesSnapshot(
       Map<String, ImmutableDruidDataSource> dataSourcesWithAllUsedSegments,
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> usedSegmentsTimelinesPerDataSource
+      Map<String, SegmentTimeline> usedSegmentsTimelinesPerDataSource
   )
   {
     this.dataSourcesWithAllUsedSegments = dataSourcesWithAllUsedSegments;
@@ -113,12 +113,12 @@ public class DataSourcesSnapshot
     return dataSourcesWithAllUsedSegments.get(dataSourceName);
   }

-  public Map<String, VersionedIntervalTimeline<String, DataSegment>> getUsedSegmentsTimelinesPerDataSource()
+  public Map<String, SegmentTimeline> getUsedSegmentsTimelinesPerDataSource()
   {
     return usedSegmentsTimelinesPerDataSource;
   }

-  public ImmutableSet<SegmentId> getOvershadowedSegments()
+  public ImmutableSet<DataSegment> getOvershadowedSegments()
   {
     return overshadowedSegments;
   }
@@ -150,20 +150,20 @@ public class DataSourcesSnapshot
    * This method should be deduplicated with {@link VersionedIntervalTimeline#findFullyOvershadowed()}: see
    * https://github.com/apache/druid/issues/8070.
    *
-   * @return overshadowed segment Ids list
+   * @return List of overshadowed segments
    */
-  private List<SegmentId> determineOvershadowedSegments()
+  private List<DataSegment> determineOvershadowedSegments()
   {
     // It's fine to add all overshadowed segments to a single collection because only
     // a small fraction of the segments in the cluster are expected to be overshadowed,
     // so building this collection shouldn't generate a lot of garbage.
-    final List<SegmentId> overshadowedSegments = new ArrayList<>();
+    final List<DataSegment> overshadowedSegments = new ArrayList<>();
     for (ImmutableDruidDataSource dataSource : dataSourcesWithAllUsedSegments.values()) {
-      VersionedIntervalTimeline<String, DataSegment> usedSegmentsTimeline =
+      SegmentTimeline usedSegmentsTimeline =
           usedSegmentsTimelinesPerDataSource.get(dataSource.getName());
       for (DataSegment segment : dataSource.getSegments()) {
-        if (usedSegmentsTimeline.isOvershadowed(segment.getInterval(), segment.getVersion(), segment)) {
-          overshadowedSegments.add(segment.getId());
+        if (usedSegmentsTimeline.isOvershadowed(segment)) {
+          overshadowedSegments.add(segment);
         }
       }
     }

View File

@@ -50,8 +50,8 @@ import org.apache.druid.segment.SegmentUtils;
 import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.NoneShardSpec;
 import org.apache.druid.timeline.partition.PartialShardSpec;
 import org.apache.druid.timeline.partition.PartitionChunk;
@@ -157,7 +157,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
     return connector.retryWithHandle(
         handle -> {
           if (visibility == Segments.ONLY_VISIBLE) {
-            final VersionedIntervalTimeline<String, DataSegment> timeline =
+            final SegmentTimeline timeline =
                 getTimelineForIntervalsWithHandle(handle, dataSource, intervals);
             return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE);
           } else {
@@ -256,7 +256,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
     return identifiers;
   }

-  private VersionedIntervalTimeline<String, DataSegment> getTimelineForIntervalsWithHandle(
+  private SegmentTimeline getTimelineForIntervalsWithHandle(
      final Handle handle,
      final String dataSource,
      final List<Interval> intervals
@@ -265,7 +265,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
     try (final CloseableIterator<DataSegment> iterator =
             SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper)
                                     .retrieveUsedSegments(dataSource, intervals)) {
-      return VersionedIntervalTimeline.forSegments(iterator);
+      return SegmentTimeline.forSegments(iterator);
     }
   }
@@ -323,7 +323,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
     // Find which segments are used (i.e. not overshadowed).
     final Set<DataSegment> usedSegments = new HashSet<>();
     List<TimelineObjectHolder<String, DataSegment>> segmentHolders =
-        VersionedIntervalTimeline.forSegments(segments).lookupWithIncompletePartitions(Intervals.ETERNITY);
+        SegmentTimeline.forSegments(segments).lookupWithIncompletePartitions(Intervals.ETERNITY);
     for (TimelineObjectHolder<String, DataSegment> holder : segmentHolders) {
       for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
         usedSegments.add(chunk.getObject());

View File

@@ -96,11 +96,6 @@ public interface SegmentsMetadataManager
    */
   Collection<ImmutableDruidDataSource> getImmutableDataSourcesWithAllUsedSegments();

-  /**
-   * Returns a set of overshadowed segment ids.
-   */
-  Set<SegmentId> getOvershadowedSegments();
-
   /**
    * Returns a snapshot of DruidDataSources and overshadowed segments
    */

View File

@@ -49,6 +49,7 @@ import org.apache.druid.java.util.emitter.EmittingLogger;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
 import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 import org.joda.time.DateTime;
@@ -558,8 +559,8 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
   private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable Interval interval)
   {
     final List<DataSegment> unusedSegments = new ArrayList<>();
-    final VersionedIntervalTimeline<String, DataSegment> timeline =
-        VersionedIntervalTimeline.forSegments(Collections.emptyList());
+    final SegmentTimeline timeline =
+        SegmentTimeline.forSegments(Collections.emptyIterator());

     connector.inReadOnlyTransaction(
         (handle, status) -> {
@@ -593,15 +594,14 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
   private int markNonOvershadowedSegmentsAsUsed(
       List<DataSegment> unusedSegments,
-      VersionedIntervalTimeline<String, DataSegment> timeline
+      SegmentTimeline timeline
   )
   {
     List<SegmentId> segmentIdsToMarkAsUsed = new ArrayList<>();
     for (DataSegment segment : unusedSegments) {
-      if (timeline.isOvershadowed(segment.getInterval(), segment.getVersion(), segment)) {
-        continue;
+      if (!timeline.isOvershadowed(segment)) {
+        segmentIdsToMarkAsUsed.add(segment.getId());
       }
-      segmentIdsToMarkAsUsed.add(segment.getId());
     }

     return markSegmentsAsUsed(segmentIdsToMarkAsUsed);
@@ -612,7 +612,7 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
       throws UnknownSegmentIdsException
   {
     try {
-      Pair<List<DataSegment>, VersionedIntervalTimeline<String, DataSegment>> unusedSegmentsAndTimeline = connector
+      Pair<List<DataSegment>, SegmentTimeline> unusedSegmentsAndTimeline = connector
          .inReadOnlyTransaction(
              (handle, status) -> {
                List<DataSegment> unusedSegments = retrieveUnusedSegments(dataSource, segmentIds, handle);
@@ -621,7 +621,7 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
                );
                try (CloseableIterator<DataSegment> usedSegmentsOverlappingUnusedSegmentsIntervals =
                         retrieveUsedSegmentsOverlappingIntervals(dataSource, unusedSegmentsIntervals, handle)) {
-                  VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(
+                  SegmentTimeline timeline = SegmentTimeline.forSegments(
                      Iterators.concat(usedSegmentsOverlappingUnusedSegmentsIntervals, unusedSegments.iterator())
                  );
                  return new Pair<>(unusedSegments, timeline);
@@ -630,7 +630,7 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
      );
      List<DataSegment> unusedSegments = unusedSegmentsAndTimeline.lhs;
-      VersionedIntervalTimeline<String, DataSegment> timeline = unusedSegmentsAndTimeline.rhs;
+      SegmentTimeline timeline = unusedSegmentsAndTimeline.rhs;
      return markNonOvershadowedSegmentsAsUsed(unusedSegments, timeline);
    }
    catch (Exception e) {
@@ -800,12 +800,6 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
     return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSourcesWithAllUsedSegments();
   }

-  @Override
-  public Set<SegmentId> getOvershadowedSegments()
-  {
-    return getSnapshotOfDataSourcesWithAllUsedSegments().getOvershadowedSegments();
-  }
-
   @Override
   public DataSourcesSnapshot getSnapshotOfDataSourcesWithAllUsedSegments()
   {
@@ -834,16 +828,18 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager
   }

   @Override
-  public Optional<Iterable<DataSegment>> iterateAllUsedNonOvershadowedSegmentsForDatasourceInterval(String datasource,
-                                                                                                     Interval interval,
-                                                                                                     boolean requiresLatest)
+  public Optional<Iterable<DataSegment>> iterateAllUsedNonOvershadowedSegmentsForDatasourceInterval(
+      String datasource,
+      Interval interval,
+      boolean requiresLatest
+  )
   {
     if (requiresLatest) {
       forceOrWaitOngoingDatabasePoll();
     } else {
       useLatestIfWithinDelayOrPerformNewDatabasePoll();
     }

-    VersionedIntervalTimeline<String, DataSegment> usedSegmentsTimeline
+    SegmentTimeline usedSegmentsTimeline
        = dataSourcesSnapshot.getUsedSegmentsTimelinesPerDataSource().get(datasource);
     return Optional.fromNullable(usedSegmentsTimeline)
                    .transform(timeline -> timeline.findNonOvershadowedObjectsInInterval(interval, Partitions.ONLY_COMPLETE));

View File

@@ -93,7 +93,7 @@ public class SqlSegmentsMetadataQuery
    * You cannot assume that segments returned by this call are actually active. Because there is some delay between
    * new segment publishing and the marking-unused of older segments, it is possible that some segments returned
    * by this call are overshadowed by other segments. To check for this, use
-   * {@link org.apache.druid.timeline.VersionedIntervalTimeline#forSegments(Iterator)}.
+   * {@link org.apache.druid.timeline.SegmentTimeline#forSegments(Iterable)}.
    *
    * This call does not return any information about realtime segments.
    *

View File

@@ -394,6 +394,13 @@ public class DruidCoordinator
     return CoordinatorCompactionConfig.current(configManager);
   }

+  public void markSegmentsAsUnused(String datasource, Set<SegmentId> segmentIds)
+  {
+    log.debug("Marking [%d] segments of datasource [%s] as unused: %s", segmentIds.size(), datasource, segmentIds);
+    int updatedCount = segmentsMetadataManager.markSegmentsAsUnused(segmentIds);
+    log.info("Successfully marked [%d] segments of datasource [%s] as unused", updatedCount, datasource);
+  }
+
   public void markSegmentAsUnused(DataSegment segment)
   {
     log.debug("Marking segment[%s] as unused", segment.getId());

View File

@@ -26,7 +26,7 @@ import org.apache.druid.client.DataSourcesSnapshot;
 import org.apache.druid.java.util.emitter.service.ServiceEmitter;
 import org.apache.druid.metadata.MetadataRuleManager;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import javax.annotation.Nullable;
 import java.util.Arrays;
@@ -128,7 +128,7 @@ public class DruidCoordinatorRuntimeParams
    * Creates and returns a "dataSource -> VersionedIntervalTimeline[version String, DataSegment]" map with "used"
    * segments.
    */
-  public Map<String, VersionedIntervalTimeline<String, DataSegment>> getUsedSegmentsTimelinesPerDataSource()
+  public Map<String, SegmentTimeline> getUsedSegmentsTimelinesPerDataSource()
   {
     Preconditions.checkState(dataSourcesSnapshot != null, "dataSourcesSnapshot or usedSegments must be set");
     return dataSourcesSnapshot.getUsedSegmentsTimelinesPerDataSource();
@@ -378,7 +378,7 @@ public class DruidCoordinatorRuntimeParams
   /** This method must be used in test code only. */
   @VisibleForTesting
   public Builder withUsedSegmentsTimelinesPerDataSourceInTest(
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> usedSegmentsTimelinesPerDataSource
+      Map<String, SegmentTimeline> usedSegmentsTimelinesPerDataSource
   )
   {
     this.dataSourcesSnapshot = DataSourcesSnapshot.fromUsedSegmentsTimelines(

View File

@@ -45,7 +45,7 @@ import org.apache.druid.server.coordinator.DataSourceCompactionConfig;
 import org.apache.druid.server.coordinator.DruidCoordinatorConfig;
 import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams;
 import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.joda.time.Interval;
 import javax.annotation.Nullable;
@@ -127,7 +127,7 @@ public class CompactSegments implements CoordinatorCustomDuty
     final CoordinatorStats stats = new CoordinatorStats();
     List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
     if (dynamicConfig.getMaxCompactionTaskSlots() > 0) {
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources =
+      Map<String, SegmentTimeline> dataSources =
           params.getUsedSegmentsTimelinesPerDataSource();
       if (compactionConfigList != null && !compactionConfigList.isEmpty()) {
         Map<String, DataSourceCompactionConfig> compactionConfigs = compactionConfigList

View File

@@ -20,8 +20,7 @@
 package org.apache.druid.server.coordinator.duty;

 import org.apache.druid.server.coordinator.DataSourceCompactionConfig;
-import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.joda.time.Interval;

 import java.util.List;
@@ -37,7 +36,7 @@ public interface CompactionSegmentSearchPolicy
    */
   CompactionSegmentIterator reset(
       Map<String, DataSourceCompactionConfig> compactionConfigs,
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources,
+      Map<String, SegmentTimeline> dataSources,
       Map<String, List<Interval>> skipIntervals
   );
 }

View File

@ -21,21 +21,27 @@ package org.apache.druid.server.coordinator.duty;
import org.apache.druid.client.ImmutableDruidDataSource; import org.apache.druid.client.ImmutableDruidDataSource;
import org.apache.druid.client.ImmutableDruidServer; import org.apache.druid.client.ImmutableDruidServer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.server.coordinator.CoordinatorStats; import org.apache.druid.server.coordinator.CoordinatorStats;
import org.apache.druid.server.coordinator.DruidCluster; import org.apache.druid.server.coordinator.DruidCluster;
import org.apache.druid.server.coordinator.DruidCoordinator; import org.apache.druid.server.coordinator.DruidCoordinator;
import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams;
import org.apache.druid.server.coordinator.ServerHolder; import org.apache.druid.server.coordinator.ServerHolder;
import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.SegmentTimeline;
import org.apache.druid.timeline.VersionedIntervalTimeline; import org.apache.druid.timeline.VersionedIntervalTimeline;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
public class MarkAsUnusedOvershadowedSegments implements CoordinatorDuty public class MarkAsUnusedOvershadowedSegments implements CoordinatorDuty
{ {
private static final Logger log = new Logger(MarkAsUnusedOvershadowedSegments.class);
   private final DruidCoordinator coordinator;
   public MarkAsUnusedOvershadowedSegments(DruidCoordinator coordinator)
@@ -48,13 +54,20 @@ public class MarkAsUnusedOvershadowedSegments implements CoordinatorDuty
   {
     // Mark as unused overshadowed segments only if we've had enough time to make sure we aren't flapping with old data.
     if (!params.coordinatorIsLeadingEnoughTimeToMarkAsUnusedOvershadowedSegements()) {
+      log.info("Skipping MarkAsUnused as coordinator is not leading enough time.");
+      return params;
+    }
+
+    final Set<DataSegment> allOvershadowedSegments = params.getDataSourcesSnapshot().getOvershadowedSegments();
+    if (allOvershadowedSegments.isEmpty()) {
+      log.info("Skipping MarkAsUnused as there are no overshadowed segments.");
       return params;
     }
     CoordinatorStats stats = new CoordinatorStats();
     DruidCluster cluster = params.getDruidCluster();
-    Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = new HashMap<>();
+    final Map<String, SegmentTimeline> timelines = new HashMap<>();
     for (SortedSet<ServerHolder> serverHolders : cluster.getSortedHistoricalsByTier()) {
       for (ServerHolder serverHolder : serverHolders) {
@@ -70,31 +83,30 @@ public class MarkAsUnusedOvershadowedSegments implements CoordinatorDuty
     // to prevent unpublished segments from prematurely overshadowing segments.
     // Mark all segments as unused in db that are overshadowed by served segments
-    for (DataSegment dataSegment : params.getUsedSegments()) {
-      VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSegment.getDataSource());
-      if (timeline != null
-          && timeline.isOvershadowed(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment)) {
-        coordinator.markSegmentAsUnused(dataSegment);
+    final Map<String, Set<SegmentId>> datasourceToUnusedSegments = new HashMap<>();
+    for (DataSegment dataSegment : allOvershadowedSegments) {
+      SegmentTimeline timeline = timelines.get(dataSegment.getDataSource());
+      if (timeline != null && timeline.isOvershadowed(dataSegment)) {
+        datasourceToUnusedSegments.computeIfAbsent(dataSegment.getDataSource(), ds -> new HashSet<>())
+                                  .add(dataSegment.getId());
         stats.addToGlobalStat("overShadowedCount", 1);
       }
     }
+    datasourceToUnusedSegments.forEach(coordinator::markSegmentsAsUnused);
     return params.buildFromExisting().withCoordinatorStats(stats).build();
   }
   private void addSegmentsFromServer(
       ServerHolder serverHolder,
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines
+      Map<String, SegmentTimeline> timelines
   )
   {
     ImmutableDruidServer server = serverHolder.getServer();
     for (ImmutableDruidDataSource dataSource : server.getDataSources()) {
-      VersionedIntervalTimeline<String, DataSegment> timeline = timelines
-          .computeIfAbsent(
-              dataSource.getName(),
-              dsName -> new VersionedIntervalTimeline<>(Comparator.naturalOrder())
-          );
+      SegmentTimeline timeline = timelines
+          .computeIfAbsent(dataSource.getName(), dsName -> new SegmentTimeline());
       VersionedIntervalTimeline.addSegments(timeline, dataSource.getSegments().iterator());
     }
   }
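The forEach at the end of run() is where the batching happens: overshadowed segment IDs are grouped per datasource and handed to the coordinator in a single markSegmentsAsUnused call, instead of one metadata-store update per segment. Roughly, a batched update of that shape could be issued as in the sketch below; this is only an illustrative JDBC example, and the druid_segments table and column names are assumptions, not the actual metadata-manager code (which is not part of this diff).

  import java.sql.Connection;
  import java.sql.PreparedStatement;
  import java.sql.SQLException;
  import java.util.Collections;
  import java.util.Set;
  import org.apache.druid.timeline.SegmentId;

  // Illustrative sketch only: one UPDATE with an IN clause per datasource, not the real implementation.
  static int markSegmentsAsUnused(Connection connection, String dataSource, Set<SegmentId> segmentIds)
      throws SQLException
  {
    if (segmentIds.isEmpty()) {
      return 0;
    }
    String placeholders = String.join(",", Collections.nCopies(segmentIds.size(), "?"));
    String sql = "UPDATE druid_segments SET used = false WHERE dataSource = ? AND id IN (" + placeholders + ")";
    try (PreparedStatement statement = connection.prepareStatement(sql)) {
      statement.setString(1, dataSource);
      int index = 2;
      for (SegmentId segmentId : segmentIds) {
        statement.setString(index++, segmentId.toString());
      }
      // Number of segments marked unused in a single round trip.
      return statement.executeUpdate();
    }
  }

With one statement per datasource, the cost of this step scales with the number of datasources that have overshadowed segments rather than with the number of overshadowed segments themselves.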

View File

@@ -48,6 +48,7 @@ import org.apache.druid.server.coordinator.DataSourceCompactionConfig;
 import org.apache.druid.timeline.CompactionState;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.NumberedPartitionChunk;
@@ -103,7 +104,7 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
   NewestSegmentFirstIterator(
       ObjectMapper objectMapper,
       Map<String, DataSourceCompactionConfig> compactionConfigs,
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources,
+      Map<String, SegmentTimeline> dataSources,
       Map<String, List<Interval>> skipIntervals
   )
   {
@@ -111,7 +112,7 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
     this.compactionConfigs = compactionConfigs;
     this.timelineIterators = Maps.newHashMapWithExpectedSize(dataSources.size());
-    dataSources.forEach((String dataSource, VersionedIntervalTimeline<String, DataSegment> timeline) -> {
+    dataSources.forEach((String dataSource, SegmentTimeline timeline) -> {
       final DataSourceCompactionConfig config = compactionConfigs.get(dataSource);
       Granularity configuredSegmentGranularity = null;
       if (config != null && !timeline.isEmpty()) {
@@ -121,7 +122,7 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
         Map<Interval, Set<DataSegment>> intervalToPartitionMap = new HashMap<>();
         configuredSegmentGranularity = config.getGranularitySpec().getSegmentGranularity();
         // Create a new timeline to hold segments in the new configured segment granularity
-        VersionedIntervalTimeline<String, DataSegment> timelineWithConfiguredSegmentGranularity = new VersionedIntervalTimeline<>(Comparator.naturalOrder());
+        SegmentTimeline timelineWithConfiguredSegmentGranularity = new SegmentTimeline();
         Set<DataSegment> segments = timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE);
         for (DataSegment segment : segments) {
           // Convert original segmentGranularity to new granularities bucket by configuredSegmentGranularity
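The classes above now lean on SegmentTimeline instead of spelling out VersionedIntervalTimeline<String, DataSegment> with an explicit comparator. Inferred purely from the call sites in this diff (the no-arg constructor, SegmentTimeline.forSegments(...), and timeline.isOvershadowed(segment)), a minimal version of such a class could look like the sketch below; the class actually added by the commit may carry more than this.

  // Minimal sketch inferred from usage; not the full class from the commit.
  public class SegmentTimeline extends VersionedIntervalTimeline<String, DataSegment>
  {
    public SegmentTimeline()
    {
      // Segment versions are plain strings compared in natural order,
      // which the replaced call sites passed explicitly as Comparator.naturalOrder().
      super(Comparator.naturalOrder());
    }

    public static SegmentTimeline forSegments(Iterable<DataSegment> segments)
    {
      final SegmentTimeline timeline = new SegmentTimeline();
      addSegments(timeline, segments.iterator());
      return timeline;
    }

    public boolean isOvershadowed(DataSegment segment)
    {
      // Shorthand for the (interval, version, object) overload used before this change.
      return isOvershadowed(segment.getInterval(), segment.getVersion(), segment);
    }
  }

Callers such as NewestSegmentFirstIterator only need the type name to shrink; the timeline semantics stay the same.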

View File

@@ -21,8 +21,7 @@ package org.apache.druid.server.coordinator.duty;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.druid.server.coordinator.DataSourceCompactionConfig;
-import org.apache.druid.timeline.DataSegment;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.joda.time.Interval;
 import java.util.List;
@@ -43,7 +42,7 @@ public class NewestSegmentFirstPolicy implements CompactionSegmentSearchPolicy
   @Override
   public CompactionSegmentIterator reset(
       Map<String, DataSourceCompactionConfig> compactionConfigs,
-      Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources,
+      Map<String, SegmentTimeline> dataSources,
       Map<String, List<Interval>> skipIntervals
   )
   {

View File

@@ -89,7 +89,7 @@ public class RunRules implements CoordinatorDuty
     // eventually will be unloaded from Historical servers. Segments overshadowed by *served* used segments are marked
     // as unused in MarkAsUnusedOvershadowedSegments, and then eventually Coordinator sends commands to Historical nodes
     // to unload such segments in UnloadUnusedSegments.
-    Set<SegmentId> overshadowed = params.getDataSourcesSnapshot().getOvershadowedSegments();
+    Set<DataSegment> overshadowed = params.getDataSourcesSnapshot().getOvershadowedSegments();
     for (String tier : cluster.getTierNames()) {
       replicatorThrottler.updateReplicationState(tier);
@@ -122,7 +122,7 @@
       }
     }
     for (DataSegment segment : params.getUsedSegments()) {
-      if (overshadowed.contains(segment.getId())) {
+      if (overshadowed.contains(segment)) {
         // Skipping overshadowed segments
         continue;
       }
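getOvershadowedSegments() now hands RunRules the overshadowed DataSegment instances themselves, so the skip check is a direct set-membership test rather than a lookup keyed by SegmentId; this relies on DataSegment equality being keyed by the segment ID. On the snapshot side, the set can be pictured as being computed once per snapshot along the lines of the sketch below; determineOvershadowedSegments is an assumed helper name for illustration, not the actual DataSourcesSnapshot code.

  // Hypothetical sketch: derive the overshadowed set once when the snapshot of used segments is built.
  private static Set<DataSegment> determineOvershadowedSegments(
      Iterable<DataSegment> usedSegments,
      Map<String, SegmentTimeline> usedSegmentTimelines
  )
  {
    final Set<DataSegment> overshadowed = new HashSet<>();
    for (DataSegment segment : usedSegments) {
      final SegmentTimeline timeline = usedSegmentTimelines.get(segment.getDataSource());
      if (timeline != null && timeline.isOvershadowed(segment)) {
        overshadowed.add(segment);
      }
    }
    return overshadowed;
  }

Because the set is built once per snapshot, consumers such as RunRules here and MetadataResource below can answer "is this segment overshadowed?" with a constant-time contains call instead of re-deriving overshadow relationships on every run.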

View File

@@ -182,10 +182,10 @@ public class MetadataResource
     final Stream<DataSegment> usedSegments = dataSourcesWithUsedSegments
         .stream()
         .flatMap(t -> t.getSegments().stream());
-    final Set<SegmentId> overshadowedSegments = dataSourcesSnapshot.getOvershadowedSegments();
+    final Set<DataSegment> overshadowedSegments = dataSourcesSnapshot.getOvershadowedSegments();
     final Stream<SegmentWithOvershadowedStatus> usedSegmentsWithOvershadowedStatus = usedSegments
-        .map(segment -> new SegmentWithOvershadowedStatus(segment, overshadowedSegments.contains(segment.getId())));
+        .map(segment -> new SegmentWithOvershadowedStatus(segment, overshadowedSegments.contains(segment)));
     final Function<SegmentWithOvershadowedStatus, Iterable<ResourceAction>> raGenerator = segment -> Collections
         .singletonList(AuthorizationUtils.DATASOURCE_READ_RA_GENERATOR.apply(segment.getDataSegment().getDataSource()));

View File

@@ -86,6 +86,7 @@ import org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig;
 import org.apache.druid.server.coordinator.UserCompactionTaskTransformConfig;
 import org.apache.druid.timeline.CompactionState;
 import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
@@ -176,7 +177,7 @@ public class CompactSegmentsTest
   private final PartitionsSpec partitionsSpec;
   private final BiFunction<Integer, Integer, ShardSpec> shardSpecFactory;
-  private Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources;
+  private Map<String, SegmentTimeline> dataSources;
   Map<String, List<DataSegment>> datasourceToSegments = new HashMap<>();
   public CompactSegmentsTest(PartitionsSpec partitionsSpec, BiFunction<Integer, Integer, ShardSpec> shardSpecFactory)

View File

@@ -117,8 +117,7 @@ public class MarkAsUnusedOvershadowedSegmentsTest
         .andReturn(ImmutableSet.of(segmentV1, segmentV2))
         .anyTimes();
     EasyMock.expect(druidDataSource.getName()).andReturn("test").anyTimes();
-    coordinator.markSegmentAsUnused(segmentV1);
-    coordinator.markSegmentAsUnused(segmentV0);
+    coordinator.markSegmentsAsUnused("test", ImmutableSet.of(segmentV1.getId(), segmentV0.getId()));
     EasyMock.expectLastCall();
     EasyMock.replay(mockPeon, coordinator, druidServer, druidDataSource);

View File

@@ -55,7 +55,7 @@ import org.apache.druid.server.coordinator.UserCompactionTaskTransformConfig;
 import org.apache.druid.timeline.CompactionState;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.Partitions;
-import org.apache.druid.timeline.VersionedIntervalTimeline;
+import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.apache.druid.timeline.partition.ShardSpec;
 import org.assertj.core.api.Assertions;
@@ -315,7 +315,7 @@ public class NewestSegmentFirstPolicyTest
   public void testClearSegmentsToCompactWhenSkippingSegments()
   {
     final long inputSegmentSizeBytes = 800000;
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
         new SegmentGenerateSpec(
             Intervals.of("2017-12-03T00:00:00/2017-12-04T00:00:00"),
             new Period("P1D"),
@@ -359,7 +359,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIfFirstSegmentIsInSkipOffset()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-12-02T14:00:00/2017-12-03T00:00:00"),
            new Period("PT5H"),
@@ -380,7 +380,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIfFirstSegmentOverlapsSkipOffset()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-12-01T23:00:00/2017-12-03T00:00:00"),
            new Period("PT5H"),
@@ -401,7 +401,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIfSegmentsSkipOffsetWithConfiguredSegmentGranularityEqual()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-11-30T23:00:00/2017-12-03T00:00:00"), new Period("P1D")),
        new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("P1D"))
     );
@@ -424,7 +424,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIfSegmentsSkipOffsetWithConfiguredSegmentGranularityLarger()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        // This contains segment that
        // - Cross between month boundary of latest month (starts in Nov and ends in Dec). This should be skipped
        // - Fully in latest month (starts in Dec and ends in Dec). This should be skipped
@@ -454,7 +454,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIfSegmentsSkipOffsetWithConfiguredSegmentGranularitySmaller()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-12-01T23:00:00/2017-12-03T00:00:00"), new Period("PT5H")),
        new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("PT5H"))
     );
@@ -563,7 +563,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIteratorReturnsSegmentsInConfiguredSegmentGranularity()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        // Segments with day interval from Oct to Dec
        new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-12-31T00:00:00"), new Period("P1D"))
     );
@@ -611,7 +611,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIteratorReturnsSegmentsInMultipleIntervalIfConfiguredSegmentGranularityCrossBoundary()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2020-01-01/2020-01-08"), new Period("P7D")),
        new SegmentGenerateSpec(Intervals.of("2020-01-28/2020-02-03"), new Period("P7D")),
        new SegmentGenerateSpec(Intervals.of("2020-02-08/2020-02-15"), new Period("P7D"))
@@ -648,7 +648,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIteratorDoesNotReturnCompactedInterval()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-12-01T00:00:00/2017-12-02T00:00:00"), new Period("P1D"))
     );
@@ -670,7 +670,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIteratorReturnsAllMixedVersionSegmentsInConfiguredSegmentGranularity()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), "1994-04-29T00:00:00.000Z", null),
        new SegmentGenerateSpec(Intervals.of("2017-10-01T01:00:00/2017-10-01T02:00:00"), new Period("PT1H"), "1994-04-30T00:00:00.000Z", null)
     );
@@ -703,7 +703,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -736,7 +736,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -769,7 +769,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -812,7 +812,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -855,7 +855,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
@@ -907,7 +907,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
@@ -961,7 +961,7 @@ public class NewestSegmentFirstPolicyTest
     // rollup=false for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
     // rollup=true for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
     // and rollup=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (queryGranularity was not set during last compaction)
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1021,7 +1021,7 @@ public class NewestSegmentFirstPolicyTest
     // queryGranularity=DAY for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
     // queryGranularity=MINUTE for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
     // and queryGranularity=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (queryGranularity was not set during last compaction)
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1082,7 +1082,7 @@ public class NewestSegmentFirstPolicyTest
     // Dimensions=["foo"] for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
     // Dimensions=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (dimensions was not set during last compaction)
     // and dimensionsSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (dimensionsSpec was not set during last compaction)
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1181,7 +1181,7 @@ public class NewestSegmentFirstPolicyTest
     // filter=SelectorDimFilter("dim1", "bar", null) for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
     // filter=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (filter was not set during last compaction)
     // and transformSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (transformSpec was not set during last compaction)
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1305,7 +1305,7 @@ public class NewestSegmentFirstPolicyTest
     // metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")} for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
     // metricsSpec=[] for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (filter was not set during last compaction)
     // and metricsSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (transformSpec was not set during last compaction)
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1414,7 +1414,7 @@ public class NewestSegmentFirstPolicyTest
   @Test
   public void testIteratorReturnsSegmentsSmallerSegmentGranularityCoveringMultipleSegmentsInTimeline()
   {
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), "1994-04-29T00:00:00.000Z", null),
        new SegmentGenerateSpec(Intervals.of("2017-10-01T01:00:00/2017-10-01T02:00:00"), new Period("PT1H"), "1994-04-30T00:00:00.000Z", null)
     );
@@ -1450,7 +1450,7 @@ public class NewestSegmentFirstPolicyTest
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
     // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
@@ -1497,7 +1497,7 @@ public class NewestSegmentFirstPolicyTest
   {
     NullHandling.initializeForTests();
     PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null));
-    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
+    final SegmentTimeline timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
@@ -1629,9 +1629,7 @@ public class NewestSegmentFirstPolicyTest
     }
   }
-  private static VersionedIntervalTimeline<String, DataSegment> createTimeline(
-      SegmentGenerateSpec... specs
-  )
+  private static SegmentTimeline createTimeline(SegmentGenerateSpec... specs)
   {
     List<DataSegment> segments = new ArrayList<>();
     final String version = DateTimes.nowUtc().toString();
@@ -1671,7 +1669,7 @@ public class NewestSegmentFirstPolicyTest
       }
     }
-    return VersionedIntervalTimeline.forSegments(segments);
+    return SegmentTimeline.forSegments(segments);
   }
   private DataSourceCompactionConfig createCompactionConfig(
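Since createTimeline() now returns a SegmentTimeline, the tests above build their timelines straight from generated segments and can query them with the same helpers the production code uses. In isolation, that usage pattern looks roughly like the snippet below; segmentA and segmentB stand in for whatever fixtures a test generates.

  // Illustrative usage only; segmentA and segmentB are assumed test fixtures.
  List<DataSegment> segments = Arrays.asList(segmentA, segmentB);
  SegmentTimeline timeline = SegmentTimeline.forSegments(segments);

  // Only complete, non-overshadowed segments are considered for compaction.
  Set<DataSegment> candidates =
      timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE);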

View File

@@ -145,12 +145,6 @@ public class TestSegmentsMetadataManager implements SegmentsMetadataManager
     return getSnapshotOfDataSourcesWithAllUsedSegments().getDataSourcesWithAllUsedSegments();
   }
-  @Override
-  public Set<SegmentId> getOvershadowedSegments()
-  {
-    return getSnapshotOfDataSourcesWithAllUsedSegments().getOvershadowedSegments();
-  }
   @Override
   public DataSourcesSnapshot getSnapshotOfDataSourcesWithAllUsedSegments()
   {