mirror of https://github.com/apache/druid.git
Skip compaction for intervals without data (#15676)
* Skip compaction for intervals with a single tombstone and no data
This commit is contained in:
parent
072b16c6df
commit
67720b60ae
|
@ -339,6 +339,12 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
|
|||
while (compactibleSegmentIterator.hasNext()) {
|
||||
List<DataSegment> segments = compactibleSegmentIterator.next();
|
||||
|
||||
// Do not compact an interval which comprises of a single tombstone
|
||||
// If there are multiple tombstones in the interval, we may still want to compact them
|
||||
if (segments.size() == 1 && segments.get(0).isTombstone()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final SegmentsToCompact candidates = SegmentsToCompact.from(segments);
|
||||
final Interval interval = candidates.getUmbrellaInterval();
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.druid.timeline.Partitions;
|
|||
import org.apache.druid.timeline.SegmentTimeline;
|
||||
import org.apache.druid.timeline.partition.NumberedShardSpec;
|
||||
import org.apache.druid.timeline.partition.ShardSpec;
|
||||
import org.apache.druid.timeline.partition.TombstoneShardSpec;
|
||||
import org.apache.druid.utils.Streams;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.Interval;
|
||||
|
@ -1764,6 +1765,118 @@ public class NewestSegmentFirstPolicyTest
|
|||
Assert.assertFalse(iterator.hasNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipCompactionForIntervalsContainingSingleTombstone()
|
||||
{
|
||||
final DataSegment tombstone2023 = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2023/2024"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
TombstoneShardSpec.INSTANCE,
|
||||
0,
|
||||
1);
|
||||
final DataSegment dataSegment2023 = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2023/2024"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
new NumberedShardSpec(1, 0),
|
||||
0,
|
||||
100);
|
||||
final DataSegment tombstone2024 = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2024/2025"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
TombstoneShardSpec.INSTANCE,
|
||||
0,
|
||||
1);
|
||||
|
||||
CompactionSegmentIterator iterator = policy.reset(
|
||||
ImmutableMap.of(DATA_SOURCE,
|
||||
createCompactionConfig(10000,
|
||||
new Period("P0D"),
|
||||
new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
|
||||
)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
DATA_SOURCE,
|
||||
SegmentTimeline.forSegments(ImmutableSet.of(tombstone2023, dataSegment2023, tombstone2024))
|
||||
),
|
||||
Collections.emptyMap()
|
||||
);
|
||||
|
||||
// Skips 2024/2025 since it has a single tombstone and no data.
|
||||
// Return all segments in 2023/2024 since at least one of them has data despite there being a tombstone.
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(tombstone2023, dataSegment2023),
|
||||
iterator.next().getSegments()
|
||||
);
|
||||
|
||||
final DataSegment tombstone2025Jan = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2025-01-01/2025-02-01"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
TombstoneShardSpec.INSTANCE,
|
||||
0,
|
||||
1);
|
||||
final DataSegment tombstone2025Feb = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2025-02-01/2025-03-01"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
TombstoneShardSpec.INSTANCE,
|
||||
0,
|
||||
1);
|
||||
final DataSegment tombstone2025Mar = new DataSegment(
|
||||
DATA_SOURCE,
|
||||
Intervals.of("2025-03-01/2025-04-01"),
|
||||
"0",
|
||||
new HashMap<>(),
|
||||
new ArrayList<>(),
|
||||
new ArrayList<>(),
|
||||
TombstoneShardSpec.INSTANCE,
|
||||
0,
|
||||
1);
|
||||
iterator = policy.reset(
|
||||
ImmutableMap.of(DATA_SOURCE,
|
||||
createCompactionConfig(10000,
|
||||
new Period("P0D"),
|
||||
new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
|
||||
)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
DATA_SOURCE,
|
||||
SegmentTimeline.forSegments(ImmutableSet.of(
|
||||
tombstone2023,
|
||||
dataSegment2023,
|
||||
tombstone2024,
|
||||
tombstone2025Jan,
|
||||
tombstone2025Feb,
|
||||
tombstone2025Mar
|
||||
))
|
||||
),
|
||||
Collections.emptyMap()
|
||||
);
|
||||
// Does not skip the tombstones in 2025 since there are multiple of them which could potentially be condensed to one
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(tombstone2025Jan, tombstone2025Feb, tombstone2025Mar),
|
||||
iterator.next().getSegments()
|
||||
);
|
||||
}
|
||||
|
||||
private static void assertCompactSegmentIntervals(
|
||||
CompactionSegmentIterator iterator,
|
||||
Period segmentPeriod,
|
||||
|
|
Loading…
Reference in New Issue