mirror of https://github.com/apache/druid.git

fix merging problem with linear shard specs

This commit is contained in:
parent 12e78955db
commit 3312d9b802
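The change, in brief: DruidMasterSegmentMerger derived each timeline object's underlying interval from the partition chunk numbered 0. A timeline object backed by linear shard specs need not contain a partition numbered 0 (the new test uses partition numbers 1, 7 and 1500), so getChunk(0) could return null and the merge pass would fail with a NullPointerException. The fix takes whichever chunk comes first and raises a descriptive error when there is none. Below is a minimal before/after sketch assembled from the diff; the import path for TimelineObjectHolder is an assumption, and the snippet only compiles inside the pre-Apache (com.metamx) Druid tree.

import com.google.common.collect.Iterables;
import com.metamx.common.ISE;
import com.metamx.druid.TimelineObjectHolder;   // assumed package
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.partition.PartitionChunk;
import org.joda.time.Interval;

class UnderlyingIntervalLookup
{
  // Before: assumes a chunk with partition number 0 exists. With
  // LinearShardSpec the lowest partition number can be anything, so
  // getChunk(0) may return null and the chained call below throws.
  static Interval before(TimelineObjectHolder<String, DataSegment> holder)
  {
    return holder.getObject().getChunk(0).getObject().getInterval();
  }

  // After: take the first chunk the partition holder yields, whatever its
  // partition number, and fail loudly when the holder is empty.
  static Interval after(TimelineObjectHolder<String, DataSegment> holder)
  {
    PartitionChunk<DataSegment> firstChunk = Iterables.getFirst(holder.getObject(), null);
    if (firstChunk == null) {
      throw new ISE("Unable to find an underlying interval");
    }
    return firstChunk.getObject().getInterval();
  }
}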
DruidMasterSegmentMerger.java
@@ -21,6 +21,7 @@ package com.metamx.druid.master;
 import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
@@ -28,6 +29,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Multiset;
 import com.google.common.collect.Ordering;
+import com.metamx.common.ISE;
 import com.metamx.common.Pair;
 import com.metamx.common.guava.FunctionalIterable;
 import com.metamx.common.logger.Logger;
@@ -36,7 +38,6 @@ import com.metamx.druid.VersionedIntervalTimeline;
 import com.metamx.druid.client.DataSegment;
 import com.metamx.druid.client.indexing.IndexingServiceClient;
 import com.metamx.druid.partition.PartitionChunk;
-
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
 
@@ -96,13 +97,18 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
       // Accumulate timelineObjects greedily until we reach our limits, then backtrack to the maximum complete set
       SegmentsToMerge segmentsToMerge = new SegmentsToMerge();
 
-      for(int i = 0 ; i < timelineObjects.size() ; i++) {
-        segmentsToMerge.add(timelineObjects.get(i));
+      for (int i = 0; i < timelineObjects.size(); i++) {
+        try {
+          segmentsToMerge.add(timelineObjects.get(i));
+        }
+        catch (Exception e) {
+          log.error("Unable to merge segments for %s", entry.getKey());
+          throw Throwables.propagate(e);
+        }
+
         if (segmentsToMerge.getByteCount() > params.getMergeBytesLimit()
-            || segmentsToMerge.getSegmentCount() >= params.getMergeSegmentsLimit())
-        {
+            || segmentsToMerge.getSegmentCount() >= params.getMergeSegmentsLimit()) {
           i -= segmentsToMerge.backtrack(params.getMergeBytesLimit());
 
           if (segmentsToMerge.getSegmentCount() > 1) {
@@ -132,6 +138,7 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
   /**
    * Issue merge request for some list of segments.
+   *
    * @return number of segments merged
    */
  private int mergeSegments(SegmentsToMerge segmentsToMerge, String dataSource)
@@ -212,7 +219,6 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
     public void add(TimelineObjectHolder<String, DataSegment> timelineObject)
     {
       final Interval timelineObjectInterval = timelineObject.getInterval();
-      final Interval underlyingInterval = timelineObject.getObject().getChunk(0).getObject().getInterval();
 
       if (timelineObjects.size() > 0) {
         Preconditions.checkArgument(
@@ -222,16 +228,22 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
         );
       }
 
-      for(final PartitionChunk<DataSegment> segment : timelineObject.getObject()) {
+      PartitionChunk<DataSegment> firstChunk = Iterables.getFirst(timelineObject.getObject(), null);
+      if (firstChunk == null) {
+        throw new ISE("Unable to find an underlying interval");
+      }
+      Interval underlyingInterval = firstChunk.getObject().getInterval();
+
+      for (final PartitionChunk<DataSegment> segment : timelineObject.getObject()) {
         segments.add(segment.getObject());
-        if(segments.count(segment.getObject()) == 1) {
+        if (segments.count(segment.getObject()) == 1) {
           byteCount += segment.getObject().getSize();
         }
       }
 
       // Compute new underlying merged interval
       final Interval mergedUnderlyingInterval = getMergedUnderlyingInterval();
-      if(mergedUnderlyingInterval == null) {
+      if (mergedUnderlyingInterval == null) {
         timelineObjects.add(Pair.of(timelineObject, underlyingInterval));
       } else {
         final DateTime start = underlyingInterval.getStart().isBefore(mergedUnderlyingInterval.getStart())
@@ -248,7 +260,7 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
     public Interval getMergedTimelineInterval()
     {
-      if(timelineObjects.isEmpty()) {
+      if (timelineObjects.isEmpty()) {
         return null;
       } else {
         return new Interval(
@@ -260,7 +272,7 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
     public Interval getMergedUnderlyingInterval()
     {
-      if(timelineObjects.isEmpty()) {
+      if (timelineObjects.isEmpty()) {
         return null;
       } else {
         return timelineObjects.get(timelineObjects.size() - 1).rhs;
@@ -279,6 +291,7 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
     /**
      * Does this set of segments fully cover union(all segment intervals)?
+     *
      * @return true if this set is complete
      */
     public boolean isComplete()
@@ -288,6 +301,7 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
     /**
      * Remove timelineObjects from this holder until we have a complete set with total size <= maxSize.
+     *
      * @return number of timeline object holders removed
      */
     public int backtrack(long maxSize)
@@ -296,12 +310,15 @@ public class DruidMasterSegmentMerger implements DruidMasterHelper
 
       int removed = 0;
       while (!isComplete() || byteCount > maxSize) {
-        removed ++;
+        removed++;
 
-        final TimelineObjectHolder<String, DataSegment> removedHolder = timelineObjects.remove(timelineObjects.size() - 1).lhs;
-        for(final PartitionChunk<DataSegment> segment : removedHolder.getObject()) {
+        final TimelineObjectHolder<String, DataSegment> removedHolder = timelineObjects.remove(
+            timelineObjects.size()
+            - 1
+        ).lhs;
+        for (final PartitionChunk<DataSegment> segment : removedHolder.getObject()) {
           segments.remove(segment.getObject());
-          if(segments.count(segment.getObject()) == 0) {
+          if (segments.count(segment.getObject()) == 0) {
             byteCount -= segment.getObject().getSize();
           }
         }
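The master's merge loop also gains error handling: a failure inside SegmentsToMerge.add() is now attributed to the dataSource being processed before it is rethrown. A condensed, comment-annotated view of that block (names as in the diff; entry is presumably the per-dataSource map entry the enclosing loop iterates over):

      try {
        segmentsToMerge.add(timelineObjects.get(i));
      }
      catch (Exception e) {
        // Say which dataSource could not be merged...
        log.error("Unable to merge segments for %s", entry.getKey());
        // ...then rethrow. Guava's Throwables.propagate wraps checked
        // exceptions in a RuntimeException and rethrows unchecked ones as-is.
        throw Throwables.propagate(e);
      }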
DruidMasterSegmentMergerTest.java
@@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.metamx.druid.client.DataSegment;
 import com.metamx.druid.client.indexing.IndexingServiceClient;
+import com.metamx.druid.shard.LinearShardSpec;
 import junit.framework.Assert;
 import org.joda.time.Interval;
 import org.junit.Test;
@@ -190,7 +191,7 @@ public class DruidMasterSegmentMergerTest
     Assert.assertEquals(
         ImmutableList.of(
             ImmutableList.of(segments.get(0), segments.get(1)),
             ImmutableList.of(segments.get(2), segments.get(3), segments.get(4), segments.get(5), segments.get(6))
         ), merge(segments)
     );
   }
@@ -372,6 +373,35 @@ public class DruidMasterSegmentMergerTest
     Assert.assertEquals(ImmutableList.of(ImmutableList.of(segments.get(4), segments.get(5))), merge(segments));
   }
 
+  @Test
+  public void testMergeLinearShardSpecs()
+  {
+    final List<DataSegment> segments = ImmutableList.of(
+        DataSegment.builder()
+                   .dataSource("foo")
+                   .interval(new Interval("2012-01-01/P1D"))
+                   .version("1")
+                   .shardSpec(new LinearShardSpec(1))
+                   .build(),
+        DataSegment.builder()
+                   .dataSource("foo")
+                   .interval(new Interval("2012-01-02/P1D"))
+                   .version("1")
+                   .shardSpec(new LinearShardSpec(7))
+                   .build(),
+        DataSegment.builder().dataSource("foo")
+                   .interval(new Interval("2012-01-03/P1D"))
+                   .version("1")
+                   .shardSpec(new LinearShardSpec(1500))
+                   .build()
+    );
+
+    Assert.assertEquals(
+        ImmutableList.of(ImmutableList.of(segments.get(0), segments.get(1), segments.get(2))),
+        merge(segments)
+    );
+  }
+
   /**
    * Runs DruidMasterSegmentMerger on a particular set of segments and returns the list of requested merges.
    */
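Taken together, the new test pins the regression down: three consecutive one-day "foo" segments whose LinearShardSpec partition numbers are 1, 7 and 1500 (none of them 0) must come out as a single merge group. Under the old getChunk(0) lookup the very first add() would have hit a null chunk. To run just this test in the project's Maven build, mvn test -Dtest=DruidMasterSegmentMergerTest should do it.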