Mirror of https://github.com/apache/druid.git

Merge pull request #2107 from jon-wei/fix_smq: More efficient SegmentMetadataQuery

Commit 7019d3c421
CombiningSequence.java:

@@ -19,6 +19,7 @@
 
 package io.druid.common.guava;
 
+import com.google.common.base.Function;
 import com.google.common.collect.Ordering;
 import com.metamx.common.guava.Accumulator;
 import com.metamx.common.guava.Sequence;

@@ -37,25 +38,29 @@ public class CombiningSequence<T> implements Sequence<T>
   public static <T> CombiningSequence<T> create(
       Sequence<T> baseSequence,
       Ordering<T> ordering,
-      BinaryFn<T, T, T> mergeFn
+      BinaryFn<T, T, T> mergeFn,
+      Function transformFn
   )
   {
-    return new CombiningSequence<T>(baseSequence, ordering, mergeFn);
+    return new CombiningSequence<T>(baseSequence, ordering, mergeFn, transformFn);
   }
 
   private final Sequence<T> baseSequence;
   private final Ordering<T> ordering;
   private final BinaryFn<T, T, T> mergeFn;
+  private final Function transformFn;
 
   public CombiningSequence(
       Sequence<T> baseSequence,
       Ordering<T> ordering,
-      BinaryFn<T, T, T> mergeFn
+      BinaryFn<T, T, T> mergeFn,
+      Function transformFn
   )
   {
     this.baseSequence = baseSequence;
     this.ordering = ordering;
     this.mergeFn = mergeFn;
+    this.transformFn = transformFn;
   }
 
   @Override

@@ -117,6 +122,9 @@ public class CombiningSequence<T> implements Sequence<T>
       @Override
       public OutType get()
       {
+        if (transformFn != null) {
+          return (OutType) transformFn.apply(retVal);
+        }
         return retVal;
       }
 
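The transform hook added above is optional: existing callers pass null, as the CombiningSequenceTest and ResultMergeQueryRunner changes below show. When present, it is applied in get(), after merging, rather than inside the merge function. A minimal, self-contained sketch of the call shape; the values and the x10 transform are hypothetical, not from the commit:

import com.google.common.base.Function;
import com.google.common.collect.Ordering;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import com.metamx.common.guava.nary.BinaryFn;
import io.druid.common.guava.CombiningSequence;

import java.util.ArrayList;
import java.util.Arrays;

public class CombiningSequenceExample
{
  public static void main(String[] args)
  {
    Sequence<Integer> combined = CombiningSequence.create(
        Sequences.simple(Arrays.asList(1, 1, 2)),
        Ordering.<Integer>natural(),
        new BinaryFn<Integer, Integer, Integer>()
        {
          @Override
          public Integer apply(Integer lhs, Integer rhs)
          {
            // Merge adjacent values that the ordering considers equal.
            if (lhs == null) {
              return rhs;
            }
            if (rhs == null) {
              return lhs;
            }
            return lhs + rhs;
          }
        },
        new Function<Integer, Integer>()
        {
          @Override
          public Integer apply(Integer input)
          {
            // Applied when a combined value is retrieved via get(),
            // not inside the merge function.
            return input * 10;
          }
        }
    );

    System.out.println(Sequences.toList(combined, new ArrayList<Integer>()));
  }
}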
CombiningSequenceTest.java:

@@ -214,7 +214,8 @@ public class CombiningSequenceTest
 
             return Pair.of(lhs.lhs, lhs.rhs + rhs.rhs);
           }
-        }
+        },
+        null
     );
 
     List<Pair<Integer, Integer>> merged = Sequences.toList(seq, Lists.<Pair<Integer, Integer>>newArrayList());
Segment metadata query docs:

@@ -30,7 +30,7 @@ There are several main parts to a segment metadata query:
 |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no|
 |merge|Merge all individual segment metadata results into a single result|no|
 |context|See [Context](../querying/query-context.html)|no|
-|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size"]. See section [analysisTypes](#analysistypes) for more details.|no|
+|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size", "interval"]. See section [analysisTypes](#analysistypes) for more details.|no|
 
 The format of the result is:

@@ -96,7 +96,7 @@ This is a list of properties that determines the amount of information returned
 
 By default, all analysis types will be used. If a property is not needed, omitting it from this list will result in a more efficient query.
 
-There are 2 types of column analyses:
+There are 3 types of column analyses:
 
 #### cardinality

@@ -107,3 +107,7 @@ There are 2 types of column analyses:
 * Estimated byte size for the segment columns if they were stored in a flat format
 
 * Estimated total segment byte size if it was stored in a flat format
+
+#### interval
+
+* If present, the SegmentMetadataQuery will return the list of intervals associated with the queried segments.
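With the new analysis type, a caller that only needs the condensed interval list can ask for just that. A minimal example query body using the fields documented above; the datasource name and interval are hypothetical:

{
  "queryType": "segmentMetadata",
  "dataSource": "sample_datasource",
  "intervals": ["2013-01-01/2014-01-01"],
  "merge": true,
  "analysisTypes": ["interval"]
}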
ResultMergeQueryRunner.java:

@@ -40,7 +40,7 @@ public abstract class ResultMergeQueryRunner<T> extends BySegmentSkippingQueryRunner<T>
   @Override
   public Sequence<T> doRun(QueryRunner<T> baseRunner, Query<T> query, Map<String, Object> context)
   {
-    return CombiningSequence.create(baseRunner.run(query, context), makeOrdering(query), createMergeFn(query));
+    return CombiningSequence.create(baseRunner.run(query, context), makeOrdering(query), createMergeFn(query), null);
   }
 
   protected abstract Ordering<T> makeOrdering(Query<T> query);
SegmentAnalyzer.java:

@@ -281,4 +281,8 @@ public class SegmentAnalyzer
     return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY);
   }
 
+  private boolean analysisHasInterval(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
+    return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.INTERVAL);
+  }
+
 }
SegmentMetadataQueryQueryToolChest.java:

@@ -34,7 +34,9 @@ import com.metamx.common.guava.Sequence;
 import com.metamx.common.guava.nary.BinaryFn;
 import com.metamx.emitter.service.ServiceMetricEvent;
 import io.druid.collections.OrderedMergeSequence;
+import io.druid.common.guava.CombiningSequence;
 import io.druid.common.utils.JodaUtils;
+import io.druid.data.input.Row;
 import io.druid.query.CacheStrategy;
 import io.druid.query.DruidMetrics;
 import io.druid.query.Query;

@@ -77,6 +79,36 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAnalysis, SegmentMetadataQuery>
   {
     return new ResultMergeQueryRunner<SegmentAnalysis>(runner)
     {
+      private Function<SegmentAnalysis, SegmentAnalysis> transformFn = new Function<SegmentAnalysis, SegmentAnalysis>()
+      {
+        @Override
+        public SegmentAnalysis apply(SegmentAnalysis analysis)
+        {
+          return new SegmentAnalysis(
+              analysis.getId(),
+              JodaUtils.condenseIntervals(analysis.getIntervals()),
+              analysis.getColumns(),
+              analysis.getSize(),
+              analysis.getNumRows()
+          );
+        }
+      };
+
+      @Override
+      public Sequence<SegmentAnalysis> doRun(
+          QueryRunner<SegmentAnalysis> baseRunner,
+          Query<SegmentAnalysis> query,
+          Map<String, Object> context
+      )
+      {
+        return CombiningSequence.create(
+            baseRunner.run(query, context),
+            makeOrdering(query),
+            createMergeFn(query),
+            transformFn
+        );
+      }
+
       @Override
       protected Ordering<SegmentAnalysis> makeOrdering(Query<SegmentAnalysis> query)
       {

@@ -115,9 +147,11 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAnalysis, SegmentMetadataQuery>
           return arg1;
         }
 
-        List<Interval> newIntervals = JodaUtils.condenseIntervals(
-            Iterables.concat(arg1.getIntervals(), arg2.getIntervals())
-        );
+        List<Interval> newIntervals = null;
+        if (query.hasInterval()) {
+          newIntervals = arg1.getIntervals();
+          newIntervals.addAll(arg2.getIntervals());
+        }
 
         final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
         final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
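The efficiency gain here is in where interval condensing happens: the pairwise merge function now only concatenates the two interval lists (and only if the query asked for intervals at all), while JodaUtils.condenseIntervals runs once, in transformFn, on the fully merged result. A small sketch of the helper's expected behavior; the interval values are hypothetical, and the merge semantics (sorting plus collapsing overlapping or abutting intervals) are assumed from how condenseIntervals is used above:

import io.druid.common.utils.JodaUtils;
import org.joda.time.Interval;

import java.util.Arrays;
import java.util.List;

public class CondenseIntervalsExample
{
  public static void main(String[] args)
  {
    List<Interval> raw = Arrays.asList(
        new Interval("2013-01-01/2013-02-01"),
        new Interval("2013-01-15/2013-03-01"),  // overlaps the first interval
        new Interval("2013-06-01/2013-07-01")
    );

    // Expected to merge the two overlapping intervals into 2013-01-01/2013-03-01
    // and leave the disjoint one untouched.
    List<Interval> condensed = JodaUtils.condenseIntervals(raw);
    System.out.println(condensed);
  }
}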
SegmentMetadataQueryRunnerFactory.java:

@@ -45,9 +45,11 @@ import io.druid.query.metadata.metadata.SegmentMetadataQuery;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.Segment;
 import io.druid.segment.StorageAdapter;
+import org.joda.time.Interval;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CancellationException;
 import java.util.concurrent.ExecutionException;

@@ -117,12 +119,13 @@ public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory<SegmentAnalysis, SegmentMetadataQuery>
             columns.put(columnName, column);
           }
         }
+        List<Interval> retIntervals = query.hasInterval() ? Arrays.asList(segment.getDataInterval()) : null;
 
         return Sequences.simple(
             Arrays.asList(
                 new SegmentAnalysis(
                     segment.getIdentifier(),
-                    Arrays.asList(segment.getDataInterval()),
+                    retIntervals,
                     columns,
                     totalSize,
                     numRows
SegmentMetadataQuery.java:

@@ -50,7 +50,8 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
   public enum AnalysisType
   {
     CARDINALITY,
-    SIZE;
+    SIZE,
+    INTERVAL;
 
     @JsonValue
     @Override

@@ -77,7 +78,8 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
 
   public static final EnumSet<AnalysisType> DEFAULT_ANALYSIS_TYPES = EnumSet.of(
       AnalysisType.CARDINALITY,
-      AnalysisType.SIZE
+      AnalysisType.SIZE,
+      AnalysisType.INTERVAL
   );
 
   private final ColumnIncluderator toInclude;

@@ -163,6 +165,11 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     return analysisTypes.contains(AnalysisType.SIZE);
   }
 
+  public boolean hasInterval()
+  {
+    return analysisTypes.contains(AnalysisType.INTERVAL);
+  }
+
   public byte[] getAnalysisTypesCacheKey()
   {
     int size = 1;

@@ -259,6 +266,10 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     if (usingDefaultInterval != that.usingDefaultInterval) {
       return false;
     }
+
+    if (!analysisTypes.equals(that.analysisTypes)) {
+      return false;
+    }
     return !(toInclude != null ? !toInclude.equals(that.toInclude) : that.toInclude != null);
 
   }

@@ -270,6 +281,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     result = 31 * result + (toInclude != null ? toInclude.hashCode() : 0);
     result = 31 * result + (merge ? 1 : 0);
     result = 31 * result + (usingDefaultInterval ? 1 : 0);
+    result = 31 * result + analysisTypes.hashCode();
     return result;
   }
 }
SegmentMetadataQueryQueryToolChestTest.java:

@@ -54,7 +54,7 @@ public class SegmentMetadataQueryQueryToolChestTest
         new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(query);
 
     // Test cache key generation
-    byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x01};
+    byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x01, 0x02};
     byte[] actualKey = strategy.computeCacheKey(query);
     Assert.assertArrayEquals(expectedKey, actualKey);
 
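The extra 0x02 in the expected key is the ordinal of the new INTERVAL analysis type, which now participates in the cache key (and, per the equals/hashCode changes above, in query identity). A hedged reconstruction of how that portion of the key is likely built, inferred from the test bytes; the actual getAnalysisTypesCacheKey() body is not part of this diff, so treat the marker byte and layout as assumptions:

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.EnumSet;

public class AnalysisTypesCacheKeySketch
{
  enum AnalysisType { CARDINALITY, SIZE, INTERVAL }  // mirrors the enum extended above

  static byte[] analysisTypesCacheKey(EnumSet<AnalysisType> analysisTypes)
  {
    ByteBuffer buffer = ByteBuffer.allocate(1 + analysisTypes.size());
    buffer.put((byte) 0xFF);                 // assumed section marker (third byte in the test key)
    for (AnalysisType type : analysisTypes) {
      buffer.put((byte) type.ordinal());     // CARDINALITY=0x00, SIZE=0x01, INTERVAL=0x02
    }
    return buffer.array();
  }

  public static void main(String[] args)
  {
    // Prints [-1, 0, 1, 2] for the new default analysis types.
    System.out.println(Arrays.toString(analysisTypesCacheKey(EnumSet.allOf(AnalysisType.class))));
  }
}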
RealtimePlumber.java:

@@ -1025,6 +1025,7 @@ public class RealtimePlumber implements Plumber
 
             final File persistedFile = indexMerger.persist(
                 indexToPersist.getIndex(),
+                interval,
                 new File(computePersistDir(schema, interval), String.valueOf(indexToPersist.getCount())),
                 metaData,
                 indexSpec
RealtimePlumberSchoolTest.java:

@@ -396,21 +396,23 @@ public class RealtimePlumberSchoolTest
     Map<Long, Sink> sinks = restoredPlumber.getSinks();
     Assert.assertEquals(1, sinks.size());
 
     List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(new Long(0)));
     DateTime startTime = new DateTime("1970-01-01T00:00:00.000Z");
+    Interval expectedInterval = new Interval(startTime, new DateTime("1971-01-01T00:00:00.000Z"));
     Assert.assertEquals(0, hydrants.get(0).getCount());
     Assert.assertEquals(
-        new Interval(startTime, new DateTime("1970-01-01T00:00:00.001Z")),
+        expectedInterval,
         hydrants.get(0).getSegment().getDataInterval()
     );
     Assert.assertEquals(2, hydrants.get(1).getCount());
     Assert.assertEquals(
-        new Interval(startTime, new DateTime("1970-03-01T00:00:00.001Z")),
+        expectedInterval,
         hydrants.get(1).getSegment().getDataInterval()
     );
     Assert.assertEquals(4, hydrants.get(2).getCount());
     Assert.assertEquals(
-        new Interval(startTime, new DateTime("1970-05-01T00:00:00.001Z")),
+        expectedInterval,
         hydrants.get(2).getSegment().getDataInterval()
     );
 