mirror of https://github.com/apache/druid.git
Merge pull request #568 from metamx/fix-topn
Fix inverted lexicographic topns and topns with dim extraction dimension specs
This commit is contained in:
commit
18685542ec
|
@ -76,6 +76,12 @@ public class DefaultDimensionSpec implements DimensionSpec
|
||||||
.array();
|
.array();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -33,7 +33,12 @@ import io.druid.query.extraction.DimExtractionFn;
|
||||||
public interface DimensionSpec
|
public interface DimensionSpec
|
||||||
{
|
{
|
||||||
public String getDimension();
|
public String getDimension();
|
||||||
|
|
||||||
public String getOutputName();
|
public String getOutputName();
|
||||||
|
|
||||||
public DimExtractionFn getDimExtractionFn();
|
public DimExtractionFn getDimExtractionFn();
|
||||||
|
|
||||||
public byte[] getCacheKey();
|
public byte[] getCacheKey();
|
||||||
|
|
||||||
|
public boolean preservesOrdering();
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,6 +83,12 @@ public class ExtractionDimensionSpec implements DimensionSpec
|
||||||
.array();
|
.array();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return dimExtractionFn.preservesOrdering();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*/
|
*/
|
||||||
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property="type")
|
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
|
||||||
@JsonSubTypes(value = {
|
@JsonSubTypes(value = {
|
||||||
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
|
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
|
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
|
||||||
|
@ -35,5 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
public interface DimExtractionFn
|
public interface DimExtractionFn
|
||||||
{
|
{
|
||||||
public byte[] getCacheKey();
|
public byte[] getCacheKey();
|
||||||
|
|
||||||
public String apply(String dimValue);
|
public String apply(String dimValue);
|
||||||
|
|
||||||
|
public boolean preservesOrdering();
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,4 +92,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
|
||||||
{
|
{
|
||||||
return fn.apply(dimValue);
|
return fn.apply(dimValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,12 @@ public class PartialDimExtractionFn implements DimExtractionFn
|
||||||
return expr;
|
return expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -67,6 +67,12 @@ public class RegexDimExtractionFn implements DimExtractionFn
|
||||||
return expr;
|
return expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -63,6 +63,12 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
|
||||||
return searchQuerySpec.accept(dimValue) ? dimValue : null;
|
return searchQuerySpec.accept(dimValue) ? dimValue : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -87,6 +87,12 @@ public class TimeDimExtractionFn implements DimExtractionFn
|
||||||
return resultFormat;
|
return resultFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -31,7 +31,6 @@ import io.druid.segment.DimensionSelector;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -41,7 +40,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
|
||||||
{
|
{
|
||||||
private final Capabilities capabilities;
|
private final Capabilities capabilities;
|
||||||
private final TopNQuery query;
|
private final TopNQuery query;
|
||||||
private final Comparator<?> comparator;
|
|
||||||
private final StupidPool<ByteBuffer> bufferPool;
|
private final StupidPool<ByteBuffer> bufferPool;
|
||||||
|
|
||||||
public AggregateTopNMetricFirstAlgorithm(
|
public AggregateTopNMetricFirstAlgorithm(
|
||||||
|
@ -52,8 +50,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
|
||||||
{
|
{
|
||||||
this.capabilities = capabilities;
|
this.capabilities = capabilities;
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.comparator = query.getTopNMetricSpec()
|
|
||||||
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
|
|
||||||
this.bufferPool = bufferPool;
|
this.bufferPool = bufferPool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +58,12 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
|
||||||
DimensionSelector dimSelector, Cursor cursor
|
DimensionSelector dimSelector, Cursor cursor
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE);
|
return new TopNParams(
|
||||||
|
dimSelector,
|
||||||
|
cursor,
|
||||||
|
dimSelector.getValueCardinality(),
|
||||||
|
Integer.MAX_VALUE
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -234,7 +234,7 @@ public abstract class BaseTopNAlgorithm<DimValSelector, DimValAggregateStore, Pa
|
||||||
|
|
||||||
public static TopNResultBuilder makeResultBuilder(TopNParams params, TopNQuery query)
|
public static TopNResultBuilder makeResultBuilder(TopNParams params, TopNQuery query)
|
||||||
{
|
{
|
||||||
Comparator comparator = query.getTopNMetricSpec()
|
final Comparator comparator = query.getTopNMetricSpec()
|
||||||
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
|
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
|
||||||
return query.getTopNMetricSpec().getResultBuilder(
|
return query.getTopNMetricSpec().getResultBuilder(
|
||||||
params.getCursor().getTime(),
|
params.getCursor().getTime(),
|
||||||
|
|
|
@ -26,7 +26,6 @@ import io.druid.segment.Cursor;
|
||||||
import io.druid.segment.DimensionSelector;
|
import io.druid.segment.DimensionSelector;
|
||||||
import io.druid.segment.data.IndexedInts;
|
import io.druid.segment.data.IndexedInts;
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -34,7 +33,6 @@ import java.util.Map;
|
||||||
public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<String, Aggregator[]>, TopNParams>
|
public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<String, Aggregator[]>, TopNParams>
|
||||||
{
|
{
|
||||||
private final TopNQuery query;
|
private final TopNQuery query;
|
||||||
private final Comparator<?> comparator;
|
|
||||||
|
|
||||||
public DimExtractionTopNAlgorithm(
|
public DimExtractionTopNAlgorithm(
|
||||||
Capabilities capabilities,
|
Capabilities capabilities,
|
||||||
|
@ -44,8 +42,6 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
|
||||||
super(capabilities);
|
super(capabilities);
|
||||||
|
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.comparator = query.getTopNMetricSpec()
|
|
||||||
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -53,15 +49,28 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
|
||||||
final DimensionSelector dimSelector, final Cursor cursor
|
final DimensionSelector dimSelector, final Cursor cursor
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE);
|
return new TopNParams(
|
||||||
|
dimSelector,
|
||||||
|
cursor,
|
||||||
|
dimSelector.getValueCardinality(),
|
||||||
|
Integer.MAX_VALUE
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess)
|
protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess)
|
||||||
{
|
{
|
||||||
return query.getTopNMetricSpec().configureOptimizer(
|
final AggregatorArrayProvider provider = new AggregatorArrayProvider(
|
||||||
new AggregatorArrayProvider(params.getDimSelector(), query, params.getCardinality())
|
params.getDimSelector(),
|
||||||
).build();
|
query,
|
||||||
|
params.getCardinality()
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!query.getDimensionSpec().preservesOrdering()) {
|
||||||
|
return provider.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
return query.getTopNMetricSpec().configureOptimizer(provider).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,6 +21,7 @@ package io.druid.query.topn;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.google.common.collect.Ordering;
|
||||||
import com.metamx.common.guava.Comparators;
|
import com.metamx.common.guava.Comparators;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.PostAggregator;
|
import io.druid.query.aggregation.PostAggregator;
|
||||||
|
@ -63,11 +64,27 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator getComparator(
|
public Comparator getComparator(
|
||||||
List<AggregatorFactory> aggregatorSpecs,
|
final List<AggregatorFactory> aggregatorSpecs,
|
||||||
List<PostAggregator> postAggregatorSpecs
|
final List<PostAggregator> postAggregatorSpecs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs));
|
return Comparators.inverse(
|
||||||
|
new Comparator()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public int compare(Object o1, Object o2)
|
||||||
|
{
|
||||||
|
// nulls last
|
||||||
|
if (o1 == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (o2 == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -80,7 +97,14 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
|
||||||
List<PostAggregator> postAggs
|
List<PostAggregator> postAggs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return delegate.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggFactories, postAggs);
|
return delegate.getResultBuilder(
|
||||||
|
timestamp,
|
||||||
|
dimSpec,
|
||||||
|
threshold,
|
||||||
|
comparator,
|
||||||
|
aggFactories,
|
||||||
|
postAggs
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -94,6 +118,9 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
|
||||||
@Override
|
@Override
|
||||||
public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder)
|
public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder)
|
||||||
{
|
{
|
||||||
|
if (!canBeOptimizedUnordered()) {
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
return delegate.configureOptimizer(builder);
|
return delegate.configureOptimizer(builder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,6 +136,12 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
|
||||||
return delegate.getMetricName(dimSpec);
|
return delegate.getMetricName(dimSpec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canBeOptimizedUnordered()
|
||||||
|
{
|
||||||
|
return delegate.canBeOptimizedUnordered();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o)
|
public boolean equals(Object o)
|
||||||
{
|
{
|
||||||
|
|
|
@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
@Override
|
@Override
|
||||||
public int compare(String s, String s2)
|
public int compare(String s, String s2)
|
||||||
{
|
{
|
||||||
|
// null first
|
||||||
|
if (s == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (s2 == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
|
return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
@JsonProperty("previousStop") String previousStop
|
@JsonProperty("previousStop") String previousStop
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
this.previousStop = (previousStop == null) ? "" : previousStop;
|
this.previousStop = previousStop;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -85,13 +92,20 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
List<PostAggregator> postAggs
|
List<PostAggregator> postAggs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return new TopNLexicographicResultBuilder(timestamp, dimSpec, threshold, previousStop, comparator, aggFactories);
|
return new TopNLexicographicResultBuilder(
|
||||||
|
timestamp,
|
||||||
|
dimSpec,
|
||||||
|
threshold,
|
||||||
|
previousStop,
|
||||||
|
comparator,
|
||||||
|
aggFactories
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public byte[] getCacheKey()
|
public byte[] getCacheKey()
|
||||||
{
|
{
|
||||||
byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8);
|
byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8);
|
||||||
|
|
||||||
return ByteBuffer.allocate(1 + previousStopBytes.length)
|
return ByteBuffer.allocate(1 + previousStopBytes.length)
|
||||||
.put(CACHE_TYPE_ID)
|
.put(CACHE_TYPE_ID)
|
||||||
|
@ -119,6 +133,12 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
return dimSpec.getOutputName();
|
return dimSpec.getOutputName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canBeOptimizedUnordered()
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
@ -130,12 +150,18 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o)
|
public boolean equals(Object o)
|
||||||
{
|
{
|
||||||
if (this == o) return true;
|
if (this == o) {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null || getClass() != o.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o;
|
LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o;
|
||||||
|
|
||||||
if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) return false;
|
if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -158,6 +158,12 @@ public class NumericTopNMetricSpec implements TopNMetricSpec
|
||||||
return metric;
|
return metric;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canBeOptimizedUnordered()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
|
|
@ -31,7 +31,6 @@ import io.druid.segment.data.IndexedInts;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*/
|
*/
|
||||||
|
@ -40,7 +39,6 @@ public class PooledTopNAlgorithm
|
||||||
{
|
{
|
||||||
private final Capabilities capabilities;
|
private final Capabilities capabilities;
|
||||||
private final TopNQuery query;
|
private final TopNQuery query;
|
||||||
private final Comparator<?> comparator;
|
|
||||||
private final StupidPool<ByteBuffer> bufferPool;
|
private final StupidPool<ByteBuffer> bufferPool;
|
||||||
|
|
||||||
public PooledTopNAlgorithm(
|
public PooledTopNAlgorithm(
|
||||||
|
@ -53,8 +51,6 @@ public class PooledTopNAlgorithm
|
||||||
|
|
||||||
this.capabilities = capabilities;
|
this.capabilities = capabilities;
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.comparator = query.getTopNMetricSpec()
|
|
||||||
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
|
|
||||||
this.bufferPool = bufferPool;
|
this.bufferPool = bufferPool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,12 +111,15 @@ public class PooledTopNAlgorithm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess)
|
protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess)
|
||||||
{
|
{
|
||||||
final TopNMetricSpecBuilder<int[]> arrayProvider = params.getArrayProvider();
|
final TopNMetricSpecBuilder<int[]> arrayProvider = params.getArrayProvider();
|
||||||
|
|
||||||
|
if (!query.getDimensionSpec().preservesOrdering()) {
|
||||||
|
return arrayProvider.build();
|
||||||
|
}
|
||||||
|
|
||||||
arrayProvider.ignoreFirstN(numProcessed);
|
arrayProvider.ignoreFirstN(numProcessed);
|
||||||
arrayProvider.keepOnlyN(numToProcess);
|
arrayProvider.keepOnlyN(numToProcess);
|
||||||
return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build();
|
return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build();
|
||||||
|
|
|
@ -23,7 +23,6 @@ import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.MinMaxPriorityQueue;
|
import com.google.common.collect.MinMaxPriorityQueue;
|
||||||
import io.druid.query.Result;
|
import io.druid.query.Result;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.PostAggregator;
|
|
||||||
import io.druid.query.dimension.DimensionSpec;
|
import io.druid.query.dimension.DimensionSpec;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
|
@ -40,6 +39,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
private final DateTime timestamp;
|
private final DateTime timestamp;
|
||||||
private final DimensionSpec dimSpec;
|
private final DimensionSpec dimSpec;
|
||||||
private final String previousStop;
|
private final String previousStop;
|
||||||
|
private final Comparator comparator;
|
||||||
private final List<AggregatorFactory> aggFactories;
|
private final List<AggregatorFactory> aggFactories;
|
||||||
private MinMaxPriorityQueue<DimValHolder> pQueue = null;
|
private MinMaxPriorityQueue<DimValHolder> pQueue = null;
|
||||||
|
|
||||||
|
@ -55,6 +55,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
this.timestamp = timestamp;
|
this.timestamp = timestamp;
|
||||||
this.dimSpec = dimSpec;
|
this.dimSpec = dimSpec;
|
||||||
this.previousStop = previousStop;
|
this.previousStop = previousStop;
|
||||||
|
this.comparator = comparator;
|
||||||
this.aggFactories = aggFactories;
|
this.aggFactories = aggFactories;
|
||||||
|
|
||||||
instantiatePQueue(threshold, comparator);
|
instantiatePQueue(threshold, comparator);
|
||||||
|
@ -69,7 +70,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
{
|
{
|
||||||
Map<String, Object> metricValues = Maps.newLinkedHashMap();
|
Map<String, Object> metricValues = Maps.newLinkedHashMap();
|
||||||
|
|
||||||
if (dimName.compareTo(previousStop) > 0) {
|
if (comparator.compare(dimName, previousStop) > 0) {
|
||||||
metricValues.put(dimSpec.getOutputName(), dimName);
|
metricValues.put(dimSpec.getOutputName(), dimName);
|
||||||
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator();
|
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator();
|
||||||
for (Object metricVal : metricVals) {
|
for (Object metricVal : metricVals) {
|
||||||
|
|
|
@ -60,4 +60,6 @@ public interface TopNMetricSpec
|
||||||
public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector);
|
public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector);
|
||||||
|
|
||||||
public String getMetricName(DimensionSpec dimSpec);
|
public String getMetricName(DimensionSpec dimSpec);
|
||||||
|
|
||||||
|
public boolean canBeOptimizedUnordered();
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,12 @@ public class TopNParams
|
||||||
private final int cardinality;
|
private final int cardinality;
|
||||||
private final int numValuesPerPass;
|
private final int numValuesPerPass;
|
||||||
|
|
||||||
protected TopNParams(DimensionSelector dimSelector, Cursor cursor, int cardinality, int numValuesPerPass)
|
protected TopNParams(
|
||||||
|
DimensionSelector dimSelector,
|
||||||
|
Cursor cursor,
|
||||||
|
int cardinality,
|
||||||
|
int numValuesPerPass
|
||||||
|
)
|
||||||
{
|
{
|
||||||
this.dimSelector = dimSelector;
|
this.dimSelector = dimSelector;
|
||||||
this.cursor = cursor;
|
this.cursor = cursor;
|
||||||
|
|
|
@ -1077,6 +1077,88 @@ public class TopNQueryRunnerTest
|
||||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTopNInvertedLexicographicWithPreviousStop()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(providerDimension)
|
||||||
|
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("upfront")))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "total_market",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "spot",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTopNInvertedLexicographicWithNonExistingPreviousStop()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(providerDimension)
|
||||||
|
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u")))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "total_market",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "spot",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTopNDimExtraction()
|
public void testTopNDimExtraction()
|
||||||
{
|
{
|
||||||
|
@ -1129,6 +1211,246 @@ public class TopNQueryRunnerTest
|
||||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTopNLexicographicDimExtraction()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.metric(new LexicographicTopNMetricSpec(null))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "s",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "t",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "u",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 4875.669677734375D,
|
||||||
|
"addRowsIndexConstant", 4880.669677734375D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvertedTopNLexicographicDimExtraction2()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null)))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "t",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "o",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "f",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 4875.669677734375D,
|
||||||
|
"addRowsIndexConstant", 4880.669677734375D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTopNLexicographicDimExtractionWithPreviousStop()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.metric(new LexicographicTopNMetricSpec("spot"))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "t",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "u",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 4875.669677734375D,
|
||||||
|
"addRowsIndexConstant", 4880.669677734375D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u")))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "t",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 5351.814697265625D,
|
||||||
|
"addRowsIndexConstant", 5356.814697265625D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "s",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2()
|
||||||
|
{
|
||||||
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
.dimension(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p")))
|
||||||
|
.threshold(4)
|
||||||
|
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||||
|
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||||
|
new Result<TopNResultValue>(
|
||||||
|
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||||
|
new TopNResultValue(
|
||||||
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "o",
|
||||||
|
"rows", 18L,
|
||||||
|
"index", 2231.8768157958984D,
|
||||||
|
"addRowsIndexConstant", 2250.8768157958984D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||||
|
),
|
||||||
|
ImmutableMap.<String, Object>of(
|
||||||
|
providerDimension, "f",
|
||||||
|
"rows", 4L,
|
||||||
|
"index", 4875.669677734375D,
|
||||||
|
"addRowsIndexConstant", 4880.669677734375D,
|
||||||
|
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testInvertedTopNQuery()
|
public void testInvertedTopNQuery()
|
||||||
{
|
{
|
||||||
|
@ -1179,7 +1501,8 @@ public class TopNQueryRunnerTest
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTopNDependentPostAgg() {
|
public void testTopNDependentPostAgg()
|
||||||
|
{
|
||||||
TopNQuery query = new TopNQueryBuilder()
|
TopNQuery query = new TopNQueryBuilder()
|
||||||
.dataSource(QueryRunnerTestHelper.dataSource)
|
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||||
.granularity(QueryRunnerTestHelper.allGran)
|
.granularity(QueryRunnerTestHelper.allGran)
|
||||||
|
|
Loading…
Reference in New Issue