Merge pull request #568 from metamx/fix-topn

Fix inverted lexicographic topns and topns with dim extraction dimension specs
This commit is contained in:
xvrl 2014-05-28 10:47:10 -07:00
commit 18685542ec
20 changed files with 574 additions and 119 deletions

View File

@ -76,6 +76,12 @@ public class DefaultDimensionSpec implements DimensionSpec
.array(); .array();
} }
@Override
public boolean preservesOrdering()
{
return true;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -33,7 +33,12 @@ import io.druid.query.extraction.DimExtractionFn;
public interface DimensionSpec public interface DimensionSpec
{ {
public String getDimension(); public String getDimension();
public String getOutputName(); public String getOutputName();
public DimExtractionFn getDimExtractionFn(); public DimExtractionFn getDimExtractionFn();
public byte[] getCacheKey(); public byte[] getCacheKey();
public boolean preservesOrdering();
} }

View File

@ -83,6 +83,12 @@ public class ExtractionDimensionSpec implements DimensionSpec
.array(); .array();
} }
@Override
public boolean preservesOrdering()
{
return dimExtractionFn.preservesOrdering();
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
/** /**
*/ */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property="type") @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@JsonSubTypes(value = { @JsonSubTypes(value = {
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
@ -35,5 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
public interface DimExtractionFn public interface DimExtractionFn
{ {
public byte[] getCacheKey(); public byte[] getCacheKey();
public String apply(String dimValue); public String apply(String dimValue);
public boolean preservesOrdering();
} }

View File

@ -92,4 +92,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
{ {
return fn.apply(dimValue); return fn.apply(dimValue);
} }
@Override
public boolean preservesOrdering()
{
return false;
}
} }

View File

@ -67,6 +67,12 @@ public class PartialDimExtractionFn implements DimExtractionFn
return expr; return expr;
} }
@Override
public boolean preservesOrdering()
{
return true;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -67,6 +67,12 @@ public class RegexDimExtractionFn implements DimExtractionFn
return expr; return expr;
} }
@Override
public boolean preservesOrdering()
{
return false;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -63,6 +63,12 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
return searchQuerySpec.accept(dimValue) ? dimValue : null; return searchQuerySpec.accept(dimValue) ? dimValue : null;
} }
@Override
public boolean preservesOrdering()
{
return true;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -87,6 +87,12 @@ public class TimeDimExtractionFn implements DimExtractionFn
return resultFormat; return resultFormat;
} }
@Override
public boolean preservesOrdering()
{
return false;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -31,7 +31,6 @@ import io.druid.segment.DimensionSelector;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -41,7 +40,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
{ {
private final Capabilities capabilities; private final Capabilities capabilities;
private final TopNQuery query; private final TopNQuery query;
private final Comparator<?> comparator;
private final StupidPool<ByteBuffer> bufferPool; private final StupidPool<ByteBuffer> bufferPool;
public AggregateTopNMetricFirstAlgorithm( public AggregateTopNMetricFirstAlgorithm(
@ -52,8 +50,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
{ {
this.capabilities = capabilities; this.capabilities = capabilities;
this.query = query; this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
this.bufferPool = bufferPool; this.bufferPool = bufferPool;
} }
@ -62,7 +58,12 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
DimensionSelector dimSelector, Cursor cursor DimensionSelector dimSelector, Cursor cursor
) )
{ {
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE); return new TopNParams(
dimSelector,
cursor,
dimSelector.getValueCardinality(),
Integer.MAX_VALUE
);
} }
@Override @Override

View File

@ -234,8 +234,8 @@ public abstract class BaseTopNAlgorithm<DimValSelector, DimValAggregateStore, Pa
public static TopNResultBuilder makeResultBuilder(TopNParams params, TopNQuery query) public static TopNResultBuilder makeResultBuilder(TopNParams params, TopNQuery query)
{ {
Comparator comparator = query.getTopNMetricSpec() final Comparator comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs()); .getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
return query.getTopNMetricSpec().getResultBuilder( return query.getTopNMetricSpec().getResultBuilder(
params.getCursor().getTime(), params.getCursor().getTime(),
query.getDimensionSpec(), query.getDimensionSpec(),

View File

@ -26,7 +26,6 @@ import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector; import io.druid.segment.DimensionSelector;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import java.util.Comparator;
import java.util.Map; import java.util.Map;
/** /**
@ -34,7 +33,6 @@ import java.util.Map;
public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<String, Aggregator[]>, TopNParams> public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<String, Aggregator[]>, TopNParams>
{ {
private final TopNQuery query; private final TopNQuery query;
private final Comparator<?> comparator;
public DimExtractionTopNAlgorithm( public DimExtractionTopNAlgorithm(
Capabilities capabilities, Capabilities capabilities,
@ -44,8 +42,6 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
super(capabilities); super(capabilities);
this.query = query; this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
} }
@Override @Override
@ -53,15 +49,28 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
final DimensionSelector dimSelector, final Cursor cursor final DimensionSelector dimSelector, final Cursor cursor
) )
{ {
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE); return new TopNParams(
dimSelector,
cursor,
dimSelector.getValueCardinality(),
Integer.MAX_VALUE
);
} }
@Override @Override
protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess)
{ {
return query.getTopNMetricSpec().configureOptimizer( final AggregatorArrayProvider provider = new AggregatorArrayProvider(
new AggregatorArrayProvider(params.getDimSelector(), query, params.getCardinality()) params.getDimSelector(),
).build(); query,
params.getCardinality()
);
if (!query.getDimensionSpec().preservesOrdering()) {
return provider.build();
}
return query.getTopNMetricSpec().configureOptimizer(provider).build();
} }
@Override @Override

View File

@ -21,6 +21,7 @@ package io.druid.query.topn;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Ordering;
import com.metamx.common.guava.Comparators; import com.metamx.common.guava.Comparators;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.PostAggregator;
@ -63,11 +64,27 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
@Override @Override
public Comparator getComparator( public Comparator getComparator(
List<AggregatorFactory> aggregatorSpecs, final List<AggregatorFactory> aggregatorSpecs,
List<PostAggregator> postAggregatorSpecs final List<PostAggregator> postAggregatorSpecs
) )
{ {
return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs)); return Comparators.inverse(
new Comparator()
{
@Override
public int compare(Object o1, Object o2)
{
// nulls last
if (o1 == null) {
return 1;
}
if (o2 == null) {
return -1;
}
return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2);
}
}
);
} }
@Override @Override
@ -80,7 +97,14 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
List<PostAggregator> postAggs List<PostAggregator> postAggs
) )
{ {
return delegate.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggFactories, postAggs); return delegate.getResultBuilder(
timestamp,
dimSpec,
threshold,
comparator,
aggFactories,
postAggs
);
} }
@Override @Override
@ -94,6 +118,9 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
@Override @Override
public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder) public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder)
{ {
if (!canBeOptimizedUnordered()) {
return builder;
}
return delegate.configureOptimizer(builder); return delegate.configureOptimizer(builder);
} }
@ -109,6 +136,12 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
return delegate.getMetricName(dimSpec); return delegate.getMetricName(dimSpec);
} }
@Override
public boolean canBeOptimizedUnordered()
{
return delegate.canBeOptimizedUnordered();
}
@Override @Override
public boolean equals(Object o) public boolean equals(Object o)
{ {

View File

@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@Override @Override
public int compare(String s, String s2) public int compare(String s, String s2)
{ {
// null first
if (s == null) {
return -1;
}
if (s2 == null) {
return 1;
}
return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8)); return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
} }
}; };
@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@JsonProperty("previousStop") String previousStop @JsonProperty("previousStop") String previousStop
) )
{ {
this.previousStop = (previousStop == null) ? "" : previousStop; this.previousStop = previousStop;
} }
@Override @Override
@ -85,13 +92,20 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
List<PostAggregator> postAggs List<PostAggregator> postAggs
) )
{ {
return new TopNLexicographicResultBuilder(timestamp, dimSpec, threshold, previousStop, comparator, aggFactories); return new TopNLexicographicResultBuilder(
timestamp,
dimSpec,
threshold,
previousStop,
comparator,
aggFactories
);
} }
@Override @Override
public byte[] getCacheKey() public byte[] getCacheKey()
{ {
byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8); byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8);
return ByteBuffer.allocate(1 + previousStopBytes.length) return ByteBuffer.allocate(1 + previousStopBytes.length)
.put(CACHE_TYPE_ID) .put(CACHE_TYPE_ID)
@ -119,6 +133,12 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
return dimSpec.getOutputName(); return dimSpec.getOutputName();
} }
@Override
public boolean canBeOptimizedUnordered()
{
return false;
}
@Override @Override
public String toString() public String toString()
{ {
@ -130,12 +150,18 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@Override @Override
public boolean equals(Object o) public boolean equals(Object o)
{ {
if (this == o) return true; if (this == o) {
if (o == null || getClass() != o.getClass()) return false; return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o; LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o;
if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) return false; if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) {
return false;
}
return true; return true;
} }

View File

@ -158,6 +158,12 @@ public class NumericTopNMetricSpec implements TopNMetricSpec
return metric; return metric;
} }
@Override
public boolean canBeOptimizedUnordered()
{
return true;
}
@Override @Override
public String toString() public String toString()
{ {

View File

@ -31,7 +31,6 @@ import io.druid.segment.data.IndexedInts;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator;
/** /**
*/ */
@ -40,7 +39,6 @@ public class PooledTopNAlgorithm
{ {
private final Capabilities capabilities; private final Capabilities capabilities;
private final TopNQuery query; private final TopNQuery query;
private final Comparator<?> comparator;
private final StupidPool<ByteBuffer> bufferPool; private final StupidPool<ByteBuffer> bufferPool;
public PooledTopNAlgorithm( public PooledTopNAlgorithm(
@ -53,8 +51,6 @@ public class PooledTopNAlgorithm
this.capabilities = capabilities; this.capabilities = capabilities;
this.query = query; this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
this.bufferPool = bufferPool; this.bufferPool = bufferPool;
} }
@ -115,12 +111,15 @@ public class PooledTopNAlgorithm
} }
@Override @Override
protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess) protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess)
{ {
final TopNMetricSpecBuilder<int[]> arrayProvider = params.getArrayProvider(); final TopNMetricSpecBuilder<int[]> arrayProvider = params.getArrayProvider();
if (!query.getDimensionSpec().preservesOrdering()) {
return arrayProvider.build();
}
arrayProvider.ignoreFirstN(numProcessed); arrayProvider.ignoreFirstN(numProcessed);
arrayProvider.keepOnlyN(numToProcess); arrayProvider.keepOnlyN(numToProcess);
return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build(); return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build();

View File

@ -23,7 +23,6 @@ import com.google.common.collect.Maps;
import com.google.common.collect.MinMaxPriorityQueue; import com.google.common.collect.MinMaxPriorityQueue;
import io.druid.query.Result; import io.druid.query.Result;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.DimensionSpec;
import org.joda.time.DateTime; import org.joda.time.DateTime;
@ -40,6 +39,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
private final DateTime timestamp; private final DateTime timestamp;
private final DimensionSpec dimSpec; private final DimensionSpec dimSpec;
private final String previousStop; private final String previousStop;
private final Comparator comparator;
private final List<AggregatorFactory> aggFactories; private final List<AggregatorFactory> aggFactories;
private MinMaxPriorityQueue<DimValHolder> pQueue = null; private MinMaxPriorityQueue<DimValHolder> pQueue = null;
@ -55,6 +55,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
this.timestamp = timestamp; this.timestamp = timestamp;
this.dimSpec = dimSpec; this.dimSpec = dimSpec;
this.previousStop = previousStop; this.previousStop = previousStop;
this.comparator = comparator;
this.aggFactories = aggFactories; this.aggFactories = aggFactories;
instantiatePQueue(threshold, comparator); instantiatePQueue(threshold, comparator);
@ -69,7 +70,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
{ {
Map<String, Object> metricValues = Maps.newLinkedHashMap(); Map<String, Object> metricValues = Maps.newLinkedHashMap();
if (dimName.compareTo(previousStop) > 0) { if (comparator.compare(dimName, previousStop) > 0) {
metricValues.put(dimSpec.getOutputName(), dimName); metricValues.put(dimSpec.getOutputName(), dimName);
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator(); Iterator<AggregatorFactory> aggsIter = aggFactories.iterator();
for (Object metricVal : metricVals) { for (Object metricVal : metricVals) {

View File

@ -60,4 +60,6 @@ public interface TopNMetricSpec
public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector); public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector);
public String getMetricName(DimensionSpec dimSpec); public String getMetricName(DimensionSpec dimSpec);
public boolean canBeOptimizedUnordered();
} }

View File

@ -31,7 +31,12 @@ public class TopNParams
private final int cardinality; private final int cardinality;
private final int numValuesPerPass; private final int numValuesPerPass;
protected TopNParams(DimensionSelector dimSelector, Cursor cursor, int cardinality, int numValuesPerPass) protected TopNParams(
DimensionSelector dimSelector,
Cursor cursor,
int cardinality,
int numValuesPerPass
)
{ {
this.dimSelector = dimSelector; this.dimSelector = dimSelector;
this.cursor = cursor; this.cursor = cursor;

View File

@ -134,32 +134,32 @@ public class TopNQueryRunnerTest
new TopNResultValue( new TopNResultValue(
Arrays.<Map<String, Object>>asList( Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "total_market") .put(providerDimension, "total_market")
.put("rows", 186L) .put("rows", 186L)
.put("index", 215679.82879638672D) .put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D) .put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D) .put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D) .put("minIndex", 792.3260498046875D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "upfront") .put(providerDimension, "upfront")
.put("rows", 186L) .put("rows", 186L)
.put("index", 192046.1060180664D) .put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D) .put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D) .put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D) .put("minIndex", 545.9906005859375D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "spot") .put(providerDimension, "spot")
.put("rows", 837L) .put("rows", 837L)
.put("index", 95606.57232284546D) .put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D) .put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9) .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D) .put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D) .put("minIndex", 59.02102279663086D)
.build() .build()
) )
) )
) )
@ -198,32 +198,32 @@ public class TopNQueryRunnerTest
new TopNResultValue( new TopNResultValue(
Arrays.<Map<String, Object>>asList( Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "total_market") .put(providerDimension, "total_market")
.put("rows", 186L) .put("rows", 186L)
.put("index", 215679.82879638672D) .put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D) .put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D) .put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D) .put("minIndex", 792.3260498046875D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "upfront") .put(providerDimension, "upfront")
.put("rows", 186L) .put("rows", 186L)
.put("index", 192046.1060180664D) .put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D) .put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D) .put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D) .put("minIndex", 545.9906005859375D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put(providerDimension, "spot") .put(providerDimension, "spot")
.put("rows", 837L) .put("rows", 837L)
.put("index", 95606.57232284546D) .put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D) .put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9) .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D) .put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D) .put("minIndex", 59.02102279663086D)
.build() .build()
) )
) )
) )
@ -263,32 +263,32 @@ public class TopNQueryRunnerTest
new TopNResultValue( new TopNResultValue(
Arrays.<Map<String, Object>>asList( Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put("provider", "spot") .put("provider", "spot")
.put("rows", 837L) .put("rows", 837L)
.put("index", 95606.57232284546D) .put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D) .put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9) .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D) .put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D) .put("minIndex", 59.02102279663086D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put("provider", "total_market") .put("provider", "total_market")
.put("rows", 186L) .put("rows", 186L)
.put("index", 215679.82879638672D) .put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D) .put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D) .put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D) .put("minIndex", 792.3260498046875D)
.build(), .build(),
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
.put("provider", "upfront") .put("provider", "upfront")
.put("rows", 186L) .put("rows", 186L)
.put("index", 192046.1060180664D) .put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D) .put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2) .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D) .put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D) .put("minIndex", 545.9906005859375D)
.build() .build()
) )
) )
) )
@ -696,18 +696,18 @@ public class TopNQueryRunnerTest
public void testTopNWithNonExistentFilterMultiDim() public void testTopNWithNonExistentFilterMultiDim()
{ {
AndDimFilter andDimFilter = Druids.newAndDimFilterBuilder() AndDimFilter andDimFilter = Druids.newAndDimFilterBuilder()
.fields( .fields(
Lists.<DimFilter>newArrayList( Lists.<DimFilter>newArrayList(
Druids.newSelectorDimFilterBuilder() Druids.newSelectorDimFilterBuilder()
.dimension(providerDimension) .dimension(providerDimension)
.value("billyblank") .value("billyblank")
.build(), .build(),
Druids.newSelectorDimFilterBuilder() Druids.newSelectorDimFilterBuilder()
.dimension(QueryRunnerTestHelper.qualityDimension) .dimension(QueryRunnerTestHelper.qualityDimension)
.value("mezzanine") .value("mezzanine")
.build() .build()
) )
).build(); ).build();
TopNQuery query = new TopNQueryBuilder() TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource) .dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran) .granularity(QueryRunnerTestHelper.allGran)
@ -1077,6 +1077,88 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query)); TestHelper.assertExpectedResults(expectedResults, runner.run(query));
} }
@Test
public void testTopNInvertedLexicographicWithPreviousStop()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(providerDimension)
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("upfront")))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "total_market",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "spot",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testTopNInvertedLexicographicWithNonExistingPreviousStop()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(providerDimension)
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u")))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "total_market",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "spot",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test @Test
public void testTopNDimExtraction() public void testTopNDimExtraction()
{ {
@ -1129,6 +1211,246 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query)); TestHelper.assertExpectedResults(expectedResults, runner.run(query));
} }
@Test
public void testTopNLexicographicDimExtraction()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(
new ExtractionDimensionSpec(
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
)
)
.metric(new LexicographicTopNMetricSpec(null))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "s",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
),
ImmutableMap.<String, Object>of(
providerDimension, "t",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "u",
"rows", 4L,
"index", 4875.669677734375D,
"addRowsIndexConstant", 4880.669677734375D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testInvertedTopNLexicographicDimExtraction2()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(
new ExtractionDimensionSpec(
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
)
)
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null)))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "t",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "o",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
),
ImmutableMap.<String, Object>of(
providerDimension, "f",
"rows", 4L,
"index", 4875.669677734375D,
"addRowsIndexConstant", 4880.669677734375D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testTopNLexicographicDimExtractionWithPreviousStop()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(
new ExtractionDimensionSpec(
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
)
)
.metric(new LexicographicTopNMetricSpec("spot"))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "t",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "u",
"rows", 4L,
"index", 4875.669677734375D,
"addRowsIndexConstant", 4880.669677734375D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(
new ExtractionDimensionSpec(
providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
)
)
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u")))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "t",
"rows", 4L,
"index", 5351.814697265625D,
"addRowsIndexConstant", 5356.814697265625D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
),
ImmutableMap.<String, Object>of(
providerDimension, "s",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(
new ExtractionDimensionSpec(
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
)
)
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p")))
.threshold(4)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<TopNResultValue>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
providerDimension, "o",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
),
ImmutableMap.<String, Object>of(
providerDimension, "f",
"rows", 4L,
"index", 4875.669677734375D,
"addRowsIndexConstant", 4880.669677734375D,
"uniques", QueryRunnerTestHelper.UNIQUES_2
)
)
)
)
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test @Test
public void testInvertedTopNQuery() public void testInvertedTopNQuery()
{ {
@ -1179,7 +1501,8 @@ public class TopNQueryRunnerTest
} }
@Test @Test
public void testTopNDependentPostAgg() { public void testTopNDependentPostAgg()
{
TopNQuery query = new TopNQueryBuilder() TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource) .dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran) .granularity(QueryRunnerTestHelper.allGran)