Merge pull request #568 from metamx/fix-topn

Fix inverted lexicographic topns and topns with dim extraction dimension specs
This commit is contained in:
xvrl 2014-05-28 10:47:10 -07:00
commit 18685542ec
20 changed files with 574 additions and 119 deletions

View File

@ -76,6 +76,12 @@ public class DefaultDimensionSpec implements DimensionSpec
.array();
}
/**
 * Reports whether this dimension spec preserves ordering; this implementation always answers {@code true}.
 * NOTE(review): presumably because the default spec exposes dimension values unchanged — confirm.
 */
@Override
public boolean preservesOrdering()
{
return true;
}
@Override
public String toString()
{

View File

@ -33,7 +33,12 @@ import io.druid.query.extraction.DimExtractionFn;
/**
 * Describes the dimension a query operates on.
 */
public interface DimensionSpec
{
/** Returns the dimension this spec refers to. NOTE(review): presumably the column name in the segment — confirm. */
public String getDimension();
/** Returns the output name; the lexicographic topN result builder uses it as the key for the dimension value in each result row. */
public String getOutputName();
/** Returns the extraction function attached to this spec. NOTE(review): nullability is not visible here — confirm. */
public DimExtractionFn getDimExtractionFn();
/** Returns a byte representation of this spec. NOTE(review): presumably used when building query cache keys — confirm. */
public byte[] getCacheKey();
/**
 * Tells callers whether this spec preserves ordering. The topN algorithms in this change
 * build their dimension value selector without the metric spec's optimizer when this returns {@code false}.
 */
public boolean preservesOrdering();
}

View File

@ -83,6 +83,12 @@ public class ExtractionDimensionSpec implements DimensionSpec
.array();
}
/**
 * Reports whether this dimension spec preserves ordering by delegating to the
 * configured extraction function's own {@code preservesOrdering()} answer.
 */
@Override
public boolean preservesOrdering()
{
return dimExtractionFn.preservesOrdering();
}
@Override
public String toString()
{

View File

@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
/**
*/
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property="type")
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
@ -35,5 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
public interface DimExtractionFn
{
/** Returns a byte representation of this function. NOTE(review): presumably used when building query cache keys — confirm. */
public byte[] getCacheKey();
/**
 * Maps a dimension value to its extracted form. At least one implementation
 * (the search-query-spec based one) returns {@code null} for values it rejects.
 */
public String apply(String dimValue);
/**
 * Reports whether this function preserves ordering.
 * NOTE(review): presumably "the extracted values sort in the same order as the inputs" — confirm.
 */
public boolean preservesOrdering();
}

View File

@ -92,4 +92,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
{
return fn.apply(dimValue);
}
/**
 * Always answers {@code false} for the JavaScript extraction function.
 * NOTE(review): presumably because an arbitrary script cannot be assumed to keep values in order — confirm.
 */
@Override
public boolean preservesOrdering()
{
return false;
}
}

View File

@ -67,6 +67,12 @@ public class PartialDimExtractionFn implements DimExtractionFn
return expr;
}
/**
 * Always answers {@code true} for the partial extraction function.
 * NOTE(review): presumably because values are either kept as-is or dropped — confirm against apply().
 */
@Override
public boolean preservesOrdering()
{
return true;
}
@Override
public String toString()
{

View File

@ -67,6 +67,12 @@ public class RegexDimExtractionFn implements DimExtractionFn
return expr;
}
/**
 * Always answers {@code false} for the regex extraction function.
 * NOTE(review): presumably because the extracted group need not sort like the full value — confirm.
 */
@Override
public boolean preservesOrdering()
{
return false;
}
@Override
public String toString()
{

View File

@ -63,6 +63,12 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
return searchQuerySpec.accept(dimValue) ? dimValue : null;
}
/**
 * Always answers {@code true}: this function's apply() returns either the
 * input value itself or {@code null}, never a transformed value.
 */
@Override
public boolean preservesOrdering()
{
return true;
}
@Override
public String toString()
{

View File

@ -87,6 +87,12 @@ public class TimeDimExtractionFn implements DimExtractionFn
return resultFormat;
}
/**
 * Always answers {@code false} for the time extraction function.
 * NOTE(review): presumably because reformatted timestamps need not sort like the originals — confirm.
 */
@Override
public boolean preservesOrdering()
{
return false;
}
@Override
public String toString()
{

View File

@ -31,7 +31,6 @@ import io.druid.segment.DimensionSelector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
@ -41,7 +40,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
{
private final Capabilities capabilities;
private final TopNQuery query;
private final Comparator<?> comparator;
private final StupidPool<ByteBuffer> bufferPool;
public AggregateTopNMetricFirstAlgorithm(
@ -52,8 +50,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
{
this.capabilities = capabilities;
this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
this.bufferPool = bufferPool;
}
@ -62,7 +58,12 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
DimensionSelector dimSelector, Cursor cursor
)
{
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE);
return new TopNParams(
dimSelector,
cursor,
dimSelector.getValueCardinality(),
Integer.MAX_VALUE
);
}
@Override

View File

@ -234,8 +234,8 @@ public abstract class BaseTopNAlgorithm<DimValSelector, DimValAggregateStore, Pa
public static TopNResultBuilder makeResultBuilder(TopNParams params, TopNQuery query)
{
Comparator comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
final Comparator comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
return query.getTopNMetricSpec().getResultBuilder(
params.getCursor().getTime(),
query.getDimensionSpec(),

View File

@ -26,7 +26,6 @@ import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector;
import io.druid.segment.data.IndexedInts;
import java.util.Comparator;
import java.util.Map;
/**
@ -34,7 +33,6 @@ import java.util.Map;
public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<String, Aggregator[]>, TopNParams>
{
private final TopNQuery query;
private final Comparator<?> comparator;
public DimExtractionTopNAlgorithm(
Capabilities capabilities,
@ -44,8 +42,6 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
super(capabilities);
this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
}
@Override
@ -53,15 +49,28 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
final DimensionSelector dimSelector, final Cursor cursor
)
{
return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE);
return new TopNParams(
dimSelector,
cursor,
dimSelector.getValueCardinality(),
Integer.MAX_VALUE
);
}
/**
 * Builds the aggregator array used as the dimension value selector for this query.
 * The numProcessed and numToProcess arguments are not referenced in the visible body.
 *
 * NOTE(review): this listing is a commit view without +/- markers; the first return
 * statement appears to be the removed body and the statements after it the added body — confirm.
 */
@Override
protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess)
{
return query.getTopNMetricSpec().configureOptimizer(
new AggregatorArrayProvider(params.getDimSelector(), query, params.getCardinality())
).build();
final AggregatorArrayProvider provider = new AggregatorArrayProvider(
params.getDimSelector(),
query,
params.getCardinality()
);
// When the dimension spec does not preserve ordering, build the selector
// directly and skip the metric spec's optimizer configuration.
if (!query.getDimensionSpec().preservesOrdering()) {
return provider.build();
}
return query.getTopNMetricSpec().configureOptimizer(provider).build();
}
@Override

View File

@ -21,6 +21,7 @@ package io.druid.query.topn;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Ordering;
import com.metamx.common.guava.Comparators;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
@ -63,11 +64,27 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
@Override
public Comparator getComparator(
List<AggregatorFactory> aggregatorSpecs,
List<PostAggregator> postAggregatorSpecs
final List<AggregatorFactory> aggregatorSpecs,
final List<PostAggregator> postAggregatorSpecs
)
{
return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs));
return Comparators.inverse(
new Comparator()
{
/**
 * Null-aware wrapper around the delegate metric spec's comparator. The enclosing
 * call passes this comparator to Comparators.inverse.
 *
 * NOTE(review): when both arguments are null this returns 1 rather than 0, so
 * compare(null, null) is not symmetric; the lexicographic result builder tests
 * "compare(dimName, previousStop) > 0", so changing this may change which rows
 * are kept — confirm before altering.
 */
@Override
public int compare(Object o1, Object o2)
{
// nulls last
if (o1 == null) {
return 1;
}
if (o2 == null) {
return -1;
}
// The delegate comparator is looked up again on every comparison.
return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2);
}
}
);
}
@Override
@ -80,7 +97,14 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
List<PostAggregator> postAggs
)
{
return delegate.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggFactories, postAggs);
return delegate.getResultBuilder(
timestamp,
dimSpec,
threshold,
comparator,
aggFactories,
postAggs
);
}
@Override
@ -94,6 +118,9 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
/**
 * Hands the builder to the delegate metric spec for optimizer configuration, but only
 * when {@link #canBeOptimizedUnordered()} allows it; otherwise the builder is returned untouched.
 */
@Override
public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder)
{
  return canBeOptimizedUnordered() ? delegate.configureOptimizer(builder) : builder;
}
@ -109,6 +136,12 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
return delegate.getMetricName(dimSpec);
}
/**
 * Answers with whatever the wrapped (delegate) metric spec reports; configureOptimizer
 * in this class leaves the builder unconfigured when this is {@code false}.
 */
@Override
public boolean canBeOptimizedUnordered()
{
return delegate.canBeOptimizedUnordered();
}
@Override
public boolean equals(Object o)
{

View File

@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
/**
 * Orders strings by the unsigned lexicographic order of their UTF-8 bytes, with a
 * null first argument sorting before anything else.
 *
 * NOTE(review): when both arguments are null this returns -1 rather than 0; the
 * lexicographic result builder tests "compare(dimName, previousStop) > 0", so
 * changing this may change which rows are kept — confirm before altering.
 */
@Override
public int compare(String s, String s2)
{
// null first
if (s == null) {
return -1;
}
if (s2 == null) {
return 1;
}
return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
}
};
@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@JsonProperty("previousStop") String previousStop
)
{
this.previousStop = (previousStop == null) ? "" : previousStop;
this.previousStop = previousStop;
}
@Override
@ -85,13 +92,20 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
List<PostAggregator> postAggs
)
{
return new TopNLexicographicResultBuilder(timestamp, dimSpec, threshold, previousStop, comparator, aggFactories);
return new TopNLexicographicResultBuilder(
timestamp,
dimSpec,
threshold,
previousStop,
comparator,
aggFactories
);
}
@Override
public byte[] getCacheKey()
{
byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8);
byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8);
return ByteBuffer.allocate(1 + previousStopBytes.length)
.put(CACHE_TYPE_ID)
@ -119,6 +133,12 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
return dimSpec.getOutputName();
}
/**
 * Always answers {@code false} for the lexicographic metric spec.
 * NOTE(review): presumably because lexicographic results depend on dimension value order — confirm.
 */
@Override
public boolean canBeOptimizedUnordered()
{
return false;
}
@Override
public String toString()
{
@ -130,12 +150,18 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
/**
 * Two specs are equal when they are of the same class and their previousStop values
 * are equal, treating two null previousStop values as equal.
 *
 * NOTE(review): this listing is a commit view without +/- markers, so the single-line
 * and braced forms of each check both appear — confirm which lines are current.
 */
@Override
public boolean equals(Object o)
{
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o;
if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) return false;
if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) {
return false;
}
return true;
}

View File

@ -158,6 +158,12 @@ public class NumericTopNMetricSpec implements TopNMetricSpec
return metric;
}
/**
 * Always answers {@code true} for the numeric metric spec.
 * NOTE(review): presumably because ranking by a metric value does not depend on dimension value order — confirm.
 */
@Override
public boolean canBeOptimizedUnordered()
{
return true;
}
@Override
public String toString()
{

View File

@ -31,7 +31,6 @@ import io.druid.segment.data.IndexedInts;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
/**
*/
@ -40,7 +39,6 @@ public class PooledTopNAlgorithm
{
private final Capabilities capabilities;
private final TopNQuery query;
private final Comparator<?> comparator;
private final StupidPool<ByteBuffer> bufferPool;
public PooledTopNAlgorithm(
@ -53,8 +51,6 @@ public class PooledTopNAlgorithm
this.capabilities = capabilities;
this.query = query;
this.comparator = query.getTopNMetricSpec()
.getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs());
this.bufferPool = bufferPool;
}
@ -115,12 +111,15 @@ public class PooledTopNAlgorithm
}
@Override
protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess)
{
final TopNMetricSpecBuilder<int[]> arrayProvider = params.getArrayProvider();
if (!query.getDimensionSpec().preservesOrdering()) {
return arrayProvider.build();
}
arrayProvider.ignoreFirstN(numProcessed);
arrayProvider.keepOnlyN(numToProcess);
return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build();

View File

@ -23,7 +23,6 @@ import com.google.common.collect.Maps;
import com.google.common.collect.MinMaxPriorityQueue;
import io.druid.query.Result;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.DimensionSpec;
import org.joda.time.DateTime;
@ -40,6 +39,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
private final DateTime timestamp;
private final DimensionSpec dimSpec;
private final String previousStop;
private final Comparator comparator;
private final List<AggregatorFactory> aggFactories;
private MinMaxPriorityQueue<DimValHolder> pQueue = null;
@ -55,6 +55,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
this.timestamp = timestamp;
this.dimSpec = dimSpec;
this.previousStop = previousStop;
this.comparator = comparator;
this.aggFactories = aggFactories;
instantiatePQueue(threshold, comparator);
@ -69,7 +70,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
{
Map<String, Object> metricValues = Maps.newLinkedHashMap();
if (dimName.compareTo(previousStop) > 0) {
if (comparator.compare(dimName, previousStop) > 0) {
metricValues.put(dimSpec.getOutputName(), dimName);
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator();
for (Object metricVal : metricVals) {

View File

@ -60,4 +60,6 @@ public interface TopNMetricSpec
public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector);
public String getMetricName(DimensionSpec dimSpec);
public boolean canBeOptimizedUnordered();
}

View File

@ -31,7 +31,12 @@ public class TopNParams
private final int cardinality;
private final int numValuesPerPass;
protected TopNParams(DimensionSelector dimSelector, Cursor cursor, int cardinality, int numValuesPerPass)
protected TopNParams(
DimensionSelector dimSelector,
Cursor cursor,
int cardinality,
int numValuesPerPass
)
{
this.dimSelector = dimSelector;
this.cursor = cursor;

View File

@ -134,32 +134,32 @@ public class TopNQueryRunnerTest
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder()
.put(providerDimension, "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
.put(providerDimension, "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
ImmutableMap.<String, Object>builder()
.put(providerDimension, "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build(),
.put(providerDimension, "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build(),
ImmutableMap.<String, Object>builder()
.put(providerDimension, "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build()
.put(providerDimension, "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build()
)
)
)
@ -198,32 +198,32 @@ public class TopNQueryRunnerTest
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder()
.put(providerDimension, "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
.put(providerDimension, "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
ImmutableMap.<String, Object>builder()
.put(providerDimension, "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build(),
.put(providerDimension, "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build(),
ImmutableMap.<String, Object>builder()
.put(providerDimension, "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build()
.put(providerDimension, "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build()
)
)
)
@ -263,32 +263,32 @@ public class TopNQueryRunnerTest
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>builder()
.put("provider", "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build(),
.put("provider", "spot")
.put("rows", 837L)
.put("index", 95606.57232284546D)
.put("addRowsIndexConstant", 96444.57232284546D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
.put("maxIndex", 277.2735290527344D)
.put("minIndex", 59.02102279663086D)
.build(),
ImmutableMap.<String, Object>builder()
.put("provider", "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
.put("provider", "total_market")
.put("rows", 186L)
.put("index", 215679.82879638672D)
.put("addRowsIndexConstant", 215866.82879638672D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1743.9217529296875D)
.put("minIndex", 792.3260498046875D)
.build(),
ImmutableMap.<String, Object>builder()
.put("provider", "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build()
.put("provider", "upfront")
.put("rows", 186L)
.put("index", 192046.1060180664D)
.put("addRowsIndexConstant", 192233.1060180664D)
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
.put("maxIndex", 1870.06103515625D)
.put("minIndex", 545.9906005859375D)
.build()
)
)
)
@ -696,18 +696,18 @@ public class TopNQueryRunnerTest
public void testTopNWithNonExistentFilterMultiDim()
{
AndDimFilter andDimFilter = Druids.newAndDimFilterBuilder()
.fields(
Lists.<DimFilter>newArrayList(
Druids.newSelectorDimFilterBuilder()
.dimension(providerDimension)
.value("billyblank")
.build(),
Druids.newSelectorDimFilterBuilder()
.dimension(QueryRunnerTestHelper.qualityDimension)
.value("mezzanine")
.build()
)
).build();
.fields(
Lists.<DimFilter>newArrayList(
Druids.newSelectorDimFilterBuilder()
.dimension(providerDimension)
.value("billyblank")
.build(),
Druids.newSelectorDimFilterBuilder()
.dimension(QueryRunnerTestHelper.qualityDimension)
.value("mezzanine")
.build()
)
).build();
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
@ -1077,6 +1077,88 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
/**
 * Inverted lexicographic topN on the provider dimension with previousStop "upfront";
 * expects the rows "total_market" then "spot".
 */
@Test
public void testTopNInvertedLexicographicWithPreviousStop()
{
  final Map<String, Object> totalMarketRow = ImmutableMap.<String, Object>of(
      providerDimension, "total_market",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> spotRow = ImmutableMap.<String, Object>of(
      providerDimension, "spot",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(totalMarketRow, spotRow))
      )
  );

  final InvertedTopNMetricSpec invertedLexicographic =
      new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("upfront"));
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(providerDimension)
      .metric(invertedLexicographic)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
/**
 * Inverted lexicographic topN on the provider dimension with previousStop "u", a value
 * that is not one of the expected dimension values; expects "total_market" then "spot".
 */
@Test
public void testTopNInvertedLexicographicWithNonExistingPreviousStop()
{
  final Map<String, Object> totalMarketRow = ImmutableMap.<String, Object>of(
      providerDimension, "total_market",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> spotRow = ImmutableMap.<String, Object>of(
      providerDimension, "spot",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(totalMarketRow, spotRow))
      )
  );

  final InvertedTopNMetricSpec invertedLexicographic =
      new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u"));
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(providerDimension)
      .metric(invertedLexicographic)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
@Test
public void testTopNDimExtraction()
{
@ -1129,6 +1211,246 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
/**
 * Lexicographic topN (no previousStop) over the provider dimension passed through the
 * regex extraction "(.)"; expects the rows "s", "t", "u" in that order.
 */
@Test
public void testTopNLexicographicDimExtraction()
{
  final Map<String, Object> sRow = ImmutableMap.<String, Object>of(
      providerDimension, "s",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final Map<String, Object> tRow = ImmutableMap.<String, Object>of(
      providerDimension, "t",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> uRow = ImmutableMap.<String, Object>of(
      providerDimension, "u",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(sRow, tRow, uRow))
      )
  );

  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
  );
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(new LexicographicTopNMetricSpec(null))
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
/**
 * Inverted lexicographic topN (no previousStop) over the provider dimension passed through
 * the regex extraction "..(.)"; expects the rows "t", "o", "f" in that order.
 */
@Test
public void testInvertedTopNLexicographicDimExtraction2()
{
  final Map<String, Object> tRow = ImmutableMap.<String, Object>of(
      providerDimension, "t",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> oRow = ImmutableMap.<String, Object>of(
      providerDimension, "o",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final Map<String, Object> fRow = ImmutableMap.<String, Object>of(
      providerDimension, "f",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(tRow, oRow, fRow))
      )
  );

  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
  );
  final InvertedTopNMetricSpec invertedLexicographic =
      new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null));
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(invertedLexicographic)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
/**
 * Lexicographic topN with previousStop "spot" over the provider dimension passed through
 * the regex extraction "(.)"; expects the rows "t" then "u".
 */
@Test
public void testTopNLexicographicDimExtractionWithPreviousStop()
{
  final Map<String, Object> tRow = ImmutableMap.<String, Object>of(
      providerDimension, "t",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> uRow = ImmutableMap.<String, Object>of(
      providerDimension, "u",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(tRow, uRow))
      )
  );

  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
  );
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(new LexicographicTopNMetricSpec("spot"))
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
/**
 * Inverted lexicographic topN with previousStop "u" over the provider dimension passed
 * through the regex extraction "(.)"; expects the rows "t" then "s".
 */
@Test
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop()
{
  final Map<String, Object> tRow = ImmutableMap.<String, Object>of(
      providerDimension, "t",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> sRow = ImmutableMap.<String, Object>of(
      providerDimension, "s",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(tRow, sRow))
      )
  );

  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension, providerDimension, new RegexDimExtractionFn("(.)")
  );
  final InvertedTopNMetricSpec invertedLexicographic =
      new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u"));
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(invertedLexicographic)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
/**
 * Inverted lexicographic topN with previousStop "p" over the provider dimension passed
 * through the regex extraction "..(.)"; expects the rows "o" then "f".
 */
@Test
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2()
{
  final Map<String, Object> oRow = ImmutableMap.<String, Object>of(
      providerDimension, "o",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final Map<String, Object> fRow = ImmutableMap.<String, Object>of(
      providerDimension, "f",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final List<Result<TopNResultValue>> expected = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(oRow, fRow))
      )
  );

  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
  );
  final InvertedTopNMetricSpec invertedLexicographic =
      new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p"));
  final TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(invertedLexicographic)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  TestHelper.assertExpectedResults(expected, runner.run(topNQuery));
}
@Test
public void testInvertedTopNQuery()
{
@ -1179,7 +1501,8 @@ public class TopNQueryRunnerTest
}
@Test
public void testTopNDependentPostAgg() {
public void testTopNDependentPostAgg()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)