From 07a95a0e059c238e58c7be912ce01a8760b656f5 Mon Sep 17 00:00:00 2001 From: fjy Date: Tue, 27 May 2014 17:02:28 -0700 Subject: [PATCH 1/6] Fix inverted lexicographic topNs --- .../druid/query/topn/BaseTopNAlgorithm.java | 7 +- .../topn/DimExtractionTopNAlgorithm.java | 4 - .../query/topn/InvertedTopNMetricSpec.java | 22 +- .../topn/LexicographicTopNMetricSpec.java | 31 ++- .../query/topn/NumericTopNMetricSpec.java | 9 +- .../druid/query/topn/PooledTopNAlgorithm.java | 5 - .../io/druid/query/topn/TopNBinaryFn.java | 3 +- .../topn/TopNLexicographicResultBuilder.java | 8 +- .../io/druid/query/topn/TopNMetricSpec.java | 5 +- .../druid/query/topn/TopNQueryRunnerTest.java | 219 +++++++++++------- 10 files changed, 203 insertions(+), 110 deletions(-) diff --git a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java index 9d8baceb2c9..c438de17183 100644 --- a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java @@ -234,15 +234,16 @@ public abstract class BaseTopNAlgorithm, TopNParams> { private final TopNQuery query; - private final Comparator comparator; public DimExtractionTopNAlgorithm( Capabilities capabilities, @@ -44,8 +42,6 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm aggFactories, - List postAggs + List postAggs, + boolean optimizeResultStorage ) { - return delegate.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggFactories, postAggs); + return delegate.getResultBuilder( + timestamp, + dimSpec, + threshold, + comparator, + aggFactories, + postAggs, + canBeOptimizedUnordered() + ); } @Override @@ -94,6 +103,9 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec @Override public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder) { + if (!canBeOptimizedUnordered()) { + return builder; + } return delegate.configureOptimizer(builder); } @@ -109,6 +121,12 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec return delegate.getMetricName(dimSpec); } + @Override + public boolean canBeOptimizedUnordered() + { + return delegate.canBeOptimizedUnordered(); + } + @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java index b7c7c6a2565..0cad74eb0bb 100644 --- a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java @@ -82,10 +82,19 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs + List postAggs, + boolean optimizeResultStorage ) { - return new TopNLexicographicResultBuilder(timestamp, dimSpec, threshold, previousStop, comparator, aggFactories); + return new TopNLexicographicResultBuilder( + timestamp, + dimSpec, + threshold, + previousStop, + comparator, + aggFactories, + optimizeResultStorage + ); } @Override @@ -119,6 +128,12 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec return dimSpec.getOutputName(); } + @Override + public boolean canBeOptimizedUnordered() + { + return false; + } + @Override public String toString() { @@ -130,12 +145,18 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } LexicographicTopNMetricSpec that = (LexicographicTopNMetricSpec) o; - if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) return false; + if (previousStop != null ? !previousStop.equals(that.previousStop) : that.previousStop != null) { + return false; + } return true; } diff --git a/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java index 9ad97e239cd..b88695b61eb 100644 --- a/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java @@ -123,7 +123,8 @@ public class NumericTopNMetricSpec implements TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs + List postAggs, + boolean optimizeResultStorage ) { return new TopNNumericResultBuilder(timestamp, dimSpec, metric, threshold, comparator, aggFactories, postAggs); @@ -158,6 +159,12 @@ public class NumericTopNMetricSpec implements TopNMetricSpec return metric; } + @Override + public boolean canBeOptimizedUnordered() + { + return true; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index a8e3f324467..3356a2a44ba 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -31,7 +31,6 @@ import io.druid.segment.data.IndexedInts; import java.nio.ByteBuffer; import java.util.Arrays; -import java.util.Comparator; /** */ @@ -40,7 +39,6 @@ public class PooledTopNAlgorithm { private final Capabilities capabilities; private final TopNQuery query; - private final Comparator comparator; private final StupidPool bufferPool; public PooledTopNAlgorithm( @@ -53,8 +51,6 @@ public class PooledTopNAlgorithm this.capabilities = capabilities; this.query = query; - this.comparator = query.getTopNMetricSpec() - .getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs()); this.bufferPool = bufferPool; } @@ -115,7 +111,6 @@ public class PooledTopNAlgorithm } - @Override protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java index 4c02da447aa..fa43a0e198c 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java @@ -129,7 +129,8 @@ public class TopNBinaryFn implements BinaryFn, Result aggFactories; + private final boolean optimizeResultStorage; private MinMaxPriorityQueue pQueue = null; public TopNLexicographicResultBuilder( @@ -49,13 +49,15 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder int threshold, String previousStop, final Comparator comparator, - List aggFactories + List aggFactories, + boolean optimizeResultStorage ) { this.timestamp = timestamp; this.dimSpec = dimSpec; this.previousStop = previousStop; this.aggFactories = aggFactories; + this.optimizeResultStorage = optimizeResultStorage; instantiatePQueue(threshold, comparator); } @@ -69,7 +71,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder { Map metricValues = Maps.newLinkedHashMap(); - if (dimName.compareTo(previousStop) > 0) { + if (!optimizeResultStorage || dimName.compareTo(previousStop) > 0) { metricValues.put(dimSpec.getOutputName(), dimName); Iterator aggsIter = aggFactories.iterator(); for (Object metricVal : metricVals) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java index 6e934e32dd6..f7d0718dce9 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java @@ -50,7 +50,8 @@ public interface TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs + List postAggs, + boolean optimizeResultStorage ); public byte[] getCacheKey(); @@ -60,4 +61,6 @@ public interface TopNMetricSpec public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector); public String getMetricName(DimensionSpec dimSpec); + + public boolean canBeOptimizedUnordered(); } diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index c8628bfdca5..8d57839fdf8 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -134,32 +134,32 @@ public class TopNQueryRunnerTest new TopNResultValue( Arrays.>asList( ImmutableMap.builder() - .put(providerDimension, "total_market") - .put("rows", 186L) - .put("index", 215679.82879638672D) - .put("addRowsIndexConstant", 215866.82879638672D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1743.9217529296875D) - .put("minIndex", 792.3260498046875D) - .build(), + .put(providerDimension, "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + .put("addRowsIndexConstant", 215866.82879638672D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .build(), ImmutableMap.builder() - .put(providerDimension, "upfront") - .put("rows", 186L) - .put("index", 192046.1060180664D) - .put("addRowsIndexConstant", 192233.1060180664D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1870.06103515625D) - .put("minIndex", 545.9906005859375D) - .build(), + .put(providerDimension, "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .build(), ImmutableMap.builder() - .put(providerDimension, "spot") - .put("rows", 837L) - .put("index", 95606.57232284546D) - .put("addRowsIndexConstant", 96444.57232284546D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_9) - .put("maxIndex", 277.2735290527344D) - .put("minIndex", 59.02102279663086D) - .build() + .put(providerDimension, "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_9) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .build() ) ) ) @@ -198,32 +198,32 @@ public class TopNQueryRunnerTest new TopNResultValue( Arrays.>asList( ImmutableMap.builder() - .put(providerDimension, "total_market") - .put("rows", 186L) - .put("index", 215679.82879638672D) - .put("addRowsIndexConstant", 215866.82879638672D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1743.9217529296875D) - .put("minIndex", 792.3260498046875D) - .build(), + .put(providerDimension, "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + .put("addRowsIndexConstant", 215866.82879638672D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .build(), ImmutableMap.builder() - .put(providerDimension, "upfront") - .put("rows", 186L) - .put("index", 192046.1060180664D) - .put("addRowsIndexConstant", 192233.1060180664D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1870.06103515625D) - .put("minIndex", 545.9906005859375D) - .build(), + .put(providerDimension, "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .build(), ImmutableMap.builder() - .put(providerDimension, "spot") - .put("rows", 837L) - .put("index", 95606.57232284546D) - .put("addRowsIndexConstant", 96444.57232284546D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_9) - .put("maxIndex", 277.2735290527344D) - .put("minIndex", 59.02102279663086D) - .build() + .put(providerDimension, "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_9) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .build() ) ) ) @@ -263,32 +263,32 @@ public class TopNQueryRunnerTest new TopNResultValue( Arrays.>asList( ImmutableMap.builder() - .put("provider", "spot") - .put("rows", 837L) - .put("index", 95606.57232284546D) - .put("addRowsIndexConstant", 96444.57232284546D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_9) - .put("maxIndex", 277.2735290527344D) - .put("minIndex", 59.02102279663086D) - .build(), + .put("provider", "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_9) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .build(), ImmutableMap.builder() - .put("provider", "total_market") - .put("rows", 186L) - .put("index", 215679.82879638672D) - .put("addRowsIndexConstant", 215866.82879638672D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1743.9217529296875D) - .put("minIndex", 792.3260498046875D) - .build(), + .put("provider", "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + .put("addRowsIndexConstant", 215866.82879638672D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .build(), ImmutableMap.builder() - .put("provider", "upfront") - .put("rows", 186L) - .put("index", 192046.1060180664D) - .put("addRowsIndexConstant", 192233.1060180664D) - .put("uniques", QueryRunnerTestHelper.UNIQUES_2) - .put("maxIndex", 1870.06103515625D) - .put("minIndex", 545.9906005859375D) - .build() + .put("provider", "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("uniques", QueryRunnerTestHelper.UNIQUES_2) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .build() ) ) ) @@ -696,18 +696,18 @@ public class TopNQueryRunnerTest public void testTopNWithNonExistentFilterMultiDim() { AndDimFilter andDimFilter = Druids.newAndDimFilterBuilder() - .fields( - Lists.newArrayList( - Druids.newSelectorDimFilterBuilder() - .dimension(providerDimension) - .value("billyblank") - .build(), - Druids.newSelectorDimFilterBuilder() - .dimension(QueryRunnerTestHelper.qualityDimension) - .value("mezzanine") - .build() - ) - ).build(); + .fields( + Lists.newArrayList( + Druids.newSelectorDimFilterBuilder() + .dimension(providerDimension) + .value("billyblank") + .build(), + Druids.newSelectorDimFilterBuilder() + .dimension(QueryRunnerTestHelper.qualityDimension) + .value("mezzanine") + .build() + ) + ).build(); TopNQuery query = new TopNQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -1077,6 +1077,54 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testTopNInvertedLexicographicWithPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension(providerDimension) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("upfront"))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testTopNDimExtraction() { @@ -1179,7 +1227,8 @@ public class TopNQueryRunnerTest } @Test - public void testTopNDependentPostAgg() { + public void testTopNDependentPostAgg() + { TopNQuery query = new TopNQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) From b9015cee34650ea928cfb6a8f7a700f3ac53dfc2 Mon Sep 17 00:00:00 2001 From: fjy Date: Tue, 27 May 2014 17:25:00 -0700 Subject: [PATCH 2/6] fix brokeness with dim extraction dim specs --- .../query/dimension/DefaultDimensionSpec.java | 6 ++++++ .../druid/query/dimension/DimensionSpec.java | 5 +++++ .../dimension/ExtractionDimensionSpec.java | 6 ++++++ .../query/extraction/DimExtractionFn.java | 5 ++++- .../extraction/JavascriptDimExtractionFn.java | 6 ++++++ .../extraction/PartialDimExtractionFn.java | 6 ++++++ .../extraction/RegexDimExtractionFn.java | 6 ++++++ .../SearchQuerySpecDimExtractionFn.java | 6 ++++++ .../query/extraction/TimeDimExtractionFn.java | 6 ++++++ .../AggregateTopNMetricFirstAlgorithm.java | 7 ++++++- .../topn/DimExtractionTopNAlgorithm.java | 19 +++++++++++++++---- .../druid/query/topn/PooledTopNAlgorithm.java | 4 ++++ .../java/io/druid/query/topn/TopNParams.java | 7 ++++++- 13 files changed, 82 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java index 8e18ce61228..04169337357 100644 --- a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java @@ -76,6 +76,12 @@ public class DefaultDimensionSpec implements DimensionSpec .array(); } + @Override + public boolean canTransformValues() + { + return false; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java index 3c552e20763..5e70fd690aa 100644 --- a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java @@ -33,7 +33,12 @@ import io.druid.query.extraction.DimExtractionFn; public interface DimensionSpec { public String getDimension(); + public String getOutputName(); + public DimExtractionFn getDimExtractionFn(); + public byte[] getCacheKey(); + + public boolean canTransformValues(); } diff --git a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java index 9fe480e396d..a2f00439a36 100644 --- a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java @@ -83,6 +83,12 @@ public class ExtractionDimensionSpec implements DimensionSpec .array(); } + @Override + public boolean canTransformValues() + { + return dimExtractionFn.canTransformValues(); + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java index 0509c92714a..17fcab06b00 100644 --- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; /** */ -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property="type") +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), @@ -35,5 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; public interface DimExtractionFn { public byte[] getCacheKey(); + public String apply(String dimValue); + + public boolean canTransformValues(); } diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index f552ef4e9cd..b47bacb89b3 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -92,4 +92,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn { return fn.apply(dimValue); } + + @Override + public boolean canTransformValues() + { + return true; + } } diff --git a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java index 5ae4e2bf019..513634534c4 100644 --- a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java @@ -67,6 +67,12 @@ public class PartialDimExtractionFn implements DimExtractionFn return expr; } + @Override + public boolean canTransformValues() + { + return false; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index c25d0ba6c7a..5df623eb44d 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -67,6 +67,12 @@ public class RegexDimExtractionFn implements DimExtractionFn return expr; } + @Override + public boolean canTransformValues() + { + return true; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index 02c13eca630..f3e9774dc3c 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -63,6 +63,12 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn return searchQuerySpec.accept(dimValue) ? dimValue : null; } + @Override + public boolean canTransformValues() + { + return false; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 57b7f6617d8..1cc1ec9a12f 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -87,6 +87,12 @@ public class TimeDimExtractionFn implements DimExtractionFn return resultFormat; } + @Override + public boolean canTransformValues() + { + return true; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java index 254d13d581b..51f8d01c28e 100644 --- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -62,7 +62,12 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm arrayProvider = params.getArrayProvider(); + if (query.getDimensionSpec().canTransformValues()) { + return arrayProvider.build(); + } + arrayProvider.ignoreFirstN(numProcessed); arrayProvider.keepOnlyN(numToProcess); return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build(); diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index 8ccc85da284..fbeab73792e 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -31,7 +31,12 @@ public class TopNParams private final int cardinality; private final int numValuesPerPass; - protected TopNParams(DimensionSelector dimSelector, Cursor cursor, int cardinality, int numValuesPerPass) + protected TopNParams( + DimensionSelector dimSelector, + Cursor cursor, + int cardinality, + int numValuesPerPass + ) { this.dimSelector = dimSelector; this.cursor = cursor; From 9f62589d71c21ebd6b2762d6ac10dbfd9b9f76e7 Mon Sep 17 00:00:00 2001 From: fjy Date: Tue, 27 May 2014 19:39:55 -0700 Subject: [PATCH 3/6] fix bug with previousStop not being respected for inverse lexi topN --- .../AggregateTopNMetricFirstAlgorithm.java | 4 -- .../druid/query/topn/BaseTopNAlgorithm.java | 3 +- .../query/topn/InvertedTopNMetricSpec.java | 6 +-- .../topn/LexicographicTopNMetricSpec.java | 6 +-- .../query/topn/NumericTopNMetricSpec.java | 3 +- .../io/druid/query/topn/TopNBinaryFn.java | 3 +- .../topn/TopNLexicographicResultBuilder.java | 9 ++--- .../io/druid/query/topn/TopNMetricSpec.java | 3 +- .../druid/query/topn/TopNQueryRunnerTest.java | 40 +++++++++++++++++-- 9 files changed, 49 insertions(+), 28 deletions(-) diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java index 51f8d01c28e..cd9593192a6 100644 --- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -31,7 +31,6 @@ import io.druid.segment.DimensionSelector; import java.nio.ByteBuffer; import java.util.Arrays; -import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -41,7 +40,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm comparator; private final StupidPool bufferPool; public AggregateTopNMetricFirstAlgorithm( @@ -52,8 +50,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm aggFactories, - List postAggs, - boolean optimizeResultStorage + List postAggs ) { return delegate.getResultBuilder( @@ -87,8 +86,7 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec threshold, comparator, aggFactories, - postAggs, - canBeOptimizedUnordered() + postAggs ); } diff --git a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java index 0cad74eb0bb..44258273980 100644 --- a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java @@ -82,8 +82,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs, - boolean optimizeResultStorage + List postAggs ) { return new TopNLexicographicResultBuilder( @@ -92,8 +91,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec threshold, previousStop, comparator, - aggFactories, - optimizeResultStorage + aggFactories ); } diff --git a/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java index b88695b61eb..ad0a80fe719 100644 --- a/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java @@ -123,8 +123,7 @@ public class NumericTopNMetricSpec implements TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs, - boolean optimizeResultStorage + List postAggs ) { return new TopNNumericResultBuilder(timestamp, dimSpec, metric, threshold, comparator, aggFactories, postAggs); diff --git a/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java index fa43a0e198c..4c02da447aa 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java @@ -129,8 +129,7 @@ public class TopNBinaryFn implements BinaryFn, Result aggFactories; - private final boolean optimizeResultStorage; private MinMaxPriorityQueue pQueue = null; public TopNLexicographicResultBuilder( @@ -49,15 +49,14 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder int threshold, String previousStop, final Comparator comparator, - List aggFactories, - boolean optimizeResultStorage + List aggFactories ) { this.timestamp = timestamp; this.dimSpec = dimSpec; this.previousStop = previousStop; + this.comparator = comparator; this.aggFactories = aggFactories; - this.optimizeResultStorage = optimizeResultStorage; instantiatePQueue(threshold, comparator); } @@ -71,7 +70,7 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder { Map metricValues = Maps.newLinkedHashMap(); - if (!optimizeResultStorage || dimName.compareTo(previousStop) > 0) { + if (comparator.compare(dimName, previousStop) > 0) { metricValues.put(dimSpec.getOutputName(), dimName); Iterator aggsIter = aggFactories.iterator(); for (Object metricVal : metricVals) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java index f7d0718dce9..25e710d1cff 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java @@ -50,8 +50,7 @@ public interface TopNMetricSpec int threshold, Comparator comparator, List aggFactories, - List postAggs, - boolean optimizeResultStorage + List postAggs ); public byte[] getCacheKey(); diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 8d57839fdf8..6051381432a 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -1097,12 +1097,46 @@ public class TopNQueryRunnerTest new TopNResultValue( Arrays.>asList( ImmutableMap.of( - providerDimension, "upfront", + providerDimension, "total_market", "rows", 4L, - "index", 4875.669677734375D, - "addRowsIndexConstant", 4880.669677734375D, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, "uniques", QueryRunnerTestHelper.UNIQUES_2 ), + ImmutableMap.of( + providerDimension, "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNInvertedLexicographicWithNonExistingPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension(providerDimension) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u"))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( ImmutableMap.of( providerDimension, "total_market", "rows", 4L, From 8bcf4bc60e13a3e748b587cf3b84d510226b5b0a Mon Sep 17 00:00:00 2001 From: fjy Date: Tue, 27 May 2014 19:47:13 -0700 Subject: [PATCH 4/6] address code review comments --- .../query/dimension/DefaultDimensionSpec.java | 2 +- .../druid/query/dimension/DimensionSpec.java | 2 +- .../dimension/ExtractionDimensionSpec.java | 4 +- .../query/extraction/DimExtractionFn.java | 2 +- .../extraction/JavascriptDimExtractionFn.java | 2 +- .../extraction/PartialDimExtractionFn.java | 2 +- .../extraction/RegexDimExtractionFn.java | 2 +- .../SearchQuerySpecDimExtractionFn.java | 2 +- .../query/extraction/TimeDimExtractionFn.java | 2 +- .../topn/DimExtractionTopNAlgorithm.java | 2 +- .../druid/query/topn/PooledTopNAlgorithm.java | 2 +- .../druid/query/topn/TopNQueryRunnerTest.java | 143 ++++++++++++++++++ 12 files changed, 155 insertions(+), 12 deletions(-) diff --git a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java index 04169337357..78fb2b4d162 100644 --- a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java @@ -77,7 +77,7 @@ public class DefaultDimensionSpec implements DimensionSpec } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return false; } diff --git a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java index 5e70fd690aa..b8cbc58dfc5 100644 --- a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java @@ -40,5 +40,5 @@ public interface DimensionSpec public byte[] getCacheKey(); - public boolean canTransformValues(); + public boolean preservesOrdering(); } diff --git a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java index a2f00439a36..878de5bbad5 100644 --- a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java @@ -84,9 +84,9 @@ public class ExtractionDimensionSpec implements DimensionSpec } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { - return dimExtractionFn.canTransformValues(); + return dimExtractionFn.preservesOrdering(); } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java index 17fcab06b00..638204b76f8 100644 --- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java @@ -38,5 +38,5 @@ public interface DimExtractionFn public String apply(String dimValue); - public boolean canTransformValues(); + public boolean preservesOrdering(); } diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index b47bacb89b3..afdc86d01c3 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -94,7 +94,7 @@ public class JavascriptDimExtractionFn implements DimExtractionFn } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return true; } diff --git a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java index 513634534c4..2ff14633aee 100644 --- a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java @@ -68,7 +68,7 @@ public class PartialDimExtractionFn implements DimExtractionFn } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return false; } diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index 5df623eb44d..95576b73f97 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -68,7 +68,7 @@ public class RegexDimExtractionFn implements DimExtractionFn } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return true; } diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index f3e9774dc3c..0fecf90a4d3 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -64,7 +64,7 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return false; } diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 1cc1ec9a12f..6dc57f3f6ec 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -88,7 +88,7 @@ public class TimeDimExtractionFn implements DimExtractionFn } @Override - public boolean canTransformValues() + public boolean preservesOrdering() { return true; } diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index d6e7dc6aa62..f1310e51098 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -65,7 +65,7 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm arrayProvider = params.getArrayProvider(); - if (query.getDimensionSpec().canTransformValues()) { + if (query.getDimensionSpec().preservesOrdering()) { return arrayProvider.build(); } diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 6051381432a..efc7785effe 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -1211,6 +1211,149 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testTopNLexicographicDimExtraction() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("(.)") + ) + ) + .metric(new LexicographicTopNMetricSpec(null)) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "s", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + ImmutableMap.of( + providerDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "u", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNLexicographicDimExtractionWithPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("(.)") + ) + ) + .metric(new LexicographicTopNMetricSpec("spot")) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "u", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + + @Test + public void testInvertedTopNLexicographicDimExtractionWithPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("(.)") + ) + ) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u"))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "s", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testInvertedTopNQuery() { From 2ee1defce937d736054cd6935b1906f5f93f8d1a Mon Sep 17 00:00:00 2001 From: fjy Date: Wed, 28 May 2014 10:27:00 -0700 Subject: [PATCH 5/6] better handle dim extraction regex cases --- .../extraction/JavascriptDimExtractionFn.java | 2 +- .../extraction/PartialDimExtractionFn.java | 2 +- .../extraction/RegexDimExtractionFn.java | 2 +- .../SearchQuerySpecDimExtractionFn.java | 2 +- .../query/extraction/TimeDimExtractionFn.java | 2 +- .../topn/DimExtractionTopNAlgorithm.java | 4 +- .../query/topn/InvertedTopNMetricSpec.java | 23 ++++- .../topn/LexicographicTopNMetricSpec.java | 11 ++- .../druid/query/topn/TopNQueryRunnerTest.java | 97 +++++++++++++++++++ 9 files changed, 134 insertions(+), 11 deletions(-) diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index afdc86d01c3..d61d3b26b41 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -96,6 +96,6 @@ public class JavascriptDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } } diff --git a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java index 2ff14633aee..8b6a3b58017 100644 --- a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java @@ -70,7 +70,7 @@ public class PartialDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return false; + return true; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index 95576b73f97..50a418afed3 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -70,7 +70,7 @@ public class RegexDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index 0fecf90a4d3..b7579ea1399 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -66,7 +66,7 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return false; + return true; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 6dc57f3f6ec..215f8c32aee 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -90,7 +90,7 @@ public class TimeDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } @Override diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index f1310e51098..54ee3c5e553 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -65,9 +65,11 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm aggregatorSpecs, - List postAggregatorSpecs + final List aggregatorSpecs, + final List postAggregatorSpecs ) { - return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs)); + return Comparators.inverse( + new Comparator() + { + @Override + public int compare(Object o1, Object o2) + { + // nulls last + if (o1 == null) { + return 1; + } + if (o2 == null) { + return -1; + } + return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2); + } + } + ); } @Override diff --git a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java index 44258273980..a23dd2f186f 100644 --- a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java @@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @Override public int compare(String s, String s2) { + // null first + if (s == null) { + return -1; + } + if (s2 == null) { + return 1; + } return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8)); } }; @@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @JsonProperty("previousStop") String previousStop ) { - this.previousStop = (previousStop == null) ? "" : previousStop; + this.previousStop = previousStop; } @Override @@ -98,7 +105,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @Override public byte[] getCacheKey() { - byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8); + byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8); return ByteBuffer.allocate(1 + previousStopBytes.length) .put(CACHE_TYPE_ID) diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index efc7785effe..22b750faf00 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -1263,6 +1263,58 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testInvertedTopNLexicographicDimExtraction2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("..(.)") + ) + ) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "o", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + ImmutableMap.of( + providerDimension, "f", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testTopNLexicographicDimExtractionWithPreviousStop() { @@ -1354,6 +1406,51 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("..(.)") + ) + ) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p"))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "o", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + ImmutableMap.of( + providerDimension, "f", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testInvertedTopNQuery() { From 642897dca3aabcd38f0aec1047ba593599cf03da Mon Sep 17 00:00:00 2001 From: fjy Date: Wed, 28 May 2014 10:43:03 -0700 Subject: [PATCH 6/6] fix inverted logic --- .../java/io/druid/query/dimension/DefaultDimensionSpec.java | 2 +- .../src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java index 78fb2b4d162..ee5ee674293 100644 --- a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java @@ -79,7 +79,7 @@ public class DefaultDimensionSpec implements DimensionSpec @Override public boolean preservesOrdering() { - return false; + return true; } @Override diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index 437b056da6c..ea4fd37547d 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -116,7 +116,7 @@ public class PooledTopNAlgorithm { final TopNMetricSpecBuilder arrayProvider = params.getArrayProvider(); - if (query.getDimensionSpec().preservesOrdering()) { + if (!query.getDimensionSpec().preservesOrdering()) { return arrayProvider.build(); }