From 2ee1defce937d736054cd6935b1906f5f93f8d1a Mon Sep 17 00:00:00 2001 From: fjy Date: Wed, 28 May 2014 10:27:00 -0700 Subject: [PATCH] better handle dim extraction regex cases --- .../extraction/JavascriptDimExtractionFn.java | 2 +- .../extraction/PartialDimExtractionFn.java | 2 +- .../extraction/RegexDimExtractionFn.java | 2 +- .../SearchQuerySpecDimExtractionFn.java | 2 +- .../query/extraction/TimeDimExtractionFn.java | 2 +- .../topn/DimExtractionTopNAlgorithm.java | 4 +- .../query/topn/InvertedTopNMetricSpec.java | 23 ++++- .../topn/LexicographicTopNMetricSpec.java | 11 ++- .../druid/query/topn/TopNQueryRunnerTest.java | 97 +++++++++++++++++++ 9 files changed, 134 insertions(+), 11 deletions(-) diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index afdc86d01c3..d61d3b26b41 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -96,6 +96,6 @@ public class JavascriptDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } } diff --git a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java index 2ff14633aee..8b6a3b58017 100644 --- a/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/PartialDimExtractionFn.java @@ -70,7 +70,7 @@ public class PartialDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return false; + return true; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index 95576b73f97..50a418afed3 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -70,7 +70,7 @@ public class RegexDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index 0fecf90a4d3..b7579ea1399 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -66,7 +66,7 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return false; + return true; } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 6dc57f3f6ec..215f8c32aee 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -90,7 +90,7 @@ public class TimeDimExtractionFn implements DimExtractionFn @Override public boolean preservesOrdering() { - return true; + return false; } @Override diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index f1310e51098..54ee3c5e553 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -65,9 +65,11 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm aggregatorSpecs, - List postAggregatorSpecs + final List aggregatorSpecs, + final List postAggregatorSpecs ) { - return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs)); + return Comparators.inverse( + new Comparator() + { + @Override + public int compare(Object o1, Object o2) + { + // nulls last + if (o1 == null) { + return 1; + } + if (o2 == null) { + return -1; + } + return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2); + } + } + ); } @Override diff --git a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java index 44258273980..a23dd2f186f 100644 --- a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java @@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @Override public int compare(String s, String s2) { + // null first + if (s == null) { + return -1; + } + if (s2 == null) { + return 1; + } return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8)); } }; @@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @JsonProperty("previousStop") String previousStop ) { - this.previousStop = (previousStop == null) ? "" : previousStop; + this.previousStop = previousStop; } @Override @@ -98,7 +105,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec @Override public byte[] getCacheKey() { - byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8); + byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8); return ByteBuffer.allocate(1 + previousStopBytes.length) .put(CACHE_TYPE_ID) diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index efc7785effe..22b750faf00 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -1263,6 +1263,58 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testInvertedTopNLexicographicDimExtraction2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("..(.)") + ) + ) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + providerDimension, "o", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + ImmutableMap.of( + providerDimension, "f", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testTopNLexicographicDimExtractionWithPreviousStop() { @@ -1354,6 +1406,51 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("..(.)") + ) + ) + .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p"))) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "o", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + ImmutableMap.of( + providerDimension, "f", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testInvertedTopNQuery() {