diff --git a/pom.xml b/pom.xml index 31db3effb44..01730de96bf 100644 --- a/pom.xml +++ b/pom.xml @@ -41,7 +41,7 @@ UTF-8 0.25.5 2.4.0 - 0.1.11 + 0.1.12 diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 09535a84678..8aeed190353 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -96,10 +96,7 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm ); } } - ); } @@ -330,7 +329,7 @@ public class IncrementalIndex implements Iterable int count = 0; for (String dimValue : dimValues) { String canonicalDimValue = dimLookup.get(dimValue); - if (canonicalDimValue == null) { + if (canonicalDimValue == null && !dimLookup.contains(dimValue)) { canonicalDimValue = dimValue; dimLookup.add(dimValue); } @@ -560,7 +559,17 @@ public class IncrementalIndex implements Iterable int valsIndex = 0; while (retVal == 0 && valsIndex < lhsVals.length) { - retVal = lhsVals[valsIndex].compareTo(rhsVals[valsIndex]); + final String lhsVal = lhsVals[valsIndex]; + final String rhsVal = rhsVals[valsIndex]; + if (lhsVal == null && rhsVal == null) { + return 0; + } else if (lhsVal == null) { + return -1; + } else if (rhsVal == null) { + return 1; + } else { + retVal = lhsVal.compareTo(rhsVal); + } ++valsIndex; } ++index; @@ -576,16 +585,16 @@ public class IncrementalIndex implements Iterable "timestamp=" + new DateTime(timestamp) + ", dims=" + Lists.transform( Arrays.asList(dims), new Function() - { - @Override - public Object apply(@Nullable String[] input) - { - if (input == null || input.length == 0) { - return Arrays.asList("null"); + { + @Override + public Object apply(@Nullable String[] input) + { + if (input == null || input.length == 0) { + return Arrays.asList("null"); + } + return Arrays.asList(input); + } } - return Arrays.asList(input); - } - } ) + '}'; } @@ -593,6 +602,7 @@ public class IncrementalIndex implements Iterable static class DimDim { + public static final String NULL_STRING = "\u0000"; private final Map poorMansInterning = Maps.newConcurrentMap(); private final Map falseIds; private final Map falseIdsReverse; @@ -605,19 +615,32 @@ public class IncrementalIndex implements Iterable falseIdsReverse = biMap.inverse(); } - public String get(String value) + public boolean contains(@Nullable String value) { - return value == null ? null : poorMansInterning.get(value); + return poorMansInterning.containsKey(value == null ? NULL_STRING : value); } - public int getId(String value) + public String get(@Nullable String value) { - return falseIds.get(value); + final String retVal; + if (value == null) { + retVal = poorMansInterning.get(NULL_STRING); + } else { + retVal = poorMansInterning.get(value); + } + return retVal == null ? null : (retVal.equals(NULL_STRING) ? null : retVal); } + public int getId(@Nullable String value) + { + return value == null ? falseIds.get(NULL_STRING) : falseIds.get(value); + } + + @Nullable public String getValue(int id) { - return falseIdsReverse.get(id); + final String value = falseIdsReverse.get(id); + return value.equals(NULL_STRING) ? null : value; } public int size() @@ -625,27 +648,30 @@ public class IncrementalIndex implements Iterable return poorMansInterning.size(); } - public Set keySet() - { - return poorMansInterning.keySet(); - } - - public synchronized void add(String value) + public synchronized void add(@Nullable String value) { + if (value == null) { + value = NULL_STRING; + } poorMansInterning.put(value, value); falseIds.put(value, falseIds.size()); } - public int getSortedId(String value) + public int getSortedId(@Nullable String value) { assertSorted(); + if (value == null) { + value = NULL_STRING; + } return Arrays.binarySearch(sortedVals, value); } + @Nullable public String getSortedValue(int index) { assertSorted(); - return sortedVals[index]; + final String sortedVal = sortedVals[index]; + return sortedVal.equals(NULL_STRING) ? null : sortedVal; } public void sort() diff --git a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java index ca98d740dc3..08bd230e9ae 100644 --- a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java +++ b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java @@ -189,7 +189,7 @@ public class SpatialDimensionRowFormatter return false; } for (String dimVal : dimVals) { - if (Floats.tryParse(dimVal) == null) { + if (dimVal == null || Floats.tryParse(dimVal) == null) { return false; } } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 97e64a0ec0c..45595da48b4 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -47,6 +47,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; +import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.RegexDimFilter; @@ -210,9 +211,11 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } + @Test public void testGroupByWithDimExtractionFn() { + final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); GroupByQuery query = GroupByQuery .builder() .setDataSource(QueryRunnerTestHelper.dataSource) @@ -222,7 +225,20 @@ public class GroupByQueryRunnerTest new ExtractionDimensionSpec( "quality", "alias", - new RegexDimExtractionFn("(\\w{1})") + new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[]{(byte)0xFF}; + } + + @Override + public String apply(String dimValue) + { + return dimValue.equals("mezzanine") ? null : fn1.apply(dimValue); + } + } ) ) ) @@ -236,20 +252,20 @@ public class GroupByQueryRunnerTest .build(); List expectedResults = Arrays.asList( + createExpectedRow("2011-04-01", "alias", null, "rows", 3L, "idx", 2870L), createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L), createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L), createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L), createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L), - createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L), createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L), createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L), createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L), + createExpectedRow("2011-04-02", "alias", null, "rows", 3L, "idx", 2447L), createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L), createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L), createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L), createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L), - createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L), createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L), createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L), createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L) diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index c8628bfdca5..c4f1e9220ef 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -23,6 +23,7 @@ import com.google.common.base.Supplier; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.metamx.common.guava.Sequences; import io.druid.collections.StupidPool; import io.druid.query.BySegmentResultValueClass; @@ -36,6 +37,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory; import io.druid.query.aggregation.MinAggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.dimension.ExtractionDimensionSpec; +import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; @@ -52,6 +54,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -1129,6 +1132,74 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query)); } + @Test + public void testTopNDimExtractionNulls() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[]{(byte)0xFF}; + } + + @Override + public String apply(String dimValue) + { + return dimValue.equals("total_market") ? null : dimValue; + } + }) + ) + .metric("rows") + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + Map nullValue = Maps.newHashMap(); + nullValue.put(providerDimension, null); + nullValue.putAll(ImmutableMap.of( + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + )); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + nullValue + , + ImmutableMap.of( + providerDimension, "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testInvertedTopNQuery() {