diff --git a/pom.xml b/pom.xml
index 31db3effb44..01730de96bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -41,7 +41,7 @@
UTF-8
0.25.5
2.4.0
- 0.1.11
+ 0.1.12
diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
index 09535a84678..8aeed190353 100644
--- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
@@ -96,10 +96,7 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm
);
}
}
-
);
}
@@ -330,7 +329,7 @@ public class IncrementalIndex implements Iterable
int count = 0;
for (String dimValue : dimValues) {
String canonicalDimValue = dimLookup.get(dimValue);
- if (canonicalDimValue == null) {
+ if (canonicalDimValue == null && !dimLookup.contains(dimValue)) {
canonicalDimValue = dimValue;
dimLookup.add(dimValue);
}
@@ -560,7 +559,17 @@ public class IncrementalIndex implements Iterable
int valsIndex = 0;
while (retVal == 0 && valsIndex < lhsVals.length) {
- retVal = lhsVals[valsIndex].compareTo(rhsVals[valsIndex]);
+ // Null-aware comparison of one value pair; nulls sort before non-null values.
+ final String lhsVal = lhsVals[valsIndex];
+ final String rhsVal = rhsVals[valsIndex];
+ if (lhsVal != null && rhsVal != null) {
+ retVal = lhsVal.compareTo(rhsVal);
+ } else if (lhsVal != null) {
+ // only rhs is null, so lhs orders after it
+ return 1;
+ } else if (rhsVal != null) {
+ // only lhs is null, so lhs orders before rhs
+ return -1;
+ }
+ // Both null: a tie on this value only. Do not return here; retVal stays 0 so the
+ // remaining values and dimensions are still compared. Returning 0 at this point
+ // would report rows as equal even when a later value differs.
++valsIndex;
}
++index;
@@ -576,16 +585,16 @@ public class IncrementalIndex implements Iterable
"timestamp=" + new DateTime(timestamp) +
", dims=" + Lists.transform(
Arrays.asList(dims), new Function()
- {
- @Override
- public Object apply(@Nullable String[] input)
- {
- if (input == null || input.length == 0) {
- return Arrays.asList("null");
+ {
+ @Override
+ public Object apply(@Nullable String[] input)
+ {
+ if (input == null || input.length == 0) {
+ return Arrays.asList("null");
+ }
+ return Arrays.asList(input);
+ }
}
- return Arrays.asList(input);
- }
- }
) +
'}';
}
@@ -593,6 +602,7 @@ public class IncrementalIndex implements Iterable
static class DimDim
{
+ public static final String NULL_STRING = "\u0000";
private final Map poorMansInterning = Maps.newConcurrentMap();
private final Map falseIds;
private final Map falseIdsReverse;
@@ -605,19 +615,32 @@ public class IncrementalIndex implements Iterable
falseIdsReverse = biMap.inverse();
}
- public String get(String value)
+ public boolean contains(@Nullable String value)
{
- return value == null ? null : poorMansInterning.get(value);
+ // A null value is looked up under the NULL_STRING sentinel key.
+ final String key = (value == null) ? NULL_STRING : value;
+ return poorMansInterning.containsKey(key);
}
- public int getId(String value)
+ public String get(@Nullable String value)
{
- return falseIds.get(value);
+ // Translate null to the NULL_STRING sentinel on the way in, and map the sentinel
+ // back to null on the way out. A missing entry also yields null.
+ final String interned = poorMansInterning.get(value == null ? NULL_STRING : value);
+ if (interned == null || interned.equals(NULL_STRING)) {
+ return null;
+ }
+ return interned;
}
+ public int getId(@Nullable String value)
+ {
+ // A null value is looked up under the NULL_STRING sentinel key.
+ return falseIds.get(value == null ? NULL_STRING : value);
+ }
+
+ @Nullable
public String getValue(int id)
{
- return falseIdsReverse.get(id);
+ // The reverse map returns null for an id with no mapping, which the replaced line
+ // passed straight through. Comparing from the constant side keeps that behavior
+ // instead of throwing a NullPointerException, while still translating the
+ // NULL_STRING sentinel back to null.
+ final String value = falseIdsReverse.get(id);
+ return NULL_STRING.equals(value) ? null : value;
}
public int size()
@@ -625,27 +648,30 @@ public class IncrementalIndex implements Iterable
return poorMansInterning.size();
}
- public Set keySet()
- {
- return poorMansInterning.keySet();
- }
-
- public synchronized void add(String value)
+ public synchronized void add(@Nullable String value)
{
+ // A null value is registered under the NULL_STRING sentinel.
+ value = (value == null) ? NULL_STRING : value;
poorMansInterning.put(value, value);
falseIds.put(value, falseIds.size());
}
- public int getSortedId(String value)
+ public int getSortedId(@Nullable String value)
{
assertSorted();
+ // Search for the NULL_STRING sentinel when asked for the position of null.
+ value = (value == null) ? NULL_STRING : value;
return Arrays.binarySearch(sortedVals, value);
}
+ @Nullable
public String getSortedValue(int index)
{
assertSorted();
- return sortedVals[index];
+ // The NULL_STRING sentinel stands in for a null value; translate it back.
+ final String stored = sortedVals[index];
+ if (stored.equals(NULL_STRING)) {
+ return null;
+ }
+ return stored;
}
public void sort()
diff --git a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
index ca98d740dc3..08bd230e9ae 100644
--- a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
+++ b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
@@ -189,7 +189,7 @@ public class SpatialDimensionRowFormatter
return false;
}
for (String dimVal : dimVals) {
- if (Floats.tryParse(dimVal) == null) {
+ if (dimVal == null || Floats.tryParse(dimVal) == null) {
return false;
}
}
diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
index 97e64a0ec0c..45595da48b4 100644
--- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
@@ -47,6 +47,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;
import io.druid.query.dimension.ExtractionDimensionSpec;
+import io.druid.query.extraction.DimExtractionFn;
import io.druid.query.extraction.RegexDimExtractionFn;
import io.druid.query.filter.JavaScriptDimFilter;
import io.druid.query.filter.RegexDimFilter;
@@ -210,9 +211,11 @@ public class GroupByQueryRunnerTest
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
+
@Test
public void testGroupByWithDimExtractionFn()
{
+ final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
GroupByQuery query = GroupByQuery
.builder()
.setDataSource(QueryRunnerTestHelper.dataSource)
@@ -222,7 +225,20 @@ public class GroupByQueryRunnerTest
new ExtractionDimensionSpec(
"quality",
"alias",
- new RegexDimExtractionFn("(\\w{1})")
+ new DimExtractionFn()
+ {
+ @Override
+ public byte[] getCacheKey()
+ {
+ return new byte[]{(byte)0xFF};
+ }
+
+ @Override
+ public String apply(String dimValue)
+ {
+ return dimValue.equals("mezzanine") ? null : fn1.apply(dimValue);
+ }
+ }
)
)
)
@@ -236,20 +252,20 @@ public class GroupByQueryRunnerTest
.build();
List expectedResults = Arrays.asList(
+ createExpectedRow("2011-04-01", "alias", null, "rows", 3L, "idx", 2870L),
createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
- createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L),
createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
+ createExpectedRow("2011-04-02", "alias", null, "rows", 3L, "idx", 2447L),
createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
- createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L),
createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
index c8628bfdca5..c4f1e9220ef 100644
--- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
@@ -23,6 +23,7 @@ import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.metamx.common.guava.Sequences;
import io.druid.collections.StupidPool;
import io.druid.query.BySegmentResultValueClass;
@@ -36,6 +37,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory;
import io.druid.query.aggregation.MinAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.ExtractionDimensionSpec;
+import io.druid.query.extraction.DimExtractionFn;
import io.druid.query.extraction.RegexDimExtractionFn;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
@@ -52,6 +54,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -1129,6 +1132,74 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
+ @Test
+ public void testTopNDimExtractionNulls()
+ {
+ TopNQuery query = new TopNQueryBuilder()
+ .dataSource(QueryRunnerTestHelper.dataSource)
+ .granularity(QueryRunnerTestHelper.allGran)
+ .dimension(
+ new ExtractionDimensionSpec(
+ providerDimension, providerDimension, new DimExtractionFn()
+ {
+ @Override
+ public byte[] getCacheKey()
+ {
+ return new byte[]{(byte)0xFF};
+ }
+
+ @Override
+ public String apply(String dimValue)
+ {
+ return dimValue.equals("total_market") ? null : dimValue;
+ }
+ })
+ )
+ .metric("rows")
+ .threshold(4)
+ .intervals(QueryRunnerTestHelper.firstToThird)
+ .aggregators(QueryRunnerTestHelper.commonAggregators)
+ .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant))
+ .build();
+
+ Map nullValue = Maps.newHashMap();
+ nullValue.put(providerDimension, null);
+ nullValue.putAll(ImmutableMap.of(
+ "rows", 4L,
+ "index", 5351.814697265625D,
+ "addRowsIndexConstant", 5356.814697265625D,
+ "uniques", QueryRunnerTestHelper.UNIQUES_2
+ ));
+
+ List> expectedResults = Arrays.asList(
+ new Result<>(
+ new DateTime("2011-04-01T00:00:00.000Z"),
+ new TopNResultValue(
+ Arrays.