diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java
index 057a4b95ff2..b45ea54b4d6 100644
--- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java
+++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java
@@ -330,7 +330,7 @@ public class IncrementalIndex implements Iterable
     int count = 0;
     for (String dimValue : dimValues) {
       String canonicalDimValue = dimLookup.get(dimValue);
-      if (canonicalDimValue == null) {
+      if (canonicalDimValue == null && !dimLookup.contains(dimValue)) {
         canonicalDimValue = dimValue;
         dimLookup.add(dimValue);
       }
@@ -560,7 +560,16 @@ public class IncrementalIndex implements Iterable
 
         int valsIndex = 0;
         while (retVal == 0 && valsIndex < lhsVals.length) {
-          retVal = lhsVals[valsIndex].compareTo(rhsVals[valsIndex]);
+          final String lhsVal = lhsVals[valsIndex];
+          final String rhsVal = rhsVals[valsIndex];
+          if (lhsVal != null && rhsVal != null) {
+            retVal = lhsVal.compareTo(rhsVal);
+          } else if (lhsVal != null) {
+            return 1;
+          } else if (rhsVal != null) {
+            return -1;
+          }
+          // Both null: tie at this position; fall through and compare the remaining values.
           ++valsIndex;
         }
         ++index;
@@ -593,6 +602,8 @@ public class IncrementalIndex implements Iterable
 
   static class DimDim
   {
+    // Stand-in for null dimension values: the backing maps only ever see this sentinel, never null.
+    public static final String NULL_STRING = "\u0000";
     private final Map poorMansInterning = Maps.newConcurrentMap();
     private final Map falseIds;
     private final Map falseIdsReverse;
@@ -605,19 +616,32 @@ public class IncrementalIndex implements Iterable
       falseIdsReverse = biMap.inverse();
     }
 
-    public String get(String value)
+    public boolean contains(@Nullable String value)
     {
-      return value == null ? null : poorMansInterning.get(value);
+      return poorMansInterning.containsKey(value == null ? NULL_STRING : value);
     }
 
-    public int getId(String value)
+    public String get(@Nullable String value)
     {
-      return falseIds.get(value);
+      final String retVal;
+      if(value == null) {
+        retVal = poorMansInterning.get(NULL_STRING);
+      } else {
+        retVal = poorMansInterning.get(value);
+      }
+      return retVal == null ? null : (retVal.equals(NULL_STRING) ? null : retVal);
     }
 
+    public int getId(@Nullable String value)
+    {
+      return value == null ? falseIds.get(NULL_STRING) : falseIds.get(value);
+    }
+
+    @Nullable
     public String getValue(int id)
     {
-      return falseIdsReverse.get(id);
+      final String value = falseIdsReverse.get(id);
+      return NULL_STRING.equals(value) ? null : value;
     }
 
     public int size()
@@ -625,27 +649,26 @@ public class IncrementalIndex implements Iterable
       return poorMansInterning.size();
     }
 
-    public Set keySet()
-    {
-      return poorMansInterning.keySet();
-    }
-
-    public synchronized void add(String value)
+    public synchronized void add(@Nullable String value)
     {
+      if(value == null) value = NULL_STRING;
       poorMansInterning.put(value, value);
       falseIds.put(value, falseIds.size());
     }
 
-    public int getSortedId(String value)
+    public int getSortedId(@Nullable String value)
     {
       assertSorted();
+      if(value == null) value = NULL_STRING;
       return Arrays.binarySearch(sortedVals, value);
     }
 
+    @Nullable
     public String getSortedValue(int index)
     {
       assertSorted();
-      return sortedVals[index];
+      final String sortedVal = sortedVals[index];
+      return sortedVal.equals(NULL_STRING) ? null : sortedVal;
     }
 
     public void sort()
diff --git a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
index ca98d740dc3..08bd230e9ae 100644
--- a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
+++ b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java
@@ -189,7 +189,7 @@ public class SpatialDimensionRowFormatter
       return false;
     }
     for (String dimVal : dimVals) {
-      if (Floats.tryParse(dimVal) == null) {
+      if (dimVal == null || Floats.tryParse(dimVal) == null) {
         return false;
       }
     }
diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
index 97e64a0ec0c..45595da48b4 100644
--- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
@@ -47,6 +47,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory;
 import io.druid.query.dimension.DefaultDimensionSpec;
 import io.druid.query.dimension.DimensionSpec;
 import io.druid.query.dimension.ExtractionDimensionSpec;
+import io.druid.query.extraction.DimExtractionFn;
 import io.druid.query.extraction.RegexDimExtractionFn;
 import io.druid.query.filter.JavaScriptDimFilter;
 import io.druid.query.filter.RegexDimFilter;
@@ -210,9 +211,11 @@ public class GroupByQueryRunnerTest
     TestHelper.assertExpectedObjects(expectedResults, results, "");
   }
 
+
   @Test
   public void testGroupByWithDimExtractionFn()
   {
+    final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
     GroupByQuery query = GroupByQuery
         .builder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
@@ -222,7 +225,20 @@ public class GroupByQueryRunnerTest
                 new ExtractionDimensionSpec(
                     "quality",
                     "alias",
-                    new RegexDimExtractionFn("(\\w{1})")
+                    new DimExtractionFn()
+                    {
+                      @Override
+                      public byte[] getCacheKey()
+                      {
+                        return new byte[]{(byte)0xFF};
+                      }
+
+                      @Override
+                      public String apply(String dimValue)
+                      {
+                        return dimValue.equals("mezzanine") ? null : fn1.apply(dimValue);
+                      }
+                    }
                 )
             )
         )
@@ -236,20 +252,20 @@ public class GroupByQueryRunnerTest
         .build();
 
     List expectedResults = Arrays.asList(
+        createExpectedRow("2011-04-01", "alias", null, "rows", 3L, "idx", 2870L),
         createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
         createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
         createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
         createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
-        createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L),
         createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
         createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
         createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
 
+        createExpectedRow("2011-04-02", "alias", null, "rows", 3L, "idx", 2447L),
         createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
         createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
         createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
         createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
-        createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L),
         createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
         createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
         createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)