From 44bd4339b554b7dce2b3c251720d8622bc217a2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Mon, 21 Apr 2014 15:16:56 -0700 Subject: [PATCH 1/4] support for null values in DimExtractionFn --- .../topn/DimExtractionTopNAlgorithm.java | 5 +- .../druid/query/topn/TopNQueryRunnerTest.java | 71 +++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 09535a84678..8aeed190353 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -96,10 +96,7 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithmasList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + Map nullValue = Maps.newHashMap(); + nullValue.put(providerDimension, null); + nullValue.putAll(ImmutableMap.of( + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + )); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + providerDimension, "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ), + nullValue + , + ImmutableMap.of( + providerDimension, "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + @Test public void testInvertedTopNQuery() { From 48590862cb08e7106fcc766327591b26f744b7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Mon, 21 Apr 2014 17:58:44 -0700 Subject: [PATCH 2/4] support for null values in group-by --- .../segment/incremental/IncrementalIndex.java | 48 +++++++++++++------ .../SpatialDimensionRowFormatter.java | 2 +- .../query/groupby/GroupByQueryRunnerTest.java | 22 +++++++-- 3 files changed, 53 insertions(+), 19 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 057a4b95ff2..b45ea54b4d6 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -330,7 +330,7 @@ public class IncrementalIndex implements Iterable int count = 0; for (String dimValue : dimValues) { String canonicalDimValue = dimLookup.get(dimValue); - if (canonicalDimValue == null) { + if (canonicalDimValue == null && !dimLookup.contains(dimValue)) { canonicalDimValue = dimValue; dimLookup.add(dimValue); } @@ -560,7 +560,12 @@ public class IncrementalIndex implements Iterable int valsIndex = 0; while (retVal == 0 && valsIndex < lhsVals.length) { - retVal = lhsVals[valsIndex].compareTo(rhsVals[valsIndex]); + final String lhsVal = lhsVals[valsIndex]; + final String rhsVal = rhsVals[valsIndex]; + if(lhsVal == null && rhsVal == null) return 0; + else if(lhsVal == null) return -1; + else if(rhsVal == null) return 1; + else retVal = lhsVal.compareTo(rhsVal); ++valsIndex; } ++index; @@ -593,6 +598,7 @@ public class IncrementalIndex implements Iterable static class DimDim { + public static final String NULL_STRING = "\u0000"; private final Map poorMansInterning = Maps.newConcurrentMap(); private final Map falseIds; private final Map falseIdsReverse; @@ -605,19 +611,32 @@ public class IncrementalIndex implements Iterable falseIdsReverse = biMap.inverse(); } - public String get(String value) + public boolean contains(@Nullable String value) { - return value == null ? null : poorMansInterning.get(value); + return poorMansInterning.containsKey(value == null ? NULL_STRING : value); } - public int getId(String value) + public String get(@Nullable String value) { - return falseIds.get(value); + final String retVal; + if(value == null) { + retVal = poorMansInterning.get(NULL_STRING); + } else { + retVal = poorMansInterning.get(value); + } + return retVal == null ? null : (retVal.equals(NULL_STRING) ? null : retVal); } + public int getId(@Nullable String value) + { + return value == null ? falseIds.get(NULL_STRING) : falseIds.get(value); + } + + @Nullable public String getValue(int id) { - return falseIdsReverse.get(id); + final String value = falseIdsReverse.get(id); + return value.equals(NULL_STRING) ? null : value; } public int size() @@ -625,27 +644,26 @@ public class IncrementalIndex implements Iterable return poorMansInterning.size(); } - public Set keySet() - { - return poorMansInterning.keySet(); - } - - public synchronized void add(String value) + public synchronized void add(@Nullable String value) { + if(value == null) value = NULL_STRING; poorMansInterning.put(value, value); falseIds.put(value, falseIds.size()); } - public int getSortedId(String value) + public int getSortedId(@Nullable String value) { assertSorted(); + if(value == null) value = NULL_STRING; return Arrays.binarySearch(sortedVals, value); } + @Nullable public String getSortedValue(int index) { assertSorted(); - return sortedVals[index]; + final String sortedVal = sortedVals[index]; + return sortedVal.equals(NULL_STRING) ? null : sortedVal; } public void sort() diff --git a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java index ca98d740dc3..08bd230e9ae 100644 --- a/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java +++ b/processing/src/main/java/io/druid/segment/incremental/SpatialDimensionRowFormatter.java @@ -189,7 +189,7 @@ public class SpatialDimensionRowFormatter return false; } for (String dimVal : dimVals) { - if (Floats.tryParse(dimVal) == null) { + if (dimVal == null || Floats.tryParse(dimVal) == null) { return false; } } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 97e64a0ec0c..45595da48b4 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -47,6 +47,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; +import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.RegexDimFilter; @@ -210,9 +211,11 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } + @Test public void testGroupByWithDimExtractionFn() { + final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); GroupByQuery query = GroupByQuery .builder() .setDataSource(QueryRunnerTestHelper.dataSource) @@ -222,7 +225,20 @@ public class GroupByQueryRunnerTest new ExtractionDimensionSpec( "quality", "alias", - new RegexDimExtractionFn("(\\w{1})") + new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[]{(byte)0xFF}; + } + + @Override + public String apply(String dimValue) + { + return dimValue.equals("mezzanine") ? null : fn1.apply(dimValue); + } + } ) ) ) @@ -236,20 +252,20 @@ public class GroupByQueryRunnerTest .build(); List expectedResults = Arrays.asList( + createExpectedRow("2011-04-01", "alias", null, "rows", 3L, "idx", 2870L), createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L), createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L), createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L), createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L), - createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L), createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L), createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L), createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L), + createExpectedRow("2011-04-02", "alias", null, "rows", 3L, "idx", 2447L), createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L), createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L), createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L), createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L), - createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L), createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L), createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L), createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L) From 52f575025b5b495dd790df8ad92f47250fbcb8ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Mon, 21 Apr 2014 22:37:31 -0700 Subject: [PATCH 3/4] requires druid-api update --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c850609bd7e..1f8ad5af638 100644 --- a/pom.xml +++ b/pom.xml @@ -41,7 +41,7 @@ UTF-8 0.25.4 2.4.0 - 0.1.11 + 0.1.12 From 5ae848ab14fffc255d1289185bc67e9417301ff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Fri, 25 Apr 2014 15:09:47 -0700 Subject: [PATCH 4/4] fix formatting --- .../segment/incremental/IncrementalIndex.java | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index b45ea54b4d6..dcf4462142d 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -281,7 +281,6 @@ public class IncrementalIndex implements Iterable ); } } - ); } @@ -562,10 +561,15 @@ public class IncrementalIndex implements Iterable while (retVal == 0 && valsIndex < lhsVals.length) { final String lhsVal = lhsVals[valsIndex]; final String rhsVal = rhsVals[valsIndex]; - if(lhsVal == null && rhsVal == null) return 0; - else if(lhsVal == null) return -1; - else if(rhsVal == null) return 1; - else retVal = lhsVal.compareTo(rhsVal); + if (lhsVal == null && rhsVal == null) { + return 0; + } else if (lhsVal == null) { + return -1; + } else if (rhsVal == null) { + return 1; + } else { + retVal = lhsVal.compareTo(rhsVal); + } ++valsIndex; } ++index; @@ -581,16 +585,16 @@ public class IncrementalIndex implements Iterable "timestamp=" + new DateTime(timestamp) + ", dims=" + Lists.transform( Arrays.asList(dims), new Function() - { - @Override - public Object apply(@Nullable String[] input) - { - if (input == null || input.length == 0) { - return Arrays.asList("null"); + { + @Override + public Object apply(@Nullable String[] input) + { + if (input == null || input.length == 0) { + return Arrays.asList("null"); + } + return Arrays.asList(input); + } } - return Arrays.asList(input); - } - } ) + '}'; } @@ -619,7 +623,7 @@ public class IncrementalIndex implements Iterable public String get(@Nullable String value) { final String retVal; - if(value == null) { + if (value == null) { retVal = poorMansInterning.get(NULL_STRING); } else { retVal = poorMansInterning.get(value); @@ -646,7 +650,9 @@ public class IncrementalIndex implements Iterable public synchronized void add(@Nullable String value) { - if(value == null) value = NULL_STRING; + if (value == null) { + value = NULL_STRING; + } poorMansInterning.put(value, value); falseIds.put(value, falseIds.size()); } @@ -654,7 +660,9 @@ public class IncrementalIndex implements Iterable public int getSortedId(@Nullable String value) { assertSorted(); - if(value == null) value = NULL_STRING; + if (value == null) { + value = NULL_STRING; + } return Arrays.binarySearch(sortedVals, value); }