mirror of https://github.com/apache/druid.git
support for null values in group-by
This commit is contained in:
parent
44bd4339b5
commit
48590862cb
|
@ -330,7 +330,7 @@ public class IncrementalIndex implements Iterable<Row>
|
|||
int count = 0;
|
||||
for (String dimValue : dimValues) {
|
||||
String canonicalDimValue = dimLookup.get(dimValue);
|
||||
if (canonicalDimValue == null) {
|
||||
if (canonicalDimValue == null && !dimLookup.contains(dimValue)) {
|
||||
canonicalDimValue = dimValue;
|
||||
dimLookup.add(dimValue);
|
||||
}
|
||||
|
@ -560,7 +560,12 @@ public class IncrementalIndex implements Iterable<Row>
|
|||
|
||||
int valsIndex = 0;
|
||||
// Compare the values of the current dimension position by position, stopping at the
// first difference. A null value sorts before any non-null value.
while (retVal == 0 && valsIndex < lhsVals.length) {
  final String lhsVal = lhsVals[valsIndex];
  final String rhsVal = rhsVals[valsIndex];
  if (lhsVal != null && rhsVal != null) {
    retVal = lhsVal.compareTo(rhsVal);
  } else if (lhsVal != null) {
    // Only the right-hand value is null: left sorts after it.
    return 1;
  } else if (rhsVal != null) {
    // Only the left-hand value is null: left sorts before it.
    return -1;
  }
  // Both null: equal at this position. Do NOT return here; the remaining values
  // (and dimensions) still have to be compared, otherwise keys that differ later
  // on would wrongly compare as equal.
  ++valsIndex;
}
|
||||
++index;
|
||||
|
@ -593,6 +598,7 @@ public class IncrementalIndex implements Iterable<Row>
|
|||
|
||||
static class DimDim
|
||||
{
|
||||
public static final String NULL_STRING = "\u0000";
|
||||
private final Map<String, String> poorMansInterning = Maps.newConcurrentMap();
|
||||
private final Map<String, Integer> falseIds;
|
||||
private final Map<Integer, String> falseIdsReverse;
|
||||
|
@ -605,19 +611,32 @@ public class IncrementalIndex implements Iterable<Row>
|
|||
falseIdsReverse = biMap.inverse();
|
||||
}
|
||||
|
||||
public String get(String value)
|
||||
public boolean contains(@Nullable String value)
|
||||
{
|
||||
return value == null ? null : poorMansInterning.get(value);
|
||||
return poorMansInterning.containsKey(value == null ? NULL_STRING : value);
|
||||
}
|
||||
|
||||
public int getId(String value)
|
||||
public String get(@Nullable String value)
|
||||
{
|
||||
return falseIds.get(value);
|
||||
final String retVal;
|
||||
if(value == null) {
|
||||
retVal = poorMansInterning.get(NULL_STRING);
|
||||
} else {
|
||||
retVal = poorMansInterning.get(value);
|
||||
}
|
||||
return retVal == null ? null : (retVal.equals(NULL_STRING) ? null : retVal);
|
||||
}
|
||||
|
||||
public int getId(@Nullable String value)
|
||||
{
|
||||
return value == null ? falseIds.get(NULL_STRING) : falseIds.get(value);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public String getValue(int id)
|
||||
{
|
||||
return falseIdsReverse.get(id);
|
||||
final String value = falseIdsReverse.get(id);
|
||||
return value.equals(NULL_STRING) ? null : value;
|
||||
}
|
||||
|
||||
public int size()
|
||||
|
@ -625,27 +644,26 @@ public class IncrementalIndex implements Iterable<Row>
|
|||
return poorMansInterning.size();
|
||||
}
|
||||
|
||||
public Set<String> keySet()
|
||||
{
|
||||
return poorMansInterning.keySet();
|
||||
}
|
||||
|
||||
public synchronized void add(String value)
|
||||
public synchronized void add(@Nullable String value)
|
||||
{
|
||||
if(value == null) value = NULL_STRING;
|
||||
poorMansInterning.put(value, value);
|
||||
falseIds.put(value, falseIds.size());
|
||||
}
|
||||
|
||||
public int getSortedId(String value)
|
||||
public int getSortedId(@Nullable String value)
|
||||
{
|
||||
assertSorted();
|
||||
if(value == null) value = NULL_STRING;
|
||||
return Arrays.binarySearch(sortedVals, value);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public String getSortedValue(int index)
|
||||
{
|
||||
assertSorted();
|
||||
return sortedVals[index];
|
||||
final String sortedVal = sortedVals[index];
|
||||
return sortedVal.equals(NULL_STRING) ? null : sortedVal;
|
||||
}
|
||||
|
||||
public void sort()
|
||||
|
|
|
@ -189,7 +189,7 @@ public class SpatialDimensionRowFormatter
|
|||
return false;
|
||||
}
|
||||
for (String dimVal : dimVals) {
|
||||
if (Floats.tryParse(dimVal) == null) {
|
||||
if (dimVal == null || Floats.tryParse(dimVal) == null) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,7 @@ import io.druid.query.aggregation.MaxAggregatorFactory;
|
|||
import io.druid.query.dimension.DefaultDimensionSpec;
|
||||
import io.druid.query.dimension.DimensionSpec;
|
||||
import io.druid.query.dimension.ExtractionDimensionSpec;
|
||||
import io.druid.query.extraction.DimExtractionFn;
|
||||
import io.druid.query.extraction.RegexDimExtractionFn;
|
||||
import io.druid.query.filter.JavaScriptDimFilter;
|
||||
import io.druid.query.filter.RegexDimFilter;
|
||||
|
@ -210,9 +211,11 @@ public class GroupByQueryRunnerTest
|
|||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGroupByWithDimExtractionFn()
|
||||
{
|
||||
final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
|
||||
GroupByQuery query = GroupByQuery
|
||||
.builder()
|
||||
.setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
|
@ -222,7 +225,20 @@ public class GroupByQueryRunnerTest
|
|||
new ExtractionDimensionSpec(
|
||||
"quality",
|
||||
"alias",
|
||||
new RegexDimExtractionFn("(\\w{1})")
|
||||
new DimExtractionFn()
|
||||
{
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
return new byte[]{(byte)0xFF};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(String dimValue)
|
||||
{
|
||||
return dimValue.equals("mezzanine") ? null : fn1.apply(dimValue);
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -236,20 +252,20 @@ public class GroupByQueryRunnerTest
|
|||
.build();
|
||||
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
createExpectedRow("2011-04-01", "alias", null, "rows", 3L, "idx", 2870L),
|
||||
createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
|
||||
createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
|
||||
createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
|
||||
createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
|
||||
createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L),
|
||||
createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
|
||||
createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
|
||||
createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
|
||||
|
||||
createExpectedRow("2011-04-02", "alias", null, "rows", 3L, "idx", 2447L),
|
||||
createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
|
||||
createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
|
||||
createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
|
||||
createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
|
||||
createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L),
|
||||
createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
|
||||
createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
|
||||
createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
|
||||
|
|
Loading…
Reference in New Issue