mirror of https://github.com/apache/druid.git
fix all manners of brokenness from nulls and empty strings
This commit is contained in:
parent
1adec23126
commit
18d3acd3a8
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.base.Function;
|
import com.google.common.base.Function;
|
||||||
import com.google.common.base.Objects;
|
import com.google.common.base.Objects;
|
||||||
import com.google.common.base.Predicate;
|
import com.google.common.base.Predicate;
|
||||||
import com.google.common.base.Predicates;
|
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Iterators;
|
import com.google.common.collect.Iterators;
|
||||||
|
@ -91,7 +90,6 @@ import java.nio.LongBuffer;
|
||||||
import java.util.AbstractList;
|
import java.util.AbstractList;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -463,6 +461,7 @@ public class IndexMaker
|
||||||
);
|
);
|
||||||
|
|
||||||
final Map<String, Integer> dimIndexes = Maps.newHashMap();
|
final Map<String, Integer> dimIndexes = Maps.newHashMap();
|
||||||
|
final Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
|
||||||
final Map<String, Iterable<String>> dimensionValuesLookup = Maps.newHashMap();
|
final Map<String, Iterable<String>> dimensionValuesLookup = Maps.newHashMap();
|
||||||
final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
|
final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
|
||||||
final Set<String> skippedDimensions = Sets.newHashSet();
|
final Set<String> skippedDimensions = Sets.newHashSet();
|
||||||
|
@ -476,6 +475,7 @@ public class IndexMaker
|
||||||
dimConversions,
|
dimConversions,
|
||||||
dimIndexes,
|
dimIndexes,
|
||||||
skippedDimensions,
|
skippedDimensions,
|
||||||
|
dimensionCardinalities,
|
||||||
dimensionValuesLookup
|
dimensionValuesLookup
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -503,6 +503,7 @@ public class IndexMaker
|
||||||
skippedDimensions,
|
skippedDimensions,
|
||||||
theRows,
|
theRows,
|
||||||
columnCapabilities,
|
columnCapabilities,
|
||||||
|
dimensionCardinalities,
|
||||||
dimensionValuesLookup,
|
dimensionValuesLookup,
|
||||||
rowNumConversions
|
rowNumConversions
|
||||||
);
|
);
|
||||||
|
@ -527,6 +528,7 @@ public class IndexMaker
|
||||||
final List<Map<String, IntBuffer>> dimConversions,
|
final List<Map<String, IntBuffer>> dimConversions,
|
||||||
final Map<String, Integer> dimIndexes,
|
final Map<String, Integer> dimIndexes,
|
||||||
final Set<String> skippedDimensions,
|
final Set<String> skippedDimensions,
|
||||||
|
final Map<String, Integer> dimensionCardinalities,
|
||||||
final Map<String, Iterable<String>> dimensionValuesLookup
|
final Map<String, Iterable<String>> dimensionValuesLookup
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
@ -557,7 +559,28 @@ public class IndexMaker
|
||||||
|
|
||||||
// sort all dimension values and treat all null values as empty strings
|
// sort all dimension values and treat all null values as empty strings
|
||||||
final Iterable<String> dimensionValues = CombiningIterable.createSplatted(
|
final Iterable<String> dimensionValues = CombiningIterable.createSplatted(
|
||||||
|
Iterables.transform(
|
||||||
dimValueLookups,
|
dimValueLookups,
|
||||||
|
new Function<Indexed<String>, Iterable<String>>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Iterable<String> apply(Indexed<String> indexed)
|
||||||
|
{
|
||||||
|
return Iterables.transform(
|
||||||
|
indexed,
|
||||||
|
new Function<String, String>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public String apply(@Nullable String input)
|
||||||
|
{
|
||||||
|
return (input == null) ? "" : input;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
,
|
||||||
Ordering.<String>natural().nullsFirst()
|
Ordering.<String>natural().nullsFirst()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -572,6 +595,9 @@ public class IndexMaker
|
||||||
|
|
||||||
++cardinality;
|
++cardinality;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dimensionCardinalities.put(dimension, cardinality);
|
||||||
|
|
||||||
if (cardinality == 0) {
|
if (cardinality == 0) {
|
||||||
log.info("Skipping [%s], it is empty!", dimension);
|
log.info("Skipping [%s], it is empty!", dimension);
|
||||||
skippedDimensions.add(dimension);
|
skippedDimensions.add(dimension);
|
||||||
|
@ -749,6 +775,7 @@ public class IndexMaker
|
||||||
final Set<String> skippedDimensions,
|
final Set<String> skippedDimensions,
|
||||||
final Iterable<Rowboat> theRows,
|
final Iterable<Rowboat> theRows,
|
||||||
final Map<String, ColumnCapabilitiesImpl> columnCapabilities,
|
final Map<String, ColumnCapabilitiesImpl> columnCapabilities,
|
||||||
|
final Map<String, Integer> dimensionCardinalities,
|
||||||
final Map<String, Iterable<String>> dimensionValuesLookup,
|
final Map<String, Iterable<String>> dimensionValuesLookup,
|
||||||
final List<IntBuffer> rowNumConversions
|
final List<IntBuffer> rowNumConversions
|
||||||
) throws IOException
|
) throws IOException
|
||||||
|
@ -771,6 +798,7 @@ public class IndexMaker
|
||||||
dimIndex,
|
dimIndex,
|
||||||
dimension,
|
dimension,
|
||||||
columnCapabilities,
|
columnCapabilities,
|
||||||
|
dimensionCardinalities,
|
||||||
dimensionValuesLookup,
|
dimensionValuesLookup,
|
||||||
rowNumConversions
|
rowNumConversions
|
||||||
);
|
);
|
||||||
|
@ -788,6 +816,7 @@ public class IndexMaker
|
||||||
final int dimIndex,
|
final int dimIndex,
|
||||||
final String dimension,
|
final String dimension,
|
||||||
final Map<String, ColumnCapabilitiesImpl> columnCapabilities,
|
final Map<String, ColumnCapabilitiesImpl> columnCapabilities,
|
||||||
|
final Map<String, Integer> dimensionCardinalities,
|
||||||
final Map<String, Iterable<String>> dimensionValuesLookup,
|
final Map<String, Iterable<String>> dimensionValuesLookup,
|
||||||
final List<IntBuffer> rowNumConversions
|
final List<IntBuffer> rowNumConversions
|
||||||
) throws IOException
|
) throws IOException
|
||||||
|
@ -818,6 +847,7 @@ public class IndexMaker
|
||||||
|
|
||||||
ConciseSet nullSet = null;
|
ConciseSet nullSet = null;
|
||||||
int rowCount = 0;
|
int rowCount = 0;
|
||||||
|
|
||||||
for (Rowboat theRow : theRows) {
|
for (Rowboat theRow : theRows) {
|
||||||
if (dimIndex > theRow.getDims().length) {
|
if (dimIndex > theRow.getDims().length) {
|
||||||
if (nullSet == null) {
|
if (nullSet == null) {
|
||||||
|
@ -838,16 +868,88 @@ public class IndexMaker
|
||||||
rowCount++;
|
rowCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericIndexed<String> dictionary = null;
|
|
||||||
final Iterable<String> dimensionValues = dimensionValuesLookup.get(dimension);
|
final Iterable<String> dimensionValues = dimensionValuesLookup.get(dimension);
|
||||||
|
GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||||
|
dimensionValues,
|
||||||
|
GenericIndexed.stringStrategy
|
||||||
|
);
|
||||||
boolean bumpDictionary = false;
|
boolean bumpDictionary = false;
|
||||||
|
|
||||||
if (hasMultipleValues) {
|
if (hasMultipleValues) {
|
||||||
List<List<Integer>> vals = ((MultiValColumnDictionaryEntryStore) adder).get();
|
final List<List<Integer>> vals = ((MultiValColumnDictionaryEntryStore) adder).get();
|
||||||
|
if (nullSet != null) {
|
||||||
|
log.info("Dimension[%s] has null rows.", dimension);
|
||||||
|
|
||||||
|
if (Iterables.getFirst(dimensionValues, "") != null) {
|
||||||
|
bumpDictionary = true;
|
||||||
|
log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
|
||||||
|
|
||||||
|
final List<String> nullList = Lists.newArrayList();
|
||||||
|
nullList.add(null);
|
||||||
|
|
||||||
|
dictionary = GenericIndexed.fromIterable(
|
||||||
|
Iterables.concat(nullList, dimensionValues),
|
||||||
|
GenericIndexed.stringStrategy
|
||||||
|
);
|
||||||
|
|
||||||
|
final int dictionarySize = dictionary.size();
|
||||||
|
multiValCol = VSizeIndexed.fromIterable(
|
||||||
|
FunctionalIterable
|
||||||
|
.create(vals)
|
||||||
|
.transform(
|
||||||
|
new Function<List<Integer>, VSizeIndexedInts>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public VSizeIndexedInts apply(final List<Integer> input)
|
||||||
|
{
|
||||||
|
if (input == null) {
|
||||||
|
return VSizeIndexedInts.fromList(
|
||||||
|
new AbstractList<Integer>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Integer get(int index)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size()
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}, dictionarySize
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return VSizeIndexedInts.fromList(
|
||||||
|
new AbstractList<Integer>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Integer get(int index)
|
||||||
|
{
|
||||||
|
Integer val = input.get(index);
|
||||||
|
if (val == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return val + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size()
|
||||||
|
{
|
||||||
|
return input.size();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
dictionarySize
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
final int dictionarySize = dictionary.size();
|
||||||
multiValCol = VSizeIndexed.fromIterable(
|
multiValCol = VSizeIndexed.fromIterable(
|
||||||
FunctionalIterable
|
FunctionalIterable
|
||||||
.create(vals)
|
.create(vals)
|
||||||
//.filter(Predicates.<List<Integer>>notNull())
|
|
||||||
.transform(
|
.transform(
|
||||||
new Function<List<Integer>, VSizeIndexedInts>()
|
new Function<List<Integer>, VSizeIndexedInts>()
|
||||||
{
|
{
|
||||||
|
@ -855,27 +957,60 @@ public class IndexMaker
|
||||||
public VSizeIndexedInts apply(List<Integer> input)
|
public VSizeIndexedInts apply(List<Integer> input)
|
||||||
{
|
{
|
||||||
if (input == null) {
|
if (input == null) {
|
||||||
return VSizeIndexedInts.empty();
|
//return null;
|
||||||
|
return VSizeIndexedInts.fromList(
|
||||||
|
new AbstractList<Integer>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Integer get(int index)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size()
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}, dictionarySize
|
||||||
|
);
|
||||||
}
|
}
|
||||||
return VSizeIndexedInts.fromList(
|
return VSizeIndexedInts.fromList(
|
||||||
input,
|
input,
|
||||||
Collections.max(input)
|
dictionarySize
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
dictionary = GenericIndexed.fromIterable(
|
}
|
||||||
dimensionValues,
|
} else {
|
||||||
GenericIndexed.stringStrategy
|
final int dictionarySize = dictionary.size();
|
||||||
|
multiValCol = VSizeIndexed.fromIterable(
|
||||||
|
FunctionalIterable
|
||||||
|
.create(vals)
|
||||||
|
.transform(
|
||||||
|
new Function<List<Integer>, VSizeIndexedInts>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public VSizeIndexedInts apply(List<Integer> input)
|
||||||
|
{
|
||||||
|
return VSizeIndexedInts.fromList(
|
||||||
|
input,
|
||||||
|
dictionarySize
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
final List<Integer> vals = ((SingleValColumnDictionaryEntryStore) adder).get();
|
final List<Integer> vals = ((SingleValColumnDictionaryEntryStore) adder).get();
|
||||||
|
|
||||||
if (nullSet != null) {
|
if (nullSet != null) {
|
||||||
log.info("Dimension[%s] has null rows.", dimension);
|
log.info("Dimension[%s] has null rows.", dimension);
|
||||||
|
|
||||||
if (Iterables.getFirst(dimensionValues, "") != null) {
|
if (Iterables.getFirst(dimensionValues, null) != null) {
|
||||||
bumpDictionary = true;
|
bumpDictionary = true;
|
||||||
log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
|
log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
|
||||||
|
|
||||||
|
@ -899,6 +1034,27 @@ public class IndexMaker
|
||||||
return val + 1;
|
return val + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size()
|
||||||
|
{
|
||||||
|
return vals.size();
|
||||||
|
}
|
||||||
|
}, dictionary.size()
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
singleValCol = VSizeIndexedInts.fromList(
|
||||||
|
new AbstractList<Integer>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Integer get(int index)
|
||||||
|
{
|
||||||
|
Integer val = vals.get(index);
|
||||||
|
if (val == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size()
|
public int size()
|
||||||
{
|
{
|
||||||
|
@ -908,10 +1064,6 @@ public class IndexMaker
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dictionary = GenericIndexed.fromIterable(
|
|
||||||
dimensionValues,
|
|
||||||
GenericIndexed.stringStrategy
|
|
||||||
);
|
|
||||||
singleValCol = VSizeIndexedInts.fromList(vals, dictionary.size());
|
singleValCol = VSizeIndexedInts.fromList(vals, dictionary.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -942,7 +1094,7 @@ public class IndexMaker
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericIndexed<ImmutableConciseSet> bitmaps;
|
GenericIndexed<ImmutableConciseSet> bitmaps;
|
||||||
if (!hasMultipleValues) {
|
|
||||||
if (nullSet != null) {
|
if (nullSet != null) {
|
||||||
final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullSet);
|
final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullSet);
|
||||||
if (bumpDictionary) {
|
if (bumpDictionary) {
|
||||||
|
@ -1005,22 +1157,6 @@ public class IndexMaker
|
||||||
ConciseCompressedIndexedInts.objectStrategy
|
ConciseCompressedIndexedInts.objectStrategy
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
bitmaps = GenericIndexed.fromIterable(
|
|
||||||
Iterables.transform(
|
|
||||||
conciseSets,
|
|
||||||
new Function<ConciseSet, ImmutableConciseSet>()
|
|
||||||
{
|
|
||||||
@Override
|
|
||||||
public ImmutableConciseSet apply(ConciseSet input)
|
|
||||||
{
|
|
||||||
return ImmutableConciseSet.newImmutableFromMutable(input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
),
|
|
||||||
ConciseCompressedIndexedInts.objectStrategy
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make spatial indexes
|
// Make spatial indexes
|
||||||
ImmutableRTree spatialIndex = null;
|
ImmutableRTree spatialIndex = null;
|
||||||
|
@ -1033,12 +1169,15 @@ public class IndexMaker
|
||||||
int dimValIndex = 0;
|
int dimValIndex = 0;
|
||||||
for (String dimVal : dimensionValuesLookup.get(dimension)) {
|
for (String dimVal : dimensionValuesLookup.get(dimension)) {
|
||||||
if (hasSpatialIndexes) {
|
if (hasSpatialIndexes) {
|
||||||
|
if (dimVal != null && !dimVal.isEmpty()) {
|
||||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||||
float[] coords = new float[stringCoords.size()];
|
float[] coords = new float[stringCoords.size()];
|
||||||
for (int j = 0; j < coords.length; j++) {
|
for (int j = 0; j < coords.length; j++) {
|
||||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||||
}
|
}
|
||||||
tree.insert(coords, conciseSets.get(dimValIndex++));
|
tree.insert(coords, conciseSets.get(dimValIndex));
|
||||||
|
}
|
||||||
|
dimValIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hasSpatialIndexes) {
|
if (hasSpatialIndexes) {
|
||||||
|
@ -1517,7 +1656,7 @@ public class IndexMaker
|
||||||
final Rowboat retVal = new Rowboat(
|
final Rowboat retVal = new Rowboat(
|
||||||
lhs.getTimestamp(),
|
lhs.getTimestamp(),
|
||||||
lhs.getDims(),
|
lhs.getDims(),
|
||||||
lhs.getMetrics(),
|
metrics,
|
||||||
lhs.getRowNum()
|
lhs.getRowNum()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -1174,6 +1174,15 @@ public class SchemalessTestFull
|
||||||
new DateTime("2011-01-12T00:00:00.000Z"),
|
new DateTime("2011-01-12T00:00:00.000Z"),
|
||||||
new TopNResultValue(
|
new TopNResultValue(
|
||||||
Arrays.<Map<String, Object>>asList(
|
Arrays.<Map<String, Object>>asList(
|
||||||
|
ImmutableMap.<String, Object>builder()
|
||||||
|
.put("provider", "")
|
||||||
|
.put("rows", 6L)
|
||||||
|
.put("index", 400.0D)
|
||||||
|
.put("addRowsIndexConstant", 407.0D)
|
||||||
|
.put("uniques", 0.0)
|
||||||
|
.put("maxIndex", 100.0)
|
||||||
|
.put("minIndex", 0.0)
|
||||||
|
.build(),
|
||||||
ImmutableMap.<String, Object>builder()
|
ImmutableMap.<String, Object>builder()
|
||||||
.put("provider", "spot")
|
.put("provider", "spot")
|
||||||
.put("rows", 4L)
|
.put("rows", 4L)
|
||||||
|
@ -1183,15 +1192,6 @@ public class SchemalessTestFull
|
||||||
.put("maxIndex", 100.0)
|
.put("maxIndex", 100.0)
|
||||||
.put("minIndex", 100.0)
|
.put("minIndex", 100.0)
|
||||||
.build(),
|
.build(),
|
||||||
ImmutableMap.<String, Object>builder()
|
|
||||||
.put("provider", "")
|
|
||||||
.put("rows", 3L)
|
|
||||||
.put("index", 200.0D)
|
|
||||||
.put("addRowsIndexConstant", 204.0D)
|
|
||||||
.put("uniques", 0.0)
|
|
||||||
.put("maxIndex", 100.0)
|
|
||||||
.put("minIndex", 0.0)
|
|
||||||
.build(),
|
|
||||||
ImmutableMap.<String, Object>builder()
|
ImmutableMap.<String, Object>builder()
|
||||||
.put("provider", "total_market")
|
.put("provider", "total_market")
|
||||||
.put("rows", 2L)
|
.put("rows", 2L)
|
||||||
|
|
Loading…
Reference in New Issue