mirror of https://github.com/apache/druid.git
Adjust string comparators used for ingestion (#9742)
* Adjust string comparators used for ingestion * Small tweak * Fix inspection, more javadocs * Address PR comment * Add rollup comment * Add ordering test * Fix IncrementaIndexRowCompTest
This commit is contained in:
parent
7711f776a0
commit
fe000a9e4b
|
@ -122,6 +122,10 @@ public interface DimensionHandler
|
||||||
* Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
|
* Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
|
||||||
* corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
|
* corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
|
||||||
* {@link ColumnValueSelector}s as {@link DimensionSelector}s.
|
* {@link ColumnValueSelector}s as {@link DimensionSelector}s.
|
||||||
|
*
|
||||||
|
* The comparison rules used by this method should match the rules used by
|
||||||
|
* {@link DimensionIndexer#compareUnsortedEncodedKeyComponents}, otherwise incorrect ordering/merging of rows
|
||||||
|
* can occur during ingestion, causing issues such as imperfect rollup.
|
||||||
*/
|
*/
|
||||||
Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();
|
Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();
|
||||||
|
|
||||||
|
|
|
@ -241,6 +241,10 @@ public interface DimensionIndexer
|
||||||
*
|
*
|
||||||
* Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation.
|
* Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation.
|
||||||
*
|
*
|
||||||
|
* The comparison rules used by this method should match the rules used by
|
||||||
|
* {@link DimensionHandler#getEncodedValueSelectorComparator()}, otherwise incorrect ordering/merging of rows
|
||||||
|
* can occur during ingestion, causing issues such as imperfect rollup.
|
||||||
|
*
|
||||||
* @param lhs dimension value array from a Row key
|
* @param lhs dimension value array from a Row key
|
||||||
* @param rhs dimension value array from a Row key
|
* @param rhs dimension value array from a Row key
|
||||||
* @return comparison of the two arrays
|
* @return comparison of the two arrays
|
||||||
|
|
|
@ -33,61 +33,46 @@ import java.util.Comparator;
|
||||||
|
|
||||||
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
|
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compares {@link IndexedInts} lexicographically, with the exception that if a row contains only zeros (that's the
|
* This comparator uses the following rules:
|
||||||
* index of null) at all positions, it is considered "null" as a whole and is "less" than any "non-null" row. Empty
|
* - Compare the two value arrays up to the length of the shorter array
|
||||||
* row (size is zero) is also considered "null".
|
* - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller
|
||||||
*
|
* - Comparing null and the empty list is a special case: these are considered equal
|
||||||
* The implementation is a bit complicated because it tries to check each position of both rows only once.
|
|
||||||
*/
|
*/
|
||||||
private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
|
private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
|
||||||
IndexedInts row1 = getRow(s1);
|
IndexedInts row1 = getRow(s1);
|
||||||
IndexedInts row2 = getRow(s2);
|
IndexedInts row2 = getRow(s2);
|
||||||
int len1 = row1.size();
|
int len1 = row1.size();
|
||||||
int len2 = row2.size();
|
int len2 = row2.size();
|
||||||
boolean row1IsNull = true;
|
int lenCompareResult = Integer.compare(len1, len2);
|
||||||
boolean row2IsNull = true;
|
int valsIndex = 0;
|
||||||
for (int i = 0; i < Math.min(len1, len2); i++) {
|
|
||||||
int v1 = row1.get(i);
|
|
||||||
row1IsNull &= v1 == 0;
|
|
||||||
int v2 = row2.get(i);
|
|
||||||
row2IsNull &= v2 == 0;
|
|
||||||
int valueDiff = Integer.compare(v1, v2);
|
|
||||||
if (valueDiff != 0) {
|
|
||||||
return valueDiff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//noinspection SubtractionInCompareTo -- substraction is safe here, because lengths or rows are small numbers.
|
|
||||||
int lenDiff = len1 - len2;
|
|
||||||
if (lenDiff == 0) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
if (!row1IsNull || !row2IsNull) {
|
|
||||||
return lenDiff;
|
|
||||||
} else {
|
|
||||||
return compareRestNulls(row1, len1, row2, len2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2)
|
if (lenCompareResult != 0) {
|
||||||
{
|
// if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
|
||||||
if (len1 < len2) {
|
if (len1 + len2 == 1) {
|
||||||
for (int i = len1; i < len2; i++) {
|
IndexedInts longerRow = len2 > len1 ? row2 : row1;
|
||||||
if (row2.get(i) != 0) {
|
if (longerRow.get(0) == 0) {
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = len2; i < len1; i++) {
|
|
||||||
if (row1.get(i) != 0) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
|
} else {
|
||||||
|
//noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2
|
||||||
|
return longerRow == row1 ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int lenToCompare = Math.min(len1, len2);
|
||||||
|
while (valsIndex < lenToCompare) {
|
||||||
|
int v1 = row1.get(valsIndex);
|
||||||
|
int v2 = row2.get(valsIndex);
|
||||||
|
int valueCompareResult = Integer.compare(v1, v2);
|
||||||
|
if (valueCompareResult != 0) {
|
||||||
|
return valueCompareResult;
|
||||||
|
}
|
||||||
|
++valsIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
return lenCompareResult;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.
|
* Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.
|
||||||
|
|
|
@ -399,23 +399,42 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
int lhsLen = lhs.length;
|
int lhsLen = lhs.length;
|
||||||
int rhsLen = rhs.length;
|
int rhsLen = rhs.length;
|
||||||
|
|
||||||
int retVal = Ints.compare(lhsLen, rhsLen);
|
int lenCompareResult = Ints.compare(lhsLen, rhsLen);
|
||||||
|
if (lenCompareResult != 0) {
|
||||||
|
// if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
|
||||||
|
if (lhsLen + rhsLen == 1) {
|
||||||
|
int[] longerVal = rhsLen > lhsLen ? rhs : lhs;
|
||||||
|
if (longerVal[0] == dimLookup.idForNull) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
//noinspection ArrayEquality -- longerVal is explicitly set to only lhs or rhs
|
||||||
|
return longerVal == lhs ? 1 : -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int valsIndex = 0;
|
int valsIndex = 0;
|
||||||
while (retVal == 0 && valsIndex < lhsLen) {
|
int lenToCompare = Math.min(lhsLen, rhsLen);
|
||||||
|
while (valsIndex < lenToCompare) {
|
||||||
int lhsVal = lhs[valsIndex];
|
int lhsVal = lhs[valsIndex];
|
||||||
int rhsVal = rhs[valsIndex];
|
int rhsVal = rhs[valsIndex];
|
||||||
if (lhsVal != rhsVal) {
|
if (lhsVal != rhsVal) {
|
||||||
final String lhsValActual = getActualValue(lhsVal, false);
|
final String lhsValActual = getActualValue(lhsVal, false);
|
||||||
final String rhsValActual = getActualValue(rhsVal, false);
|
final String rhsValActual = getActualValue(rhsVal, false);
|
||||||
|
int valueCompareResult = 0;
|
||||||
if (lhsValActual != null && rhsValActual != null) {
|
if (lhsValActual != null && rhsValActual != null) {
|
||||||
retVal = lhsValActual.compareTo(rhsValActual);
|
valueCompareResult = lhsValActual.compareTo(rhsValActual);
|
||||||
} else if (lhsValActual == null ^ rhsValActual == null) {
|
} else if (lhsValActual == null ^ rhsValActual == null) {
|
||||||
retVal = lhsValActual == null ? -1 : 1;
|
valueCompareResult = lhsValActual == null ? -1 : 1;
|
||||||
|
}
|
||||||
|
if (valueCompareResult != 0) {
|
||||||
|
return valueCompareResult;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++valsIndex;
|
++valsIndex;
|
||||||
}
|
}
|
||||||
return retVal;
|
|
||||||
|
return lenCompareResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -796,6 +815,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup;
|
return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
private String getActualValue(int intermediateValue, boolean idSorted)
|
private String getActualValue(int intermediateValue, boolean idSorted)
|
||||||
{
|
{
|
||||||
if (idSorted) {
|
if (idSorted) {
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.druid.segment.column.BitmapIndex;
|
||||||
import org.apache.druid.segment.column.ColumnHolder;
|
import org.apache.druid.segment.column.ColumnHolder;
|
||||||
import org.apache.druid.segment.column.DictionaryEncodedColumn;
|
import org.apache.druid.segment.column.DictionaryEncodedColumn;
|
||||||
import org.apache.druid.segment.data.IncrementalIndexTest;
|
import org.apache.druid.segment.data.IncrementalIndexTest;
|
||||||
|
import org.apache.druid.segment.data.IndexedInts;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||||
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
|
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -55,6 +56,10 @@ import java.util.stream.IntStream;
|
||||||
|
|
||||||
public class IndexMergerNullHandlingTest
|
public class IndexMergerNullHandlingTest
|
||||||
{
|
{
|
||||||
|
static {
|
||||||
|
NullHandling.initializeForTests();
|
||||||
|
}
|
||||||
|
|
||||||
private IndexMerger indexMerger;
|
private IndexMerger indexMerger;
|
||||||
private IndexIO indexIO;
|
private IndexIO indexIO;
|
||||||
private IndexSpec indexSpec;
|
private IndexSpec indexSpec;
|
||||||
|
@ -221,8 +226,9 @@ public class IndexMergerNullHandlingTest
|
||||||
retVal.add(NullHandling.emptyToNullIfNeeded(((String) value)));
|
retVal.add(NullHandling.emptyToNullIfNeeded(((String) value)));
|
||||||
} else if (value instanceof List) {
|
} else if (value instanceof List) {
|
||||||
final List<String> list = (List<String>) value;
|
final List<String> list = (List<String>) value;
|
||||||
if (list.isEmpty() && !hasMultipleValues) {
|
if (list.isEmpty()) {
|
||||||
// empty lists become nulls in single valued columns
|
// empty lists become nulls in single valued columns
|
||||||
|
// they sometimes also become nulls in multi-valued columns (see comments in getRow())
|
||||||
retVal.add(NullHandling.emptyToNullIfNeeded(null));
|
retVal.add(NullHandling.emptyToNullIfNeeded(null));
|
||||||
} else {
|
} else {
|
||||||
retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
|
retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
|
||||||
|
@ -242,7 +248,25 @@ public class IndexMergerNullHandlingTest
|
||||||
final List<String> retVal = new ArrayList<>();
|
final List<String> retVal = new ArrayList<>();
|
||||||
|
|
||||||
if (column.hasMultipleValues()) {
|
if (column.hasMultipleValues()) {
|
||||||
column.getMultiValueRow(rowNumber).forEach(i -> retVal.add(column.lookupName(i)));
|
IndexedInts rowVals = column.getMultiValueRow(rowNumber);
|
||||||
|
if (rowVals.size() == 0) {
|
||||||
|
// This is a sort of test hack:
|
||||||
|
// - If we ingest the subset [{d=[]}, {d=[a, b]}], we get an IndexedInts with 0 size for the nully row,
|
||||||
|
// representing the empty list
|
||||||
|
// - If we ingest the subset [{}, {d=[]}, {d=[a, b]}], we instead get an IndexedInts with 1 size,
|
||||||
|
// representing a row with a single null value
|
||||||
|
// This occurs because the dimension value comparator used during ingestion considers null and the empty list
|
||||||
|
// to be the same.
|
||||||
|
// - In the first subset, we only see the empty list and a non-empty list, so the key used in the
|
||||||
|
// incremental index fact table for the nully row is the empty list.
|
||||||
|
// - In the second subset, the fact table initially gets an entry for d=null. When the row with the
|
||||||
|
// empty list value is added, it is treated as identical to the first d=null row, so it gets rolled up.
|
||||||
|
// The resulting persisted segment will have [null] instead of [] because of this rollup.
|
||||||
|
// To simplify this test class, we always normalize the empty list into null here.
|
||||||
|
retVal.add(null);
|
||||||
|
} else {
|
||||||
|
rowVals.forEach(i -> retVal.add(column.lookupName(i)));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
|
retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,6 +78,7 @@ import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -2076,11 +2077,11 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
|
||||||
|
|
||||||
Assert.assertEquals(2, rowList.size());
|
Assert.assertEquals(2, rowList.size());
|
||||||
Assert.assertEquals(
|
Assert.assertEquals(
|
||||||
Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
|
Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
|
||||||
rowList.get(0).dimensionValues()
|
rowList.get(0).dimensionValues()
|
||||||
);
|
);
|
||||||
Assert.assertEquals(
|
Assert.assertEquals(
|
||||||
Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
|
Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
|
||||||
rowList.get(1).dimensionValues()
|
rowList.get(1).dimensionValues()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -2210,6 +2211,433 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception
|
||||||
|
{
|
||||||
|
List<String> dims = Arrays.asList(
|
||||||
|
"dimA",
|
||||||
|
"dimMultiVal"
|
||||||
|
);
|
||||||
|
|
||||||
|
IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
|
||||||
|
.withDimensionsSpec(
|
||||||
|
new DimensionsSpec(
|
||||||
|
ImmutableList.of(
|
||||||
|
new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
|
||||||
|
new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.withMetrics(
|
||||||
|
new LongSumAggregatorFactory("sumCount", "sumCount")
|
||||||
|
)
|
||||||
|
.withRollup(true)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
IncrementalIndex toPersistA = new IncrementalIndex.Builder()
|
||||||
|
.setIndexSchema(indexSchema)
|
||||||
|
.setMaxRowCount(1000)
|
||||||
|
.buildOnheap();
|
||||||
|
|
||||||
|
Map<String, Object> event1 = new HashMap<>();
|
||||||
|
event1.put("dimA", "leek");
|
||||||
|
event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
|
||||||
|
event1.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> event2 = new HashMap<>();
|
||||||
|
event2.put("dimA", "leek");
|
||||||
|
event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
|
||||||
|
event2.put("sumCount", 1L);
|
||||||
|
|
||||||
|
toPersistA.add(new MapBasedInputRow(1, dims, event1));
|
||||||
|
toPersistA.add(new MapBasedInputRow(1, dims, event2));
|
||||||
|
|
||||||
|
IncrementalIndex toPersistB = new IncrementalIndex.Builder()
|
||||||
|
.setIndexSchema(indexSchema)
|
||||||
|
.setMaxRowCount(1000)
|
||||||
|
.buildOnheap();
|
||||||
|
|
||||||
|
Map<String, Object> event3 = new HashMap<>();
|
||||||
|
event3.put("dimA", "leek");
|
||||||
|
event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
|
||||||
|
event3.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> event4 = new HashMap<>();
|
||||||
|
event4.put("dimA", "potato");
|
||||||
|
event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
|
||||||
|
event4.put("sumCount", 1L);
|
||||||
|
|
||||||
|
toPersistB.add(new MapBasedInputRow(1, dims, event3));
|
||||||
|
toPersistB.add(new MapBasedInputRow(1, dims, event4));
|
||||||
|
|
||||||
|
final File tmpDirA = temporaryFolder.newFolder();
|
||||||
|
final File tmpDirB = temporaryFolder.newFolder();
|
||||||
|
final File tmpDirMerged = temporaryFolder.newFolder();
|
||||||
|
|
||||||
|
QueryableIndex indexA = closer.closeLater(
|
||||||
|
indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
|
||||||
|
);
|
||||||
|
|
||||||
|
QueryableIndex indexB = closer.closeLater(
|
||||||
|
indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null))
|
||||||
|
);
|
||||||
|
|
||||||
|
final QueryableIndex merged = closer.closeLater(
|
||||||
|
indexIO.loadIndex(
|
||||||
|
indexMerger.mergeQueryableIndex(
|
||||||
|
Arrays.asList(indexA, indexB),
|
||||||
|
true,
|
||||||
|
new AggregatorFactory[]{
|
||||||
|
new LongSumAggregatorFactory("sumCount", "sumCount")
|
||||||
|
},
|
||||||
|
tmpDirMerged,
|
||||||
|
indexSpec,
|
||||||
|
null
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
|
||||||
|
final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
ImmutableList.of("dimA", "dimMultiVal"),
|
||||||
|
ImmutableList.copyOf(adapter.getDimensionNames())
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(3, rowList.size());
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
|
||||||
|
Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
|
||||||
|
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
|
||||||
|
Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));
|
||||||
|
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
|
||||||
|
|
||||||
|
checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultivalDim_persistAndMerge_dimensionValueOrderingRules() throws Exception
|
||||||
|
{
|
||||||
|
List<String> dims = Arrays.asList(
|
||||||
|
"dimA",
|
||||||
|
"dimMultiVal"
|
||||||
|
);
|
||||||
|
|
||||||
|
IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
|
||||||
|
.withDimensionsSpec(
|
||||||
|
new DimensionsSpec(
|
||||||
|
ImmutableList.of(
|
||||||
|
new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
|
||||||
|
new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.withMetrics(
|
||||||
|
new LongSumAggregatorFactory("sumCount", "sumCount")
|
||||||
|
)
|
||||||
|
.withRollup(true)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Map<String, Object> nullEvent = new HashMap<>();
|
||||||
|
nullEvent.put("dimA", "leek");
|
||||||
|
nullEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> nullEvent2 = new HashMap<>();
|
||||||
|
nullEvent2.put("dimA", "leek");
|
||||||
|
nullEvent2.put("dimMultiVal", null);
|
||||||
|
nullEvent2.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> emptyListEvent = new HashMap<>();
|
||||||
|
emptyListEvent.put("dimA", "leek");
|
||||||
|
emptyListEvent.put("dimMultiVal", ImmutableList.of());
|
||||||
|
emptyListEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
List<String> listWithNull = new ArrayList<>();
|
||||||
|
listWithNull.add(null);
|
||||||
|
Map<String, Object> listWithNullEvent = new HashMap<>();
|
||||||
|
listWithNullEvent.put("dimA", "leek");
|
||||||
|
listWithNullEvent.put("dimMultiVal", listWithNull);
|
||||||
|
listWithNullEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> emptyStringEvent = new HashMap<>();
|
||||||
|
emptyStringEvent.put("dimA", "leek");
|
||||||
|
emptyStringEvent.put("dimMultiVal", "");
|
||||||
|
emptyStringEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> listWithEmptyStringEvent = new HashMap<>();
|
||||||
|
listWithEmptyStringEvent.put("dimA", "leek");
|
||||||
|
listWithEmptyStringEvent.put("dimMultiVal", ImmutableList.of(""));
|
||||||
|
listWithEmptyStringEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> singleValEvent = new HashMap<>();
|
||||||
|
singleValEvent.put("dimA", "leek");
|
||||||
|
singleValEvent.put("dimMultiVal", "1");
|
||||||
|
singleValEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> singleValEvent2 = new HashMap<>();
|
||||||
|
singleValEvent2.put("dimA", "leek");
|
||||||
|
singleValEvent2.put("dimMultiVal", "2");
|
||||||
|
singleValEvent2.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> singleValEvent3 = new HashMap<>();
|
||||||
|
singleValEvent3.put("dimA", "potato");
|
||||||
|
singleValEvent3.put("dimMultiVal", "2");
|
||||||
|
singleValEvent3.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> listWithSingleValEvent = new HashMap<>();
|
||||||
|
listWithSingleValEvent.put("dimA", "leek");
|
||||||
|
listWithSingleValEvent.put("dimMultiVal", ImmutableList.of("1"));
|
||||||
|
listWithSingleValEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> listWithSingleValEvent2 = new HashMap<>();
|
||||||
|
listWithSingleValEvent2.put("dimA", "leek");
|
||||||
|
listWithSingleValEvent2.put("dimMultiVal", ImmutableList.of("2"));
|
||||||
|
listWithSingleValEvent2.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> listWithSingleValEvent3 = new HashMap<>();
|
||||||
|
listWithSingleValEvent3.put("dimA", "potato");
|
||||||
|
listWithSingleValEvent3.put("dimMultiVal", ImmutableList.of("2"));
|
||||||
|
listWithSingleValEvent3.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent = new HashMap<>();
|
||||||
|
multivalEvent.put("dimA", "leek");
|
||||||
|
multivalEvent.put("dimMultiVal", ImmutableList.of("1", "3"));
|
||||||
|
multivalEvent.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent2 = new HashMap<>();
|
||||||
|
multivalEvent2.put("dimA", "leek");
|
||||||
|
multivalEvent2.put("dimMultiVal", ImmutableList.of("1", "4"));
|
||||||
|
multivalEvent2.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent3 = new HashMap<>();
|
||||||
|
multivalEvent3.put("dimA", "leek");
|
||||||
|
multivalEvent3.put("dimMultiVal", ImmutableList.of("1", "3", "5"));
|
||||||
|
multivalEvent3.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent4 = new HashMap<>();
|
||||||
|
multivalEvent4.put("dimA", "leek");
|
||||||
|
multivalEvent4.put("dimMultiVal", ImmutableList.of("1", "2", "3"));
|
||||||
|
multivalEvent4.put("sumCount", 1L);
|
||||||
|
|
||||||
|
List<String> multivalEvent5List = Arrays.asList("1", "2", "3", null);
|
||||||
|
Map<String, Object> multivalEvent5 = new HashMap<>();
|
||||||
|
multivalEvent5.put("dimA", "leek");
|
||||||
|
multivalEvent5.put("dimMultiVal", multivalEvent5List);
|
||||||
|
multivalEvent5.put("sumCount", 1L);
|
||||||
|
|
||||||
|
List<String> multivalEvent6List = Arrays.asList(null, "3");
|
||||||
|
Map<String, Object> multivalEvent6 = new HashMap<>();
|
||||||
|
multivalEvent6.put("dimA", "leek");
|
||||||
|
multivalEvent6.put("dimMultiVal", multivalEvent6List);
|
||||||
|
multivalEvent6.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent7 = new HashMap<>();
|
||||||
|
multivalEvent7.put("dimA", "leek");
|
||||||
|
multivalEvent7.put("dimMultiVal", ImmutableList.of("1", "2", "3", ""));
|
||||||
|
multivalEvent7.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent8 = new HashMap<>();
|
||||||
|
multivalEvent8.put("dimA", "leek");
|
||||||
|
multivalEvent8.put("dimMultiVal", ImmutableList.of("", "3"));
|
||||||
|
multivalEvent8.put("sumCount", 1L);
|
||||||
|
|
||||||
|
Map<String, Object> multivalEvent9 = new HashMap<>();
|
||||||
|
multivalEvent9.put("dimA", "potato");
|
||||||
|
multivalEvent9.put("dimMultiVal", ImmutableList.of("1", "3"));
|
||||||
|
multivalEvent9.put("sumCount", 1L);
|
||||||
|
|
||||||
|
List<Map<String, Object>> events = ImmutableList.of(
|
||||||
|
nullEvent,
|
||||||
|
nullEvent2,
|
||||||
|
emptyListEvent,
|
||||||
|
listWithNullEvent,
|
||||||
|
emptyStringEvent,
|
||||||
|
listWithEmptyStringEvent,
|
||||||
|
singleValEvent,
|
||||||
|
singleValEvent2,
|
||||||
|
singleValEvent3,
|
||||||
|
listWithSingleValEvent,
|
||||||
|
listWithSingleValEvent2,
|
||||||
|
listWithSingleValEvent3,
|
||||||
|
multivalEvent,
|
||||||
|
multivalEvent2,
|
||||||
|
multivalEvent3,
|
||||||
|
multivalEvent4,
|
||||||
|
multivalEvent5,
|
||||||
|
multivalEvent6,
|
||||||
|
multivalEvent7,
|
||||||
|
multivalEvent8,
|
||||||
|
multivalEvent9
|
||||||
|
);
|
||||||
|
|
||||||
|
IncrementalIndex toPersistA = new IncrementalIndex.Builder()
|
||||||
|
.setIndexSchema(indexSchema)
|
||||||
|
.setMaxRowCount(1000)
|
||||||
|
.buildOnheap();
|
||||||
|
|
||||||
|
for (Map<String, Object> event : events) {
|
||||||
|
toPersistA.add(new MapBasedInputRow(1, dims, event));
|
||||||
|
}
|
||||||
|
|
||||||
|
final File tmpDirA = temporaryFolder.newFolder();
|
||||||
|
QueryableIndex indexA = closer.closeLater(
|
||||||
|
indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
|
||||||
|
);
|
||||||
|
|
||||||
|
List<QueryableIndex> singleEventIndexes = new ArrayList<>();
|
||||||
|
for (Map<String, Object> event : events) {
|
||||||
|
IncrementalIndex toPersist = new IncrementalIndex.Builder()
|
||||||
|
.setIndexSchema(indexSchema)
|
||||||
|
.setMaxRowCount(1000)
|
||||||
|
.buildOnheap();
|
||||||
|
|
||||||
|
toPersist.add(new MapBasedInputRow(1, dims, event));
|
||||||
|
final File tmpDir = temporaryFolder.newFolder();
|
||||||
|
QueryableIndex queryableIndex = closer.closeLater(
|
||||||
|
indexIO.loadIndex(indexMerger.persist(toPersist, tmpDir, indexSpec, null))
|
||||||
|
);
|
||||||
|
singleEventIndexes.add(queryableIndex);
|
||||||
|
}
|
||||||
|
singleEventIndexes.add(indexA);
|
||||||
|
|
||||||
|
final File tmpDirMerged = temporaryFolder.newFolder();
|
||||||
|
final QueryableIndex merged = closer.closeLater(
|
||||||
|
indexIO.loadIndex(
|
||||||
|
indexMerger.mergeQueryableIndex(
|
||||||
|
singleEventIndexes,
|
||||||
|
true,
|
||||||
|
new AggregatorFactory[]{
|
||||||
|
new LongSumAggregatorFactory("sumCount", "sumCount")
|
||||||
|
},
|
||||||
|
tmpDirMerged,
|
||||||
|
indexSpec,
|
||||||
|
null
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
|
||||||
|
final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
ImmutableList.of("dimA", "dimMultiVal"),
|
||||||
|
ImmutableList.copyOf(adapter.getDimensionNames())
|
||||||
|
);
|
||||||
|
|
||||||
|
if (NullHandling.replaceWithDefault()) {
|
||||||
|
Assert.assertEquals(11, rowList.size());
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
|
||||||
|
Assert.assertEquals(12L, rowList.get(0).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(1).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(2).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(3).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(4).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(5).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(6).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(6).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(7).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(8).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(8).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(9).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(10).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(10).metricValues().get(0));
|
||||||
|
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8), adapter.getBitmapIndex("dimA", "leek"));
|
||||||
|
checkBitmapIndex(Arrays.asList(9, 10), adapter.getBitmapIndex("dimA", "potato"));
|
||||||
|
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
|
||||||
|
checkBitmapIndex(ImmutableList.of(), adapter.getBitmapIndex("dimMultiVal", ""));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 3, 4, 5, 6, 7, 9), adapter.getBitmapIndex("dimMultiVal", "1"));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 4, 8, 10), adapter.getBitmapIndex("dimMultiVal", "2"));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 6, 9), adapter.getBitmapIndex("dimMultiVal", "3"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(7), adapter.getBitmapIndex("dimMultiVal", "4"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(6), adapter.getBitmapIndex("dimMultiVal", "5"));
|
||||||
|
} else {
|
||||||
|
Assert.assertEquals(14, rowList.size());
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
|
||||||
|
Assert.assertEquals(8L, rowList.get(0).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(2).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", ""), rowList.get(3).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "1", "2", "3")), rowList.get(4).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "3")), rowList.get(5).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(6).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(6).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(7).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(8).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(8).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(9).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(10).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(10).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(11).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(11).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(12).dimensionValues());
|
||||||
|
Assert.assertEquals(2L, rowList.get(12).metricValues().get(0));
|
||||||
|
|
||||||
|
Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(13).dimensionValues());
|
||||||
|
Assert.assertEquals(4L, rowList.get(13).metricValues().get(0));
|
||||||
|
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), adapter.getBitmapIndex("dimA", "leek"));
|
||||||
|
checkBitmapIndex(Arrays.asList(12, 13), adapter.getBitmapIndex("dimA", "potato"));
|
||||||
|
|
||||||
|
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
|
||||||
|
checkBitmapIndex(ImmutableList.of(3, 4, 5), adapter.getBitmapIndex("dimMultiVal", ""));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 4, 6, 7, 8, 9, 10, 12), adapter.getBitmapIndex("dimMultiVal", "1"));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 4, 7, 11, 13), adapter.getBitmapIndex("dimMultiVal", "2"));
|
||||||
|
checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 7, 8, 9, 12), adapter.getBitmapIndex("dimMultiVal", "3"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(10), adapter.getBitmapIndex("dimMultiVal", "4"));
|
||||||
|
checkBitmapIndex(Collections.singletonList(9), adapter.getBitmapIndex("dimMultiVal", "5"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
|
private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
|
||||||
{
|
{
|
||||||
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));
|
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));
|
||||||
|
|
|
@ -71,8 +71,8 @@ public class IncrementalIndexRowCompTest extends InitializedNullHandlingTest
|
||||||
|
|
||||||
Assert.assertTrue(comparator.compare(ir4, ir6) > 0);
|
Assert.assertTrue(comparator.compare(ir4, ir6) > 0);
|
||||||
Assert.assertTrue(comparator.compare(ir5, ir6) > 0);
|
Assert.assertTrue(comparator.compare(ir5, ir6) > 0);
|
||||||
Assert.assertTrue(comparator.compare(ir4, ir5) < 0);
|
Assert.assertTrue(comparator.compare(ir5, ir4) < 0);
|
||||||
Assert.assertTrue(comparator.compare(ir5, ir4) > 0);
|
Assert.assertTrue(comparator.compare(ir4, ir5) > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private MapBasedInputRow toMapRow(long time, Object... dimAndVal)
|
private MapBasedInputRow toMapRow(long time, Object... dimAndVal)
|
||||||
|
|
Loading…
Reference in New Issue