Adjust string comparators used for ingestion (#9742)

* Adjust string comparators used for ingestion

* Small tweak

* Fix inspection, more javadocs

* Address PR comment

* Add rollup comment

* Add ordering test

* Fix IncrementaIndexRowCompTest
This commit is contained in:
Jonathan Wei 2020-04-25 13:47:07 -07:00 committed by GitHub
parent 7711f776a0
commit fe000a9e4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 520 additions and 55 deletions

View File

@ -122,6 +122,10 @@ public interface DimensionHandler
* Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type, * Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
* corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares * corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
* {@link ColumnValueSelector}s as {@link DimensionSelector}s. * {@link ColumnValueSelector}s as {@link DimensionSelector}s.
*
* The comparison rules used by this method should match the rules used by
* {@link DimensionIndexer#compareUnsortedEncodedKeyComponents}, otherwise incorrect ordering/merging of rows
* can occur during ingestion, causing issues such as imperfect rollup.
*/ */
Comparator<ColumnValueSelector> getEncodedValueSelectorComparator(); Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();

View File

@ -241,6 +241,10 @@ public interface DimensionIndexer
* *
* Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation. * Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation.
* *
* The comparison rules used by this method should match the rules used by
* {@link DimensionHandler#getEncodedValueSelectorComparator()}, otherwise incorrect ordering/merging of rows
* can occur during ingestion, causing issues such as imperfect rollup.
*
* @param lhs dimension value array from a Row key * @param lhs dimension value array from a Row key
* @param rhs dimension value array from a Row key * @param rhs dimension value array from a Row key
* @return comparison of the two arrays * @return comparison of the two arrays

View File

@ -33,61 +33,46 @@ import java.util.Comparator;
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String> public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
{ {
/** /**
* Compares {@link IndexedInts} lexicographically, with the exception that if a row contains only zeros (that's the * This comparator uses the following rules:
* index of null) at all positions, it is considered "null" as a whole and is "less" than any "non-null" row. Empty * - Compare the two value arrays up to the length of the shorter array
* row (size is zero) is also considered "null". * - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller
* * - Comparing null and the empty list is a special case: these are considered equal
* The implementation is a bit complicated because it tries to check each position of both rows only once.
*/ */
private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> { private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
IndexedInts row1 = getRow(s1); IndexedInts row1 = getRow(s1);
IndexedInts row2 = getRow(s2); IndexedInts row2 = getRow(s2);
int len1 = row1.size(); int len1 = row1.size();
int len2 = row2.size(); int len2 = row2.size();
boolean row1IsNull = true; int lenCompareResult = Integer.compare(len1, len2);
boolean row2IsNull = true; int valsIndex = 0;
for (int i = 0; i < Math.min(len1, len2); i++) {
int v1 = row1.get(i);
row1IsNull &= v1 == 0;
int v2 = row2.get(i);
row2IsNull &= v2 == 0;
int valueDiff = Integer.compare(v1, v2);
if (valueDiff != 0) {
return valueDiff;
}
}
//noinspection SubtractionInCompareTo -- substraction is safe here, because lengths or rows are small numbers.
int lenDiff = len1 - len2;
if (lenDiff == 0) {
return 0;
} else {
if (!row1IsNull || !row2IsNull) {
return lenDiff;
} else {
return compareRestNulls(row1, len1, row2, len2);
}
}
};
private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2) if (lenCompareResult != 0) {
{ // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
if (len1 < len2) { if (len1 + len2 == 1) {
for (int i = len1; i < len2; i++) { IndexedInts longerRow = len2 > len1 ? row2 : row1;
if (row2.get(i) != 0) { if (longerRow.get(0) == 0) {
return -1;
}
}
} else {
for (int i = len2; i < len1; i++) {
if (row1.get(i) != 0) {
return 1;
}
}
}
return 0; return 0;
} else {
//noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2
return longerRow == row1 ? 1 : -1;
} }
}
}
int lenToCompare = Math.min(len1, len2);
while (valsIndex < lenToCompare) {
int v1 = row1.get(valsIndex);
int v2 = row2.get(valsIndex);
int valueCompareResult = Integer.compare(v1, v2);
if (valueCompareResult != 0) {
return valueCompareResult;
}
++valsIndex;
}
return lenCompareResult;
};
/** /**
* Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging. * Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.

View File

@ -399,23 +399,42 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
int lhsLen = lhs.length; int lhsLen = lhs.length;
int rhsLen = rhs.length; int rhsLen = rhs.length;
int retVal = Ints.compare(lhsLen, rhsLen); int lenCompareResult = Ints.compare(lhsLen, rhsLen);
if (lenCompareResult != 0) {
// if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
if (lhsLen + rhsLen == 1) {
int[] longerVal = rhsLen > lhsLen ? rhs : lhs;
if (longerVal[0] == dimLookup.idForNull) {
return 0;
} else {
//noinspection ArrayEquality -- longerVal is explicitly set to only lhs or rhs
return longerVal == lhs ? 1 : -1;
}
}
}
int valsIndex = 0; int valsIndex = 0;
while (retVal == 0 && valsIndex < lhsLen) { int lenToCompare = Math.min(lhsLen, rhsLen);
while (valsIndex < lenToCompare) {
int lhsVal = lhs[valsIndex]; int lhsVal = lhs[valsIndex];
int rhsVal = rhs[valsIndex]; int rhsVal = rhs[valsIndex];
if (lhsVal != rhsVal) { if (lhsVal != rhsVal) {
final String lhsValActual = getActualValue(lhsVal, false); final String lhsValActual = getActualValue(lhsVal, false);
final String rhsValActual = getActualValue(rhsVal, false); final String rhsValActual = getActualValue(rhsVal, false);
int valueCompareResult = 0;
if (lhsValActual != null && rhsValActual != null) { if (lhsValActual != null && rhsValActual != null) {
retVal = lhsValActual.compareTo(rhsValActual); valueCompareResult = lhsValActual.compareTo(rhsValActual);
} else if (lhsValActual == null ^ rhsValActual == null) { } else if (lhsValActual == null ^ rhsValActual == null) {
retVal = lhsValActual == null ? -1 : 1; valueCompareResult = lhsValActual == null ? -1 : 1;
}
if (valueCompareResult != 0) {
return valueCompareResult;
} }
} }
++valsIndex; ++valsIndex;
} }
return retVal;
return lenCompareResult;
} }
@Override @Override
@ -796,6 +815,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup; return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup;
} }
@Nullable
private String getActualValue(int intermediateValue, boolean idSorted) private String getActualValue(int intermediateValue, boolean idSorted)
{ {
if (idSorted) { if (idSorted) {

View File

@ -33,6 +33,7 @@ import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.data.IncrementalIndexTest; import org.apache.druid.segment.data.IncrementalIndexTest;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.junit.Assert; import org.junit.Assert;
@ -55,6 +56,10 @@ import java.util.stream.IntStream;
public class IndexMergerNullHandlingTest public class IndexMergerNullHandlingTest
{ {
static {
NullHandling.initializeForTests();
}
private IndexMerger indexMerger; private IndexMerger indexMerger;
private IndexIO indexIO; private IndexIO indexIO;
private IndexSpec indexSpec; private IndexSpec indexSpec;
@ -221,8 +226,9 @@ public class IndexMergerNullHandlingTest
retVal.add(NullHandling.emptyToNullIfNeeded(((String) value))); retVal.add(NullHandling.emptyToNullIfNeeded(((String) value)));
} else if (value instanceof List) { } else if (value instanceof List) {
final List<String> list = (List<String>) value; final List<String> list = (List<String>) value;
if (list.isEmpty() && !hasMultipleValues) { if (list.isEmpty()) {
// empty lists become nulls in single valued columns // empty lists become nulls in single valued columns
// they sometimes also become nulls in multi-valued columns (see comments in getRow())
retVal.add(NullHandling.emptyToNullIfNeeded(null)); retVal.add(NullHandling.emptyToNullIfNeeded(null));
} else { } else {
retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList())); retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
@ -242,7 +248,25 @@ public class IndexMergerNullHandlingTest
final List<String> retVal = new ArrayList<>(); final List<String> retVal = new ArrayList<>();
if (column.hasMultipleValues()) { if (column.hasMultipleValues()) {
column.getMultiValueRow(rowNumber).forEach(i -> retVal.add(column.lookupName(i))); IndexedInts rowVals = column.getMultiValueRow(rowNumber);
if (rowVals.size() == 0) {
// This is a sort of test hack:
// - If we ingest the subset [{d=[]}, {d=[a, b]}], we get an IndexedInts with 0 size for the nully row,
// representing the empty list
// - If we ingest the subset [{}, {d=[]}, {d=[a, b]}], we instead get an IndexedInts with 1 size,
// representing a row with a single null value
// This occurs because the dimension value comparator used during ingestion considers null and the empty list
// to be the same.
// - In the first subset, we only see the empty list and a non-empty list, so the key used in the
// incremental index fact table for the nully row is the empty list.
// - In the second subset, the fact table initially gets an entry for d=null. When the row with the
// empty list value is added, it is treated as identical to the first d=null row, so it gets rolled up.
// The resulting persisted segment will have [null] instead of [] because of this rollup.
// To simplify this test class, we always normalize the empty list into null here.
retVal.add(null);
} else {
rowVals.forEach(i -> retVal.add(column.lookupName(i)));
}
} else { } else {
retVal.add(column.lookupName(column.getSingleValueRow(rowNumber))); retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
} }

View File

@ -78,6 +78,7 @@ import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -2076,11 +2077,11 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
Assert.assertEquals(2, rowList.size()); Assert.assertEquals(2, rowList.size());
Assert.assertEquals( Assert.assertEquals(
Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")), Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
rowList.get(0).dimensionValues() rowList.get(0).dimensionValues()
); );
Assert.assertEquals( Assert.assertEquals(
Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")), Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
rowList.get(1).dimensionValues() rowList.get(1).dimensionValues()
); );
@ -2210,6 +2211,433 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
); );
} }
@Test
public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception
{
List<String> dims = Arrays.asList(
"dimA",
"dimMultiVal"
);
IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
.withDimensionsSpec(
new DimensionsSpec(
ImmutableList.of(
new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
)
)
)
.withMetrics(
new LongSumAggregatorFactory("sumCount", "sumCount")
)
.withRollup(true)
.build();
IncrementalIndex toPersistA = new IncrementalIndex.Builder()
.setIndexSchema(indexSchema)
.setMaxRowCount(1000)
.buildOnheap();
Map<String, Object> event1 = new HashMap<>();
event1.put("dimA", "leek");
event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
event1.put("sumCount", 1L);
Map<String, Object> event2 = new HashMap<>();
event2.put("dimA", "leek");
event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
event2.put("sumCount", 1L);
toPersistA.add(new MapBasedInputRow(1, dims, event1));
toPersistA.add(new MapBasedInputRow(1, dims, event2));
IncrementalIndex toPersistB = new IncrementalIndex.Builder()
.setIndexSchema(indexSchema)
.setMaxRowCount(1000)
.buildOnheap();
Map<String, Object> event3 = new HashMap<>();
event3.put("dimA", "leek");
event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
event3.put("sumCount", 1L);
Map<String, Object> event4 = new HashMap<>();
event4.put("dimA", "potato");
event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
event4.put("sumCount", 1L);
toPersistB.add(new MapBasedInputRow(1, dims, event3));
toPersistB.add(new MapBasedInputRow(1, dims, event4));
final File tmpDirA = temporaryFolder.newFolder();
final File tmpDirB = temporaryFolder.newFolder();
final File tmpDirMerged = temporaryFolder.newFolder();
QueryableIndex indexA = closer.closeLater(
indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
);
QueryableIndex indexB = closer.closeLater(
indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null))
);
final QueryableIndex merged = closer.closeLater(
indexIO.loadIndex(
indexMerger.mergeQueryableIndex(
Arrays.asList(indexA, indexB),
true,
new AggregatorFactory[]{
new LongSumAggregatorFactory("sumCount", "sumCount")
},
tmpDirMerged,
indexSpec,
null
)
)
);
final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
Assert.assertEquals(
ImmutableList.of("dimA", "dimMultiVal"),
ImmutableList.copyOf(adapter.getDimensionNames())
);
Assert.assertEquals(3, rowList.size());
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));
checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
}
@Test
public void testMultivalDim_persistAndMerge_dimensionValueOrderingRules() throws Exception
{
List<String> dims = Arrays.asList(
"dimA",
"dimMultiVal"
);
IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
.withDimensionsSpec(
new DimensionsSpec(
ImmutableList.of(
new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
)
)
)
.withMetrics(
new LongSumAggregatorFactory("sumCount", "sumCount")
)
.withRollup(true)
.build();
Map<String, Object> nullEvent = new HashMap<>();
nullEvent.put("dimA", "leek");
nullEvent.put("sumCount", 1L);
Map<String, Object> nullEvent2 = new HashMap<>();
nullEvent2.put("dimA", "leek");
nullEvent2.put("dimMultiVal", null);
nullEvent2.put("sumCount", 1L);
Map<String, Object> emptyListEvent = new HashMap<>();
emptyListEvent.put("dimA", "leek");
emptyListEvent.put("dimMultiVal", ImmutableList.of());
emptyListEvent.put("sumCount", 1L);
List<String> listWithNull = new ArrayList<>();
listWithNull.add(null);
Map<String, Object> listWithNullEvent = new HashMap<>();
listWithNullEvent.put("dimA", "leek");
listWithNullEvent.put("dimMultiVal", listWithNull);
listWithNullEvent.put("sumCount", 1L);
Map<String, Object> emptyStringEvent = new HashMap<>();
emptyStringEvent.put("dimA", "leek");
emptyStringEvent.put("dimMultiVal", "");
emptyStringEvent.put("sumCount", 1L);
Map<String, Object> listWithEmptyStringEvent = new HashMap<>();
listWithEmptyStringEvent.put("dimA", "leek");
listWithEmptyStringEvent.put("dimMultiVal", ImmutableList.of(""));
listWithEmptyStringEvent.put("sumCount", 1L);
Map<String, Object> singleValEvent = new HashMap<>();
singleValEvent.put("dimA", "leek");
singleValEvent.put("dimMultiVal", "1");
singleValEvent.put("sumCount", 1L);
Map<String, Object> singleValEvent2 = new HashMap<>();
singleValEvent2.put("dimA", "leek");
singleValEvent2.put("dimMultiVal", "2");
singleValEvent2.put("sumCount", 1L);
Map<String, Object> singleValEvent3 = new HashMap<>();
singleValEvent3.put("dimA", "potato");
singleValEvent3.put("dimMultiVal", "2");
singleValEvent3.put("sumCount", 1L);
Map<String, Object> listWithSingleValEvent = new HashMap<>();
listWithSingleValEvent.put("dimA", "leek");
listWithSingleValEvent.put("dimMultiVal", ImmutableList.of("1"));
listWithSingleValEvent.put("sumCount", 1L);
Map<String, Object> listWithSingleValEvent2 = new HashMap<>();
listWithSingleValEvent2.put("dimA", "leek");
listWithSingleValEvent2.put("dimMultiVal", ImmutableList.of("2"));
listWithSingleValEvent2.put("sumCount", 1L);
Map<String, Object> listWithSingleValEvent3 = new HashMap<>();
listWithSingleValEvent3.put("dimA", "potato");
listWithSingleValEvent3.put("dimMultiVal", ImmutableList.of("2"));
listWithSingleValEvent3.put("sumCount", 1L);
Map<String, Object> multivalEvent = new HashMap<>();
multivalEvent.put("dimA", "leek");
multivalEvent.put("dimMultiVal", ImmutableList.of("1", "3"));
multivalEvent.put("sumCount", 1L);
Map<String, Object> multivalEvent2 = new HashMap<>();
multivalEvent2.put("dimA", "leek");
multivalEvent2.put("dimMultiVal", ImmutableList.of("1", "4"));
multivalEvent2.put("sumCount", 1L);
Map<String, Object> multivalEvent3 = new HashMap<>();
multivalEvent3.put("dimA", "leek");
multivalEvent3.put("dimMultiVal", ImmutableList.of("1", "3", "5"));
multivalEvent3.put("sumCount", 1L);
Map<String, Object> multivalEvent4 = new HashMap<>();
multivalEvent4.put("dimA", "leek");
multivalEvent4.put("dimMultiVal", ImmutableList.of("1", "2", "3"));
multivalEvent4.put("sumCount", 1L);
List<String> multivalEvent5List = Arrays.asList("1", "2", "3", null);
Map<String, Object> multivalEvent5 = new HashMap<>();
multivalEvent5.put("dimA", "leek");
multivalEvent5.put("dimMultiVal", multivalEvent5List);
multivalEvent5.put("sumCount", 1L);
List<String> multivalEvent6List = Arrays.asList(null, "3");
Map<String, Object> multivalEvent6 = new HashMap<>();
multivalEvent6.put("dimA", "leek");
multivalEvent6.put("dimMultiVal", multivalEvent6List);
multivalEvent6.put("sumCount", 1L);
Map<String, Object> multivalEvent7 = new HashMap<>();
multivalEvent7.put("dimA", "leek");
multivalEvent7.put("dimMultiVal", ImmutableList.of("1", "2", "3", ""));
multivalEvent7.put("sumCount", 1L);
Map<String, Object> multivalEvent8 = new HashMap<>();
multivalEvent8.put("dimA", "leek");
multivalEvent8.put("dimMultiVal", ImmutableList.of("", "3"));
multivalEvent8.put("sumCount", 1L);
Map<String, Object> multivalEvent9 = new HashMap<>();
multivalEvent9.put("dimA", "potato");
multivalEvent9.put("dimMultiVal", ImmutableList.of("1", "3"));
multivalEvent9.put("sumCount", 1L);
List<Map<String, Object>> events = ImmutableList.of(
nullEvent,
nullEvent2,
emptyListEvent,
listWithNullEvent,
emptyStringEvent,
listWithEmptyStringEvent,
singleValEvent,
singleValEvent2,
singleValEvent3,
listWithSingleValEvent,
listWithSingleValEvent2,
listWithSingleValEvent3,
multivalEvent,
multivalEvent2,
multivalEvent3,
multivalEvent4,
multivalEvent5,
multivalEvent6,
multivalEvent7,
multivalEvent8,
multivalEvent9
);
IncrementalIndex toPersistA = new IncrementalIndex.Builder()
.setIndexSchema(indexSchema)
.setMaxRowCount(1000)
.buildOnheap();
for (Map<String, Object> event : events) {
toPersistA.add(new MapBasedInputRow(1, dims, event));
}
final File tmpDirA = temporaryFolder.newFolder();
QueryableIndex indexA = closer.closeLater(
indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
);
List<QueryableIndex> singleEventIndexes = new ArrayList<>();
for (Map<String, Object> event : events) {
IncrementalIndex toPersist = new IncrementalIndex.Builder()
.setIndexSchema(indexSchema)
.setMaxRowCount(1000)
.buildOnheap();
toPersist.add(new MapBasedInputRow(1, dims, event));
final File tmpDir = temporaryFolder.newFolder();
QueryableIndex queryableIndex = closer.closeLater(
indexIO.loadIndex(indexMerger.persist(toPersist, tmpDir, indexSpec, null))
);
singleEventIndexes.add(queryableIndex);
}
singleEventIndexes.add(indexA);
final File tmpDirMerged = temporaryFolder.newFolder();
final QueryableIndex merged = closer.closeLater(
indexIO.loadIndex(
indexMerger.mergeQueryableIndex(
singleEventIndexes,
true,
new AggregatorFactory[]{
new LongSumAggregatorFactory("sumCount", "sumCount")
},
tmpDirMerged,
indexSpec,
null
)
)
);
final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
Assert.assertEquals(
ImmutableList.of("dimA", "dimMultiVal"),
ImmutableList.copyOf(adapter.getDimensionNames())
);
if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(11, rowList.size());
Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
Assert.assertEquals(12L, rowList.get(0).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
Assert.assertEquals(4L, rowList.get(1).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
Assert.assertEquals(4L, rowList.get(2).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(3).dimensionValues());
Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(4).dimensionValues());
Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(5).dimensionValues());
Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(6).dimensionValues());
Assert.assertEquals(2L, rowList.get(6).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(7).dimensionValues());
Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(8).dimensionValues());
Assert.assertEquals(4L, rowList.get(8).metricValues().get(0));
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(9).dimensionValues());
Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(10).dimensionValues());
Assert.assertEquals(4L, rowList.get(10).metricValues().get(0));
checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8), adapter.getBitmapIndex("dimA", "leek"));
checkBitmapIndex(Arrays.asList(9, 10), adapter.getBitmapIndex("dimA", "potato"));
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
checkBitmapIndex(ImmutableList.of(), adapter.getBitmapIndex("dimMultiVal", ""));
checkBitmapIndex(Arrays.asList(1, 3, 4, 5, 6, 7, 9), adapter.getBitmapIndex("dimMultiVal", "1"));
checkBitmapIndex(Arrays.asList(1, 4, 8, 10), adapter.getBitmapIndex("dimMultiVal", "2"));
checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 6, 9), adapter.getBitmapIndex("dimMultiVal", "3"));
checkBitmapIndex(Collections.singletonList(7), adapter.getBitmapIndex("dimMultiVal", "4"));
checkBitmapIndex(Collections.singletonList(6), adapter.getBitmapIndex("dimMultiVal", "5"));
} else {
Assert.assertEquals(14, rowList.size());
Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
Assert.assertEquals(8L, rowList.get(0).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
Assert.assertEquals(2L, rowList.get(2).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", ""), rowList.get(3).dimensionValues());
Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "1", "2", "3")), rowList.get(4).dimensionValues());
Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "3")), rowList.get(5).dimensionValues());
Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(6).dimensionValues());
Assert.assertEquals(4L, rowList.get(6).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(7).dimensionValues());
Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(8).dimensionValues());
Assert.assertEquals(2L, rowList.get(8).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(9).dimensionValues());
Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(10).dimensionValues());
Assert.assertEquals(2L, rowList.get(10).metricValues().get(0));
Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(11).dimensionValues());
Assert.assertEquals(4L, rowList.get(11).metricValues().get(0));
Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(12).dimensionValues());
Assert.assertEquals(2L, rowList.get(12).metricValues().get(0));
Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(13).dimensionValues());
Assert.assertEquals(4L, rowList.get(13).metricValues().get(0));
checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), adapter.getBitmapIndex("dimA", "leek"));
checkBitmapIndex(Arrays.asList(12, 13), adapter.getBitmapIndex("dimA", "potato"));
checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
checkBitmapIndex(ImmutableList.of(3, 4, 5), adapter.getBitmapIndex("dimMultiVal", ""));
checkBitmapIndex(Arrays.asList(1, 4, 6, 7, 8, 9, 10, 12), adapter.getBitmapIndex("dimMultiVal", "1"));
checkBitmapIndex(Arrays.asList(1, 4, 7, 11, 13), adapter.getBitmapIndex("dimMultiVal", "2"));
checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 7, 8, 9, 12), adapter.getBitmapIndex("dimMultiVal", "3"));
checkBitmapIndex(Collections.singletonList(10), adapter.getBitmapIndex("dimMultiVal", "4"));
checkBitmapIndex(Collections.singletonList(9), adapter.getBitmapIndex("dimMultiVal", "5"));
}
}
private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
{ {
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null)); IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));

View File

@ -71,8 +71,8 @@ public class IncrementalIndexRowCompTest extends InitializedNullHandlingTest
Assert.assertTrue(comparator.compare(ir4, ir6) > 0); Assert.assertTrue(comparator.compare(ir4, ir6) > 0);
Assert.assertTrue(comparator.compare(ir5, ir6) > 0); Assert.assertTrue(comparator.compare(ir5, ir6) > 0);
Assert.assertTrue(comparator.compare(ir4, ir5) < 0); Assert.assertTrue(comparator.compare(ir5, ir4) < 0);
Assert.assertTrue(comparator.compare(ir5, ir4) > 0); Assert.assertTrue(comparator.compare(ir4, ir5) > 0);
} }
private MapBasedInputRow toMapRow(long time, Object... dimAndVal) private MapBasedInputRow toMapRow(long time, Object... dimAndVal)