From fe000a9e4bf1d2681760f72e9c593bd2aa5a1416 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Sat, 25 Apr 2020 13:47:07 -0700 Subject: [PATCH] Adjust string comparators used for ingestion (#9742) * Adjust string comparators used for ingestion * Small tweak * Fix inspection, more javadocs * Address PR comment * Add rollup comment * Add ordering test * Fix IncrementaIndexRowCompTest --- .../druid/segment/DimensionHandler.java | 4 + .../druid/segment/DimensionIndexer.java | 4 + .../druid/segment/StringDimensionHandler.java | 73 ++- .../druid/segment/StringDimensionIndexer.java | 30 +- .../segment/IndexMergerNullHandlingTest.java | 28 +- .../druid/segment/IndexMergerTestBase.java | 432 +++++++++++++++++- .../IncrementalIndexRowCompTest.java | 4 +- 7 files changed, 520 insertions(+), 55 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java index e1ea3342fb4..56bfed2e30c 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java @@ -122,6 +122,10 @@ public interface DimensionHandler * Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type, * corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares * {@link ColumnValueSelector}s as {@link DimensionSelector}s. + * + * The comparison rules used by this method should match the rules used by + * {@link DimensionIndexer#compareUnsortedEncodedKeyComponents}, otherwise incorrect ordering/merging of rows + * can occur during ingestion, causing issues such as imperfect rollup. */ Comparator getEncodedValueSelectorComparator(); diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index 06c27f7d719..99921eb79c0 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -241,6 +241,10 @@ public interface DimensionIndexer * * Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation. * + * The comparison rules used by this method should match the rules used by + * {@link DimensionHandler#getEncodedValueSelectorComparator()}, otherwise incorrect ordering/merging of rows + * can occur during ingestion, causing issues such as imperfect rollup. + * * @param lhs dimension value array from a Row key * @param rhs dimension value array from a Row key * @return comparison of the two arrays diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index c14bd319bb1..65ff7f356d7 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -33,61 +33,46 @@ import java.util.Comparator; public class StringDimensionHandler implements DimensionHandler { - /** - * Compares {@link IndexedInts} lexicographically, with the exception that if a row contains only zeros (that's the - * index of null) at all positions, it is considered "null" as a whole and is "less" than any "non-null" row. Empty - * row (size is zero) is also considered "null". - * - * The implementation is a bit complicated because it tries to check each position of both rows only once. + * This comparator uses the following rules: + * - Compare the two value arrays up to the length of the shorter array + * - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller + * - Comparing null and the empty list is a special case: these are considered equal */ private static final Comparator DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> { IndexedInts row1 = getRow(s1); IndexedInts row2 = getRow(s2); int len1 = row1.size(); int len2 = row2.size(); - boolean row1IsNull = true; - boolean row2IsNull = true; - for (int i = 0; i < Math.min(len1, len2); i++) { - int v1 = row1.get(i); - row1IsNull &= v1 == 0; - int v2 = row2.get(i); - row2IsNull &= v2 == 0; - int valueDiff = Integer.compare(v1, v2); - if (valueDiff != 0) { - return valueDiff; - } - } - //noinspection SubtractionInCompareTo -- substraction is safe here, because lengths or rows are small numbers. - int lenDiff = len1 - len2; - if (lenDiff == 0) { - return 0; - } else { - if (!row1IsNull || !row2IsNull) { - return lenDiff; - } else { - return compareRestNulls(row1, len1, row2, len2); - } - } - }; + int lenCompareResult = Integer.compare(len1, len2); + int valsIndex = 0; - private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2) - { - if (len1 < len2) { - for (int i = len1; i < len2; i++) { - if (row2.get(i) != 0) { - return -1; - } - } - } else { - for (int i = len2; i < len1; i++) { - if (row1.get(i) != 0) { - return 1; + if (lenCompareResult != 0) { + // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent + if (len1 + len2 == 1) { + IndexedInts longerRow = len2 > len1 ? row2 : row1; + if (longerRow.get(0) == 0) { + return 0; + } else { + //noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2 + return longerRow == row1 ? 1 : -1; } } } - return 0; - } + + int lenToCompare = Math.min(len1, len2); + while (valsIndex < lenToCompare) { + int v1 = row1.get(valsIndex); + int v2 = row2.get(valsIndex); + int valueCompareResult = Integer.compare(v1, v2); + if (valueCompareResult != 0) { + return valueCompareResult; + } + ++valsIndex; + } + + return lenCompareResult; + }; /** * Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging. diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index f08a495cbc8..ada146d56d4 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -399,23 +399,42 @@ public class StringDimensionIndexer implements DimensionIndexer lhsLen ? rhs : lhs; + if (longerVal[0] == dimLookup.idForNull) { + return 0; + } else { + //noinspection ArrayEquality -- longerVal is explicitly set to only lhs or rhs + return longerVal == lhs ? 1 : -1; + } + } + } + int valsIndex = 0; - while (retVal == 0 && valsIndex < lhsLen) { + int lenToCompare = Math.min(lhsLen, rhsLen); + while (valsIndex < lenToCompare) { int lhsVal = lhs[valsIndex]; int rhsVal = rhs[valsIndex]; if (lhsVal != rhsVal) { final String lhsValActual = getActualValue(lhsVal, false); final String rhsValActual = getActualValue(rhsVal, false); + int valueCompareResult = 0; if (lhsValActual != null && rhsValActual != null) { - retVal = lhsValActual.compareTo(rhsValActual); + valueCompareResult = lhsValActual.compareTo(rhsValActual); } else if (lhsValActual == null ^ rhsValActual == null) { - retVal = lhsValActual == null ? -1 : 1; + valueCompareResult = lhsValActual == null ? -1 : 1; + } + if (valueCompareResult != 0) { + return valueCompareResult; } } ++valsIndex; } - return retVal; + + return lenCompareResult; } @Override @@ -796,6 +815,7 @@ public class StringDimensionIndexer implements DimensionIndexer list = (List) value; - if (list.isEmpty() && !hasMultipleValues) { + if (list.isEmpty()) { // empty lists become nulls in single valued columns + // they sometimes also become nulls in multi-valued columns (see comments in getRow()) retVal.add(NullHandling.emptyToNullIfNeeded(null)); } else { retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList())); @@ -242,7 +248,25 @@ public class IndexMergerNullHandlingTest final List retVal = new ArrayList<>(); if (column.hasMultipleValues()) { - column.getMultiValueRow(rowNumber).forEach(i -> retVal.add(column.lookupName(i))); + IndexedInts rowVals = column.getMultiValueRow(rowNumber); + if (rowVals.size() == 0) { + // This is a sort of test hack: + // - If we ingest the subset [{d=[]}, {d=[a, b]}], we get an IndexedInts with 0 size for the nully row, + // representing the empty list + // - If we ingest the subset [{}, {d=[]}, {d=[a, b]}], we instead get an IndexedInts with 1 size, + // representing a row with a single null value + // This occurs because the dimension value comparator used during ingestion considers null and the empty list + // to be the same. + // - In the first subset, we only see the empty list and a non-empty list, so the key used in the + // incremental index fact table for the nully row is the empty list. + // - In the second subset, the fact table initially gets an entry for d=null. When the row with the + // empty list value is added, it is treated as identical to the first d=null row, so it gets rolled up. + // The resulting persisted segment will have [null] instead of [] because of this rollup. + // To simplify this test class, we always normalize the empty list into null here. + retVal.add(null); + } else { + rowVals.forEach(i -> retVal.add(column.lookupName(i))); + } } else { retVal.add(column.lookupName(column.getSingleValueRow(rowNumber))); } diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index 1783f433b1b..b253614cc3d 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -78,6 +78,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -2076,11 +2077,11 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest Assert.assertEquals(2, rowList.size()); Assert.assertEquals( - Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")), + Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")), rowList.get(0).dimensionValues() ); Assert.assertEquals( - Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")), + Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")), rowList.get(1).dimensionValues() ); @@ -2210,6 +2211,433 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest ); } + @Test + public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception + { + List dims = Arrays.asList( + "dimA", + "dimMultiVal" + ); + + IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() + .withDimensionsSpec( + new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true), + new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true) + ) + ) + ) + .withMetrics( + new LongSumAggregatorFactory("sumCount", "sumCount") + ) + .withRollup(true) + .build(); + + IncrementalIndex toPersistA = new IncrementalIndex.Builder() + .setIndexSchema(indexSchema) + .setMaxRowCount(1000) + .buildOnheap(); + + Map event1 = new HashMap<>(); + event1.put("dimA", "leek"); + event1.put("dimMultiVal", ImmutableList.of("1", "2", "4")); + event1.put("sumCount", 1L); + + Map event2 = new HashMap<>(); + event2.put("dimA", "leek"); + event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5")); + event2.put("sumCount", 1L); + + toPersistA.add(new MapBasedInputRow(1, dims, event1)); + toPersistA.add(new MapBasedInputRow(1, dims, event2)); + + IncrementalIndex toPersistB = new IncrementalIndex.Builder() + .setIndexSchema(indexSchema) + .setMaxRowCount(1000) + .buildOnheap(); + + Map event3 = new HashMap<>(); + event3.put("dimA", "leek"); + event3.put("dimMultiVal", ImmutableList.of("1", "2", "4")); + event3.put("sumCount", 1L); + + Map event4 = new HashMap<>(); + event4.put("dimA", "potato"); + event4.put("dimMultiVal", ImmutableList.of("0", "1", "4")); + event4.put("sumCount", 1L); + + toPersistB.add(new MapBasedInputRow(1, dims, event3)); + toPersistB.add(new MapBasedInputRow(1, dims, event4)); + + final File tmpDirA = temporaryFolder.newFolder(); + final File tmpDirB = temporaryFolder.newFolder(); + final File tmpDirMerged = temporaryFolder.newFolder(); + + QueryableIndex indexA = closer.closeLater( + indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null)) + ); + + QueryableIndex indexB = closer.closeLater( + indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null)) + ); + + final QueryableIndex merged = closer.closeLater( + indexIO.loadIndex( + indexMerger.mergeQueryableIndex( + Arrays.asList(indexA, indexB), + true, + new AggregatorFactory[]{ + new LongSumAggregatorFactory("sumCount", "sumCount") + }, + tmpDirMerged, + indexSpec, + null + ) + ) + ); + + final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged); + final List rowList = RowIteratorHelper.toList(adapter.getRows()); + + Assert.assertEquals( + ImmutableList.of("dimA", "dimMultiVal"), + ImmutableList.copyOf(adapter.getDimensionNames()) + ); + + Assert.assertEquals(3, rowList.size()); + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues()); + Assert.assertEquals(1L, rowList.get(0).metricValues().get(0)); + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues()); + Assert.assertEquals(2L, rowList.get(1).metricValues().get(0)); + Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues()); + Assert.assertEquals(1L, rowList.get(2).metricValues().get(0)); + + checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek")); + checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato")); + + checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0")); + checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1")); + checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2")); + checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3")); + checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4")); + checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5")); + } + + + @Test + public void testMultivalDim_persistAndMerge_dimensionValueOrderingRules() throws Exception + { + List dims = Arrays.asList( + "dimA", + "dimMultiVal" + ); + + IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() + .withDimensionsSpec( + new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true), + new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true) + ) + ) + ) + .withMetrics( + new LongSumAggregatorFactory("sumCount", "sumCount") + ) + .withRollup(true) + .build(); + + Map nullEvent = new HashMap<>(); + nullEvent.put("dimA", "leek"); + nullEvent.put("sumCount", 1L); + + Map nullEvent2 = new HashMap<>(); + nullEvent2.put("dimA", "leek"); + nullEvent2.put("dimMultiVal", null); + nullEvent2.put("sumCount", 1L); + + Map emptyListEvent = new HashMap<>(); + emptyListEvent.put("dimA", "leek"); + emptyListEvent.put("dimMultiVal", ImmutableList.of()); + emptyListEvent.put("sumCount", 1L); + + List listWithNull = new ArrayList<>(); + listWithNull.add(null); + Map listWithNullEvent = new HashMap<>(); + listWithNullEvent.put("dimA", "leek"); + listWithNullEvent.put("dimMultiVal", listWithNull); + listWithNullEvent.put("sumCount", 1L); + + Map emptyStringEvent = new HashMap<>(); + emptyStringEvent.put("dimA", "leek"); + emptyStringEvent.put("dimMultiVal", ""); + emptyStringEvent.put("sumCount", 1L); + + Map listWithEmptyStringEvent = new HashMap<>(); + listWithEmptyStringEvent.put("dimA", "leek"); + listWithEmptyStringEvent.put("dimMultiVal", ImmutableList.of("")); + listWithEmptyStringEvent.put("sumCount", 1L); + + Map singleValEvent = new HashMap<>(); + singleValEvent.put("dimA", "leek"); + singleValEvent.put("dimMultiVal", "1"); + singleValEvent.put("sumCount", 1L); + + Map singleValEvent2 = new HashMap<>(); + singleValEvent2.put("dimA", "leek"); + singleValEvent2.put("dimMultiVal", "2"); + singleValEvent2.put("sumCount", 1L); + + Map singleValEvent3 = new HashMap<>(); + singleValEvent3.put("dimA", "potato"); + singleValEvent3.put("dimMultiVal", "2"); + singleValEvent3.put("sumCount", 1L); + + Map listWithSingleValEvent = new HashMap<>(); + listWithSingleValEvent.put("dimA", "leek"); + listWithSingleValEvent.put("dimMultiVal", ImmutableList.of("1")); + listWithSingleValEvent.put("sumCount", 1L); + + Map listWithSingleValEvent2 = new HashMap<>(); + listWithSingleValEvent2.put("dimA", "leek"); + listWithSingleValEvent2.put("dimMultiVal", ImmutableList.of("2")); + listWithSingleValEvent2.put("sumCount", 1L); + + Map listWithSingleValEvent3 = new HashMap<>(); + listWithSingleValEvent3.put("dimA", "potato"); + listWithSingleValEvent3.put("dimMultiVal", ImmutableList.of("2")); + listWithSingleValEvent3.put("sumCount", 1L); + + Map multivalEvent = new HashMap<>(); + multivalEvent.put("dimA", "leek"); + multivalEvent.put("dimMultiVal", ImmutableList.of("1", "3")); + multivalEvent.put("sumCount", 1L); + + Map multivalEvent2 = new HashMap<>(); + multivalEvent2.put("dimA", "leek"); + multivalEvent2.put("dimMultiVal", ImmutableList.of("1", "4")); + multivalEvent2.put("sumCount", 1L); + + Map multivalEvent3 = new HashMap<>(); + multivalEvent3.put("dimA", "leek"); + multivalEvent3.put("dimMultiVal", ImmutableList.of("1", "3", "5")); + multivalEvent3.put("sumCount", 1L); + + Map multivalEvent4 = new HashMap<>(); + multivalEvent4.put("dimA", "leek"); + multivalEvent4.put("dimMultiVal", ImmutableList.of("1", "2", "3")); + multivalEvent4.put("sumCount", 1L); + + List multivalEvent5List = Arrays.asList("1", "2", "3", null); + Map multivalEvent5 = new HashMap<>(); + multivalEvent5.put("dimA", "leek"); + multivalEvent5.put("dimMultiVal", multivalEvent5List); + multivalEvent5.put("sumCount", 1L); + + List multivalEvent6List = Arrays.asList(null, "3"); + Map multivalEvent6 = new HashMap<>(); + multivalEvent6.put("dimA", "leek"); + multivalEvent6.put("dimMultiVal", multivalEvent6List); + multivalEvent6.put("sumCount", 1L); + + Map multivalEvent7 = new HashMap<>(); + multivalEvent7.put("dimA", "leek"); + multivalEvent7.put("dimMultiVal", ImmutableList.of("1", "2", "3", "")); + multivalEvent7.put("sumCount", 1L); + + Map multivalEvent8 = new HashMap<>(); + multivalEvent8.put("dimA", "leek"); + multivalEvent8.put("dimMultiVal", ImmutableList.of("", "3")); + multivalEvent8.put("sumCount", 1L); + + Map multivalEvent9 = new HashMap<>(); + multivalEvent9.put("dimA", "potato"); + multivalEvent9.put("dimMultiVal", ImmutableList.of("1", "3")); + multivalEvent9.put("sumCount", 1L); + + List> events = ImmutableList.of( + nullEvent, + nullEvent2, + emptyListEvent, + listWithNullEvent, + emptyStringEvent, + listWithEmptyStringEvent, + singleValEvent, + singleValEvent2, + singleValEvent3, + listWithSingleValEvent, + listWithSingleValEvent2, + listWithSingleValEvent3, + multivalEvent, + multivalEvent2, + multivalEvent3, + multivalEvent4, + multivalEvent5, + multivalEvent6, + multivalEvent7, + multivalEvent8, + multivalEvent9 + ); + + IncrementalIndex toPersistA = new IncrementalIndex.Builder() + .setIndexSchema(indexSchema) + .setMaxRowCount(1000) + .buildOnheap(); + + for (Map event : events) { + toPersistA.add(new MapBasedInputRow(1, dims, event)); + } + + final File tmpDirA = temporaryFolder.newFolder(); + QueryableIndex indexA = closer.closeLater( + indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null)) + ); + + List singleEventIndexes = new ArrayList<>(); + for (Map event : events) { + IncrementalIndex toPersist = new IncrementalIndex.Builder() + .setIndexSchema(indexSchema) + .setMaxRowCount(1000) + .buildOnheap(); + + toPersist.add(new MapBasedInputRow(1, dims, event)); + final File tmpDir = temporaryFolder.newFolder(); + QueryableIndex queryableIndex = closer.closeLater( + indexIO.loadIndex(indexMerger.persist(toPersist, tmpDir, indexSpec, null)) + ); + singleEventIndexes.add(queryableIndex); + } + singleEventIndexes.add(indexA); + + final File tmpDirMerged = temporaryFolder.newFolder(); + final QueryableIndex merged = closer.closeLater( + indexIO.loadIndex( + indexMerger.mergeQueryableIndex( + singleEventIndexes, + true, + new AggregatorFactory[]{ + new LongSumAggregatorFactory("sumCount", "sumCount") + }, + tmpDirMerged, + indexSpec, + null + ) + ) + ); + + final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged); + final List rowList = RowIteratorHelper.toList(adapter.getRows()); + + Assert.assertEquals( + ImmutableList.of("dimA", "dimMultiVal"), + ImmutableList.copyOf(adapter.getDimensionNames()) + ); + + if (NullHandling.replaceWithDefault()) { + Assert.assertEquals(11, rowList.size()); + + Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues()); + Assert.assertEquals(12L, rowList.get(0).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues()); + Assert.assertEquals(4L, rowList.get(1).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues()); + Assert.assertEquals(4L, rowList.get(2).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(3).dimensionValues()); + Assert.assertEquals(4L, rowList.get(3).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(4).dimensionValues()); + Assert.assertEquals(2L, rowList.get(4).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(5).dimensionValues()); + Assert.assertEquals(2L, rowList.get(5).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(6).dimensionValues()); + Assert.assertEquals(2L, rowList.get(6).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(7).dimensionValues()); + Assert.assertEquals(2L, rowList.get(7).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(8).dimensionValues()); + Assert.assertEquals(4L, rowList.get(8).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(9).dimensionValues()); + Assert.assertEquals(2L, rowList.get(9).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(10).dimensionValues()); + Assert.assertEquals(4L, rowList.get(10).metricValues().get(0)); + + checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8), adapter.getBitmapIndex("dimA", "leek")); + checkBitmapIndex(Arrays.asList(9, 10), adapter.getBitmapIndex("dimA", "potato")); + + checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null)); + checkBitmapIndex(ImmutableList.of(), adapter.getBitmapIndex("dimMultiVal", "")); + checkBitmapIndex(Arrays.asList(1, 3, 4, 5, 6, 7, 9), adapter.getBitmapIndex("dimMultiVal", "1")); + checkBitmapIndex(Arrays.asList(1, 4, 8, 10), adapter.getBitmapIndex("dimMultiVal", "2")); + checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 6, 9), adapter.getBitmapIndex("dimMultiVal", "3")); + checkBitmapIndex(Collections.singletonList(7), adapter.getBitmapIndex("dimMultiVal", "4")); + checkBitmapIndex(Collections.singletonList(6), adapter.getBitmapIndex("dimMultiVal", "5")); + } else { + Assert.assertEquals(14, rowList.size()); + + Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues()); + Assert.assertEquals(8L, rowList.get(0).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues()); + Assert.assertEquals(2L, rowList.get(1).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues()); + Assert.assertEquals(2L, rowList.get(2).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", ""), rowList.get(3).dimensionValues()); + Assert.assertEquals(4L, rowList.get(3).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "1", "2", "3")), rowList.get(4).dimensionValues()); + Assert.assertEquals(2L, rowList.get(4).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "3")), rowList.get(5).dimensionValues()); + Assert.assertEquals(2L, rowList.get(5).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(6).dimensionValues()); + Assert.assertEquals(4L, rowList.get(6).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(7).dimensionValues()); + Assert.assertEquals(2L, rowList.get(7).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(8).dimensionValues()); + Assert.assertEquals(2L, rowList.get(8).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(9).dimensionValues()); + Assert.assertEquals(2L, rowList.get(9).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(10).dimensionValues()); + Assert.assertEquals(2L, rowList.get(10).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(11).dimensionValues()); + Assert.assertEquals(4L, rowList.get(11).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(12).dimensionValues()); + Assert.assertEquals(2L, rowList.get(12).metricValues().get(0)); + + Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(13).dimensionValues()); + Assert.assertEquals(4L, rowList.get(13).metricValues().get(0)); + + checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), adapter.getBitmapIndex("dimA", "leek")); + checkBitmapIndex(Arrays.asList(12, 13), adapter.getBitmapIndex("dimA", "potato")); + + checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null)); + checkBitmapIndex(ImmutableList.of(3, 4, 5), adapter.getBitmapIndex("dimMultiVal", "")); + checkBitmapIndex(Arrays.asList(1, 4, 6, 7, 8, 9, 10, 12), adapter.getBitmapIndex("dimMultiVal", "1")); + checkBitmapIndex(Arrays.asList(1, 4, 7, 11, 13), adapter.getBitmapIndex("dimMultiVal", "2")); + checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 7, 8, 9, 12), adapter.getBitmapIndex("dimMultiVal", "3")); + checkBitmapIndex(Collections.singletonList(10), adapter.getBitmapIndex("dimMultiVal", "4")); + checkBitmapIndex(Collections.singletonList(9), adapter.getBitmapIndex("dimMultiVal", "5")); + } + } + private QueryableIndex persistAndLoad(List schema, InputRow... rows) throws IOException { IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null)); diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java index 94e17c645e5..f6f95e02cef 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java @@ -71,8 +71,8 @@ public class IncrementalIndexRowCompTest extends InitializedNullHandlingTest Assert.assertTrue(comparator.compare(ir4, ir6) > 0); Assert.assertTrue(comparator.compare(ir5, ir6) > 0); - Assert.assertTrue(comparator.compare(ir4, ir5) < 0); - Assert.assertTrue(comparator.compare(ir5, ir4) > 0); + Assert.assertTrue(comparator.compare(ir5, ir4) < 0); + Assert.assertTrue(comparator.compare(ir4, ir5) > 0); } private MapBasedInputRow toMapRow(long time, Object... dimAndVal)