Speed up composite key joins on IndexedTable. (#13516)

* Speed up composite key joins on IndexedTable.

Prior to this patch, IndexedTable indexes are sorted IntList. This works
great when we have a single-column join key: we simply retrieve the list
and we know what rows match. However, when we have a composite key, we
need to merge the sorted lists. This is inefficient when one is very dense
and others are very sparse.

This patch switches from sorted IntList to IntSortedSet, and changes
to the following intersection algorithm:

1) Initialize the intersection set to the smallest matching set from the
   various parts of the composite key.

2) For each element in that smallest set, check other sets for that element.
   If any do *not* include it, then remove the element from the intersection
   set.

This way, complexity scales with the size of the smallest set, not the
largest one.

* RangeIntSet stuff.
This commit is contained in:
Gian Merlino 2023-02-17 22:01:01 -08:00 committed by GitHub
parent 85d36be085
commit 882ae9f002
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 532 additions and 383 deletions

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.collections;
import it.unimi.dsi.fastutil.ints.AbstractIntSortedSet;
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.ints.IntIterators;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.ints.IntSortedSets;
import java.util.NoSuchElementException;
/**
* Set from start (inclusive) to end (exclusive).
*/
public class RangeIntSet extends AbstractIntSortedSet
{
private final int start;
private final int end;
public RangeIntSet(int start, int end)
{
this.start = start;
this.end = end;
}
@Override
public IntBidirectionalIterator iterator()
{
return IntIterators.fromTo(start, end);
}
@Override
public IntBidirectionalIterator iterator(int fromElement)
{
if (fromElement < end) {
return IntIterators.fromTo(Math.max(start, fromElement), end);
} else {
return IntIterators.EMPTY_ITERATOR;
}
}
@Override
public boolean contains(int k)
{
return k >= start && k < end;
}
@Override
public IntSortedSet subSet(int fromElement, int toElement)
{
if (fromElement < end && toElement > start) {
return new RangeIntSet(Math.max(fromElement, start), Math.min(toElement, end));
} else {
return IntSortedSets.EMPTY_SET;
}
}
@Override
public IntSortedSet headSet(int toElement)
{
if (toElement > start) {
return new RangeIntSet(start, Math.min(toElement, end));
} else {
return IntSortedSets.EMPTY_SET;
}
}
@Override
public IntSortedSet tailSet(int fromElement)
{
if (fromElement < end) {
return new RangeIntSet(Math.max(start, fromElement), end);
} else {
return IntSortedSets.EMPTY_SET;
}
}
@Override
public IntComparator comparator()
{
// Natural ordering.
return null;
}
@Override
public int firstInt()
{
if (start < end) {
return start;
} else {
throw new NoSuchElementException();
}
}
@Override
public int lastInt()
{
if (start < end) {
return end - 1;
} else {
throw new NoSuchElementException();
}
}
@Override
public int size()
{
return end > start ? end - start : 0;
}
@Override
public boolean isEmpty()
{
return end <= start;
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o instanceof RangeIntSet) {
final RangeIntSet other = (RangeIntSet) o;
return (other.start == start && other.end == end) || (other.isEmpty() && isEmpty());
} else {
return super.equals(o);
}
}
@Override
public int hashCode()
{
return isEmpty() ? 0 : start + 31 * end;
}
}

View File

@ -19,7 +19,7 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSortedSet;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnSelectorFactory;
@ -134,7 +134,7 @@ public interface IndexedTable extends ReferenceCountedObject, Closeable
* If "key" is some type other than the natural type {@link #keyType()}, it will be converted before checking * If "key" is some type other than the natural type {@link #keyType()}, it will be converted before checking
* the index. * the index.
*/ */
IntList find(Object key); IntSortedSet find(Object key);
/** /**
* Returns the row number corresponding to "key" in this index, or {@link #NOT_FOUND} if the key does not exist * Returns the row number corresponding to "key" in this index, or {@link #NOT_FOUND} if the key does not exist

View File

@ -23,11 +23,14 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import io.netty.util.SuppressForbidden; import io.netty.util.SuppressForbidden;
import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntIterators;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntRBTreeSet; import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.ints.IntSet; import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.ints.IntSortedSets;
import org.apache.druid.collections.RangeIntSet;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.Pair;
@ -74,7 +77,6 @@ public class IndexedTableJoinMatcher implements JoinMatcher
private final IndexedTable table; private final IndexedTable table;
private final List<ConditionMatcher> conditionMatchers; private final List<ConditionMatcher> conditionMatchers;
private final boolean singleRowMatching; private final boolean singleRowMatching;
private final IntIterator[] currentMatchedRows;
private final ColumnSelectorFactory selectorFactory; private final ColumnSelectorFactory selectorFactory;
// matchedRows and matchingRemainder are used to implement matchRemainder(). // matchedRows and matchingRemainder are used to implement matchRemainder().
@ -105,10 +107,10 @@ public class IndexedTableJoinMatcher implements JoinMatcher
reset(); reset();
if (condition.isAlwaysTrue()) { if (condition.isAlwaysTrue()) {
this.conditionMatchers = Collections.singletonList(() -> IntIterators.fromTo(0, table.numRows())); this.conditionMatchers = Collections.singletonList(() -> new RangeIntSet(0, table.numRows()));
this.singleRowMatching = false; this.singleRowMatching = false;
} else if (condition.isAlwaysFalse()) { } else if (condition.isAlwaysFalse()) {
this.conditionMatchers = Collections.singletonList(() -> IntIterators.EMPTY_ITERATOR); this.conditionMatchers = Collections.singletonList(() -> IntSortedSets.EMPTY_SET);
this.singleRowMatching = false; this.singleRowMatching = false;
} else if (condition.getNonEquiConditions().isEmpty()) { } else if (condition.getNonEquiConditions().isEmpty()) {
final List<Pair<IndexedTable.Index, Equality>> indexes = final List<Pair<IndexedTable.Index, Equality>> indexes =
@ -130,13 +132,6 @@ public class IndexedTableJoinMatcher implements JoinMatcher
); );
} }
if (!singleRowMatching) {
// Only used by "matchCondition", and only in the multi-row-matching case.
this.currentMatchedRows = new IntIterator[conditionMatchers.size()];
} else {
this.currentMatchedRows = null;
}
ColumnSelectorFactory selectorFactory = table.makeColumnSelectorFactory(joinableOffset, descending, closer); ColumnSelectorFactory selectorFactory = table.makeColumnSelectorFactory(joinableOffset, descending, closer);
this.selectorFactory = selectorFactory != null this.selectorFactory = selectorFactory != null
? selectorFactory ? selectorFactory
@ -204,18 +199,50 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
} else { } else {
if (conditionMatchers.size() == 1) { if (conditionMatchers.size() == 1) {
currentIterator = conditionMatchers.get(0).match(); currentIterator = conditionMatchers.get(0).match().iterator();
} else { } else {
final IntSortedSet[] matchingSets = new IntSortedSet[conditionMatchers.size()];
int smallestMatchingSet = -1;
for (int i = 0; i < conditionMatchers.size(); i++) { for (int i = 0; i < conditionMatchers.size(); i++) {
final IntIterator rows = conditionMatchers.get(i).match(); matchingSets[i] = conditionMatchers.get(i).match();
if (rows.hasNext()) { if (i == 0 || matchingSets[i].size() < matchingSets[smallestMatchingSet].size()) {
currentMatchedRows[i] = rows; smallestMatchingSet = i;
} else {
return;
} }
} }
currentIterator = new SortedIntIntersectionIterator(currentMatchedRows); // Start intersection using the smallest matching set.
IntSortedSet intersection = matchingSets[smallestMatchingSet];
// Remember if we copied matchingSets[smallestMatchingSet] or not. Avoids unnecessary copies.
boolean copied = false;
for (int i = 0; i < conditionMatchers.size(); i++) {
final int numIntersectionElements = intersection.size();
if (numIntersectionElements > 0 && i != smallestMatchingSet) {
final IntBidirectionalIterator it = intersection.iterator();
while (it.hasNext()) {
final int rowNumber = it.nextInt();
if (!matchingSets[i].contains(rowNumber)) {
// Remove from intersection.
if (numIntersectionElements == 1) {
intersection = IntSortedSets.EMPTY_SET;
break;
} else {
if (!copied) {
intersection = new IntAVLTreeSet(intersection);
copied = true;
}
intersection.remove(rowNumber);
}
}
}
}
}
currentIterator = intersection.iterator();
} }
advanceCurrentRow(); advanceCurrentRow();
@ -323,14 +350,14 @@ public class IndexedTableJoinMatcher implements JoinMatcher
*/ */
default int matchSingleRow() default int matchSingleRow()
{ {
final IntIterator it = match(); final IntSortedSet rows = match();
return it.hasNext() ? it.nextInt() : NO_CONDITION_MATCH; return rows.isEmpty() ? NO_CONDITION_MATCH : rows.firstInt();
} }
/** /**
* Returns an iterator for the row numbers that match the current cursor position. * Returns row numbers that match the current cursor position.
*/ */
IntIterator match(); IntSortedSet match();
} }
/** /**
@ -347,9 +374,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
private final ColumnType keyType; private final ColumnType keyType;
private final IndexedTable.Index index; private final IndexedTable.Index index;
// DimensionSelector -> (int) dimension id -> (IntList) row numbers // DimensionSelector -> (int) dimension id -> (IntSortedSet) row numbers
@SuppressWarnings("MismatchedQueryAndUpdateOfCollection") // updated via computeIfAbsent @SuppressWarnings("MismatchedQueryAndUpdateOfCollection") // updated via computeIfAbsent
private final LruLoadingHashMap<DimensionSelector, Int2IntListMap> dimensionCaches; private final LruLoadingHashMap<DimensionSelector, Int2IntSortedSetMap> dimensionCaches;
ConditionMatcherFactory(IndexedTable.Index index) ConditionMatcherFactory(IndexedTable.Index index)
{ {
@ -360,21 +387,21 @@ public class IndexedTableJoinMatcher implements JoinMatcher
MAX_NUM_CACHE, MAX_NUM_CACHE,
selector -> { selector -> {
int cardinality = selector.getValueCardinality(); int cardinality = selector.getValueCardinality();
IntFunction<IntList> loader = dimensionId -> getRowNumbers(selector, dimensionId); IntFunction<IntSortedSet> loader = dimensionId -> getRowNumbers(selector, dimensionId);
return cardinality <= CACHE_MAX_SIZE return cardinality <= CACHE_MAX_SIZE
? new Int2IntListLookupTable(cardinality, loader) ? new Int2IntSortedSetLookupTable(cardinality, loader)
: new Int2IntListLruCache(CACHE_MAX_SIZE, loader); : new Int2IntSortedSetLruCache(CACHE_MAX_SIZE, loader);
} }
); );
} }
private IntList getRowNumbers(DimensionSelector selector, int dimensionId) private IntSortedSet getRowNumbers(DimensionSelector selector, int dimensionId)
{ {
final String key = selector.lookupName(dimensionId); final String key = selector.lookupName(dimensionId);
return index.find(key); return index.find(key);
} }
private IntList getAndCacheRowNumbers(DimensionSelector selector, int dimensionId) private IntSortedSet getAndCacheRowNumbers(DimensionSelector selector, int dimensionId)
{ {
return dimensionCaches.getAndLoadIfAbsent(selector).getAndLoadIfAbsent(dimensionId); return dimensionCaches.getAndLoadIfAbsent(selector).getAndLoadIfAbsent(dimensionId);
} }
@ -403,10 +430,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
if (row.size() == 1) { if (row.size() == 1) {
int dimensionId = row.get(0); int dimensionId = row.get(0);
IntList rowNumbers = getRowNumbers(selector, dimensionId); return getRowNumbers(selector, dimensionId);
return rowNumbers.iterator();
} else if (row.size() == 0) { } else if (row.size() == 0) {
return IntIterators.EMPTY_ITERATOR; return IntSortedSets.EMPTY_SET;
} else { } else {
// Multi-valued rows are not handled by the join system right now // Multi-valued rows are not handled by the join system right now
// TODO: Remove when https://github.com/apache/druid/issues/9924 is done // TODO: Remove when https://github.com/apache/druid/issues/9924 is done
@ -421,10 +447,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
if (row.size() == 1) { if (row.size() == 1) {
int dimensionId = row.get(0); int dimensionId = row.get(0);
IntList rowNumbers = getAndCacheRowNumbers(selector, dimensionId); return getAndCacheRowNumbers(selector, dimensionId);
return rowNumbers.iterator();
} else if (row.size() == 0) { } else if (row.size() == 0) {
return IntIterators.EMPTY_ITERATOR; return IntSortedSets.EMPTY_SET;
} else { } else {
// Multi-valued rows are not handled by the join system right now // Multi-valued rows are not handled by the join system right now
// TODO: Remove when https://github.com/apache/druid/issues/9924 is done // TODO: Remove when https://github.com/apache/druid/issues/9924 is done
@ -438,9 +463,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
public ConditionMatcher makeFloatProcessor(BaseFloatColumnValueSelector selector) public ConditionMatcher makeFloatProcessor(BaseFloatColumnValueSelector selector)
{ {
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
return () -> index.find(selector.getFloat()).iterator(); return () -> index.find(selector.getFloat());
} else { } else {
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getFloat()).iterator(); return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getFloat());
} }
} }
@ -448,9 +473,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
public ConditionMatcher makeDoubleProcessor(BaseDoubleColumnValueSelector selector) public ConditionMatcher makeDoubleProcessor(BaseDoubleColumnValueSelector selector)
{ {
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
return () -> index.find(selector.getDouble()).iterator(); return () -> index.find(selector.getDouble());
} else { } else {
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getDouble()).iterator(); return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getDouble());
} }
} }
@ -460,9 +485,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
if (index.keyType().is(ValueType.LONG)) { if (index.keyType().is(ValueType.LONG)) {
return makePrimitiveLongMatcher(selector); return makePrimitiveLongMatcher(selector);
} else if (NullHandling.replaceWithDefault()) { } else if (NullHandling.replaceWithDefault()) {
return () -> index.find(selector.getLong()).iterator(); return () -> index.find(selector.getLong());
} else { } else {
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getLong()).iterator(); return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getLong());
} }
} }
@ -478,9 +503,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
@Override @Override
public IntIterator match() public IntSortedSet match()
{ {
return IntIterators.EMPTY_ITERATOR; return IntSortedSets.EMPTY_SET;
} }
}; };
} }
@ -501,9 +526,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
@Override @Override
public IntIterator match() public IntSortedSet match()
{ {
return index.find(selector.getLong()).iterator(); return index.find(selector.getLong());
} }
}; };
} else { } else {
@ -516,9 +541,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
@Override @Override
public IntIterator match() public IntSortedSet match()
{ {
return selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getLong()).iterator(); return selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getLong());
} }
}; };
} }
@ -559,30 +584,30 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
} }
private interface Int2IntListMap private interface Int2IntSortedSetMap
{ {
IntList getAndLoadIfAbsent(int key); IntSortedSet getAndLoadIfAbsent(int key);
} }
/** /**
* Lookup table for keys in the range from 0 to maxSize - 1 * Lookup table for keys in the range from 0 to maxSize - 1
*/ */
@VisibleForTesting @VisibleForTesting
static class Int2IntListLookupTable implements Int2IntListMap static class Int2IntSortedSetLookupTable implements Int2IntSortedSetMap
{ {
private final IntList[] lookup; private final IntSortedSet[] lookup;
private final IntFunction<IntList> loader; private final IntFunction<IntSortedSet> loader;
Int2IntListLookupTable(int maxSize, IntFunction<IntList> loader) Int2IntSortedSetLookupTable(int maxSize, IntFunction<IntSortedSet> loader)
{ {
this.loader = loader; this.loader = loader;
this.lookup = new IntList[maxSize]; this.lookup = new IntSortedSet[maxSize];
} }
@Override @Override
public IntList getAndLoadIfAbsent(int key) public IntSortedSet getAndLoadIfAbsent(int key)
{ {
IntList value = lookup[key]; IntSortedSet value = lookup[key];
if (value == null) { if (value == null) {
value = loader.apply(key); value = loader.apply(key);
@ -597,13 +622,13 @@ public class IndexedTableJoinMatcher implements JoinMatcher
* LRU cache optimized for primitive int keys * LRU cache optimized for primitive int keys
*/ */
@VisibleForTesting @VisibleForTesting
static class Int2IntListLruCache implements Int2IntListMap static class Int2IntSortedSetLruCache implements Int2IntSortedSetMap
{ {
private final Int2ObjectLinkedOpenHashMap<IntList> cache; private final Int2ObjectLinkedOpenHashMap<IntSortedSet> cache;
private final int maxSize; private final int maxSize;
private final IntFunction<IntList> loader; private final IntFunction<IntSortedSet> loader;
Int2IntListLruCache(int maxSize, IntFunction<IntList> loader) Int2IntSortedSetLruCache(int maxSize, IntFunction<IntSortedSet> loader)
{ {
this.cache = new Int2ObjectLinkedOpenHashMap<>(maxSize); this.cache = new Int2ObjectLinkedOpenHashMap<>(maxSize);
this.maxSize = maxSize; this.maxSize = maxSize;
@ -611,9 +636,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
@Override @Override
public IntList getAndLoadIfAbsent(int key) public IntSortedSet getAndLoadIfAbsent(int key)
{ {
IntList value = cache.getAndMoveToFirst(key); IntSortedSet value = cache.getAndMoveToFirst(key);
if (value == null) { if (value == null) {
value = loader.apply(key); value = loader.apply(key);
@ -628,7 +653,7 @@ public class IndexedTableJoinMatcher implements JoinMatcher
} }
@VisibleForTesting @VisibleForTesting
IntList get(int key) IntSortedSet get(int key)
{ {
return cache.get(key); return cache.get(key);
} }

View File

@ -20,7 +20,8 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.Closer;
@ -150,9 +151,10 @@ public class IndexedTableJoinable implements Joinable
IndexedTable.Index index = table.columnIndex(filterColumnPosition); IndexedTable.Index index = table.columnIndex(filterColumnPosition);
IndexedTable.Reader reader = table.columnReader(correlatedColumnPosition); IndexedTable.Reader reader = table.columnReader(correlatedColumnPosition);
closer.register(reader); closer.register(reader);
IntList rowIndex = index.find(searchColumnValue); final IntSortedSet rowIndex = index.find(searchColumnValue);
for (int i = 0; i < rowIndex.size(); i++) { final IntBidirectionalIterator rowIterator = rowIndex.iterator();
int rowNum = rowIndex.getInt(i); while (rowIterator.hasNext()) {
int rowNum = rowIterator.nextInt();
String correlatedDimVal = DimensionHandlerUtils.convertObjectToString(reader.read(rowNum)); String correlatedDimVal = DimensionHandlerUtils.convertObjectToString(reader.read(rowNum));
correlatedValues.add(correlatedDimVal); correlatedValues.add(correlatedDimVal);

View File

@ -20,8 +20,8 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.ints.IntLists; import it.unimi.dsi.fastutil.ints.IntSortedSets;
import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
@ -34,7 +34,7 @@ import java.util.Map;
public class MapIndex implements IndexedTable.Index public class MapIndex implements IndexedTable.Index
{ {
private final ColumnType keyType; private final ColumnType keyType;
private final Map<Object, IntList> index; private final Map<Object, IntSortedSet> index;
private final boolean keysUnique; private final boolean keysUnique;
private final boolean isLong2ObjectMap; private final boolean isLong2ObjectMap;
@ -47,7 +47,7 @@ public class MapIndex implements IndexedTable.Index
* *
* @see RowBasedIndexBuilder#build() the main caller * @see RowBasedIndexBuilder#build() the main caller
*/ */
MapIndex(final ColumnType keyType, final Map<Object, IntList> index, final boolean keysUnique) MapIndex(final ColumnType keyType, final Map<Object, IntSortedSet> index, final boolean keysUnique)
{ {
this.keyType = Preconditions.checkNotNull(keyType, "keyType"); this.keyType = Preconditions.checkNotNull(keyType, "keyType");
this.index = Preconditions.checkNotNull(index, "index"); this.index = Preconditions.checkNotNull(index, "index");
@ -68,19 +68,19 @@ public class MapIndex implements IndexedTable.Index
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
final Object convertedKey = DimensionHandlerUtils.convertObjectToType(key, keyType, false); final Object convertedKey = DimensionHandlerUtils.convertObjectToType(key, keyType, false);
if (convertedKey != null) { if (convertedKey != null) {
final IntList found = index.get(convertedKey); final IntSortedSet found = index.get(convertedKey);
if (found != null) { if (found != null) {
return found; return found;
} else { } else {
return IntLists.EMPTY_LIST; return IntSortedSets.EMPTY_SET;
} }
} else { } else {
return IntLists.EMPTY_LIST; return IntSortedSets.EMPTY_SET;
} }
} }
@ -88,9 +88,9 @@ public class MapIndex implements IndexedTable.Index
public int findUniqueLong(long key) public int findUniqueLong(long key)
{ {
if (isLong2ObjectMap && keysUnique) { if (isLong2ObjectMap && keysUnique) {
final IntList rows = ((Long2ObjectMap<IntList>) (Map) index).get(key); final IntSortedSet rows = ((Long2ObjectMap<IntSortedSet>) (Map) index).get(key);
assert rows == null || rows.size() == 1; assert rows == null || rows.size() == 1;
return rows != null ? rows.getInt(0) : NOT_FOUND; return rows != null ? rows.firstInt() : NOT_FOUND;
} else { } else {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }

View File

@ -20,8 +20,9 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import com.google.common.primitives.Ints; import com.google.common.primitives.Ints;
import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator; import it.unimi.dsi.fastutil.objects.ObjectIterator;
@ -51,7 +52,7 @@ public class RowBasedIndexBuilder
private int currentRow = 0; private int currentRow = 0;
private int nullKeys = 0; private int nullKeys = 0;
private final ColumnType keyType; private final ColumnType keyType;
private final Map<Object, IntList> index; private final Map<Object, IntSortedSet> index;
private long minLongKey = Long.MAX_VALUE; private long minLongKey = Long.MAX_VALUE;
private long maxLongKey = Long.MIN_VALUE; private long maxLongKey = Long.MIN_VALUE;
@ -81,7 +82,7 @@ public class RowBasedIndexBuilder
final Object castKey = DimensionHandlerUtils.convertObjectToType(key, keyType); final Object castKey = DimensionHandlerUtils.convertObjectToType(key, keyType);
if (castKey != null) { if (castKey != null) {
final IntList rowNums = index.computeIfAbsent(castKey, k -> new IntArrayList()); final IntSortedSet rowNums = index.computeIfAbsent(castKey, k -> new IntAVLTreeSet());
rowNums.add(currentRow); rowNums.add(currentRow);
// Track min, max long value so we can decide later on if it's appropriate to use an array-backed implementation. // Track min, max long value so we can decide later on if it's appropriate to use an array-backed implementation.
@ -132,18 +133,18 @@ public class RowBasedIndexBuilder
Arrays.fill(indexAsArray, IndexedTable.Index.NOT_FOUND); Arrays.fill(indexAsArray, IndexedTable.Index.NOT_FOUND);
// Safe to cast to Long2ObjectMap because the constructor always uses one for long-typed keys. // Safe to cast to Long2ObjectMap because the constructor always uses one for long-typed keys.
final ObjectIterator<Long2ObjectMap.Entry<IntList>> entries = final ObjectIterator<Long2ObjectMap.Entry<IntSortedSet>> entries =
((Long2ObjectMap<IntList>) ((Map) index)).long2ObjectEntrySet().iterator(); ((Long2ObjectMap<IntSortedSet>) ((Map) index)).long2ObjectEntrySet().iterator();
while (entries.hasNext()) { while (entries.hasNext()) {
final Long2ObjectMap.Entry<IntList> entry = entries.next(); final Long2ObjectMap.Entry<IntSortedSet> entry = entries.next();
final IntList rowNums = entry.getValue(); final IntSortedSet rowNums = entry.getValue();
if (rowNums.size() != 1) { if (rowNums.size() != 1) {
throw new ISE("Expected single element"); throw new ISE("Expected single element");
} }
indexAsArray[Ints.checkedCast(entry.getLongKey() - minLongKey)] = rowNums.getInt(0); indexAsArray[Ints.checkedCast(entry.getLongKey() - minLongKey)] = rowNums.firstInt();
entries.remove(); entries.remove();
} }

View File

@ -1,98 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join.table;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntIterator;
import java.util.Arrays;
import java.util.NoSuchElementException;
/**
* Iterates over the intersection of an array of sorted int lists. Intended for situations where the number
* of iterators is fairly small. The iterators must be composed of ascending, nonnegative ints.
*
* @see RowBasedIndexedTable#columnReader uses this
*/
public class SortedIntIntersectionIterator implements IntIterator
{
private static final int NIL = -1;
private final IntIterator[] iterators;
private final int[] currents;
private int next = NIL;
SortedIntIntersectionIterator(final IntIterator[] iterators)
{
Preconditions.checkArgument(iterators.length > 0, "iterators.length > 0");
this.iterators = iterators;
this.currents = new int[iterators.length];
Arrays.fill(currents, NIL);
advance();
}
@Override
public int nextInt()
{
if (next == NIL) {
throw new NoSuchElementException();
}
final int retVal = next;
advance();
return retVal;
}
@Override
public boolean hasNext()
{
return next != NIL;
}
private void advance()
{
next++;
// This is the part that assumes the number of iterators is fairly small.
boolean foundNext = false;
while (!foundNext) {
foundNext = true;
for (int i = 0; i < iterators.length; i++) {
while (currents[i] < next && iterators[i].hasNext()) {
currents[i] = iterators[i].nextInt();
}
if (currents[i] < next && !iterators[i].hasNext()) {
next = NIL;
return;
} else if (currents[i] > next) {
next = currents[i];
foundNext = false;
}
}
}
assert Arrays.stream(currents).allMatch(x -> x == next);
}
}

View File

@ -19,8 +19,8 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.ints.IntLists; import it.unimi.dsi.fastutil.ints.IntSortedSets;
import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
@ -61,18 +61,18 @@ public class UniqueLongArrayIndex implements IndexedTable.Index
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
final Long longKey = DimensionHandlerUtils.convertObjectToLong(key); final Long longKey = DimensionHandlerUtils.convertObjectToLong(key);
if (longKey != null) { if (longKey != null) {
final int row = findUniqueLong(longKey); final int row = findUniqueLong(longKey);
if (row >= 0) { if (row >= 0) {
return IntLists.singleton(row); return IntSortedSets.singleton(row);
} }
} }
return IntLists.EMPTY_LIST; return IntSortedSets.EMPTY_SET;
} }
@Override @Override

View File

@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.collections;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.junit.Assert;
import org.junit.Test;
import java.util.Collections;
import java.util.NoSuchElementException;
public class RangeIntSetTest
{
@Test
public void test_constructor_zeroZero()
{
Assert.assertEquals(Collections.emptySet(), new RangeIntSet(0, 0));
}
@Test
public void test_constructor_zeroTwo()
{
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2));
}
@Test
public void test_contains()
{
Assert.assertFalse(new RangeIntSet(0, 2).contains(-1));
Assert.assertTrue(new RangeIntSet(0, 2).contains(0));
Assert.assertTrue(new RangeIntSet(0, 2).contains(1));
Assert.assertFalse(new RangeIntSet(0, 2).contains(2));
Assert.assertFalse(new RangeIntSet(0, 2).contains(3));
}
@Test
public void test_headSet()
{
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).headSet(-1));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).headSet(0));
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).headSet(1));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).headSet(2));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).headSet(3));
}
@Test
public void test_tailSet()
{
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).tailSet(-1));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).tailSet(0));
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).tailSet(1));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).tailSet(2));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).tailSet(3));
}
@Test
public void test_subSet()
{
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(-2, -1));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(-1, 0));
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).subSet(-1, 1));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(-1, 2));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(-1, 3));
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).subSet(0, 1));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(0, 2));
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(0, 3));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(1, 1));
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).subSet(1, 2));
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).subSet(1, 3));
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(2, 3));
}
@Test
public void test_firstInt()
{
Assert.assertEquals(0, new RangeIntSet(0, 2).firstInt());
Assert.assertThrows(NoSuchElementException.class, () -> new RangeIntSet(0, 0).firstInt());
}
@Test
public void test_lastInt()
{
Assert.assertEquals(1, new RangeIntSet(0, 2).lastInt());
Assert.assertThrows(NoSuchElementException.class, () -> new RangeIntSet(0, 0).firstInt());
}
@Test
public void test_size()
{
Assert.assertEquals(0, new RangeIntSet(0, 0).size());
Assert.assertEquals(2, new RangeIntSet(0, 2).size());
}
@Test
public void test_iterator()
{
Assert.assertEquals(
ImmutableList.of(0, 1),
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator())
);
}
@Test
public void test_iterator_from()
{
Assert.assertEquals(
ImmutableList.of(0, 1),
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(0))
);
Assert.assertEquals(
ImmutableList.of(1),
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(1))
);
Assert.assertEquals(
ImmutableList.of(),
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(2))
);
Assert.assertEquals(
ImmutableList.of(),
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(3))
);
}
@Test
public void test_equals()
{
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(0, 0));
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 0));
Assert.assertNotEquals(new RangeIntSet(0, 0), new RangeIntSet(0, 1));
}
@Test
public void test_equals_empty()
{
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 1));
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 0));
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(0, -1));
}
@Test
public void test_equals_otherSet()
{
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2));
Assert.assertNotEquals(ImmutableSet.of(0, 1, 2), new RangeIntSet(0, 2));
}
}

View File

@ -24,7 +24,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.jackson.SegmentizerModule; import org.apache.druid.jackson.SegmentizerModule;
@ -263,18 +264,19 @@ public class BroadcastSegmentIndexedTableTest extends InitializedNullHandlingTes
// lets try a few values out // lets try a few values out
for (Object val : vals) { for (Object val : vals) {
final IntList valIndex = valueIndex.find(val); final IntSortedSet valIndex = valueIndex.find(val);
if (val == null) { if (val == null) {
Assert.assertEquals(0, valIndex.size()); Assert.assertEquals(0, valIndex.size());
} else { } else {
Assert.assertTrue(valIndex.size() > 0); Assert.assertTrue(valIndex.size() > 0);
for (int i = 0; i < valIndex.size(); i++) { final IntBidirectionalIterator rowIterator = valIndex.iterator();
Assert.assertEquals(val, reader.read(valIndex.getInt(i))); while (rowIterator.hasNext()) {
Assert.assertEquals(val, reader.read(rowIterator.nextInt()));
} }
} }
} }
for (Object val : nonmatchingVals) { for (Object val : nonmatchingVals) {
final IntList valIndex = valueIndex.find(val); final IntSortedSet valIndex = valueIndex.find(val);
Assert.assertEquals(0, valIndex.size()); Assert.assertEquals(0, valIndex.size());
} }
} }

View File

@ -21,9 +21,10 @@ package org.apache.druid.segment.join.table;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints; import com.google.common.primitives.Ints;
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.ints.IntLists; import it.unimi.dsi.fastutil.ints.IntSortedSets;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.QueryUnsupportedException; import org.apache.druid.query.QueryUnsupportedException;
import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.BaseLongColumnValueSelector;
@ -262,7 +263,7 @@ public class IndexedTableJoinMatcherTest
); );
Assert.assertNotNull(dimensionProcessor.match()); Assert.assertNotNull(dimensionProcessor.match());
Assert.assertFalse(dimensionProcessor.match().hasNext()); Assert.assertTrue(dimensionProcessor.match().isEmpty());
Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow()); Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow());
} }
@ -284,7 +285,7 @@ public class IndexedTableJoinMatcherTest
); );
Assert.assertNotNull(dimensionProcessor.match()); Assert.assertNotNull(dimensionProcessor.match());
Assert.assertFalse(dimensionProcessor.match().hasNext()); Assert.assertTrue(dimensionProcessor.match().isEmpty());
Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow()); Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow());
} }
@ -405,7 +406,7 @@ public class IndexedTableJoinMatcherTest
public static class Int2IntListLookupTableTest public static class Int2IntListLookupTableTest
{ {
private IndexedTableJoinMatcher.Int2IntListLookupTable target; private IndexedTableJoinMatcher.Int2IntSortedSetLookupTable target;
private AtomicLong counter; private AtomicLong counter;
@ -413,19 +414,19 @@ public class IndexedTableJoinMatcherTest
public void setup() public void setup()
{ {
counter = new AtomicLong(0); counter = new AtomicLong(0);
IntFunction<IntList> loader = key -> { IntFunction<IntSortedSet> loader = key -> {
counter.incrementAndGet(); counter.incrementAndGet();
return IntLists.singleton(key); return IntSortedSets.singleton(key);
}; };
target = new IndexedTableJoinMatcher.Int2IntListLookupTable(SIZE, loader); target = new IndexedTableJoinMatcher.Int2IntSortedSetLookupTable(SIZE, loader);
} }
@Test @Test
public void loadsValueIfAbsent() public void loadsValueIfAbsent()
{ {
int key = 1; int key = 1;
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(1L, counter.longValue()); Assert.assertEquals(1L, counter.longValue());
} }
@ -433,15 +434,15 @@ public class IndexedTableJoinMatcherTest
public void doesNotLoadIfPresent() public void doesNotLoadIfPresent()
{ {
int key = 1; int key = 1;
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(1L, counter.longValue()); Assert.assertEquals(1L, counter.longValue());
} }
} }
public static class Int2IntListLruCache public static class Int2IntSortedSetLruCache
{ {
private IndexedTableJoinMatcher.Int2IntListLruCache target; private IndexedTableJoinMatcher.Int2IntSortedSetLruCache target;
private AtomicLong counter; private AtomicLong counter;
@ -449,19 +450,19 @@ public class IndexedTableJoinMatcherTest
public void setup() public void setup()
{ {
counter = new AtomicLong(0); counter = new AtomicLong(0);
IntFunction<IntList> loader = key -> { IntFunction<IntSortedSet> loader = key -> {
counter.incrementAndGet(); counter.incrementAndGet();
return IntLists.singleton(key); return IntSortedSets.singleton(key);
}; };
target = new IndexedTableJoinMatcher.Int2IntListLruCache(SIZE, loader); target = new IndexedTableJoinMatcher.Int2IntSortedSetLruCache(SIZE, loader);
} }
@Test @Test
public void loadsValueIfAbsent() public void loadsValueIfAbsent()
{ {
int key = 1; int key = 1;
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(1L, counter.longValue()); Assert.assertEquals(1L, counter.longValue());
} }
@ -469,8 +470,8 @@ public class IndexedTableJoinMatcherTest
public void doesNotLoadIfPresent() public void doesNotLoadIfPresent()
{ {
int key = 1; int key = 1;
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
Assert.assertEquals(1L, counter.longValue()); Assert.assertEquals(1L, counter.longValue());
} }
@ -481,10 +482,10 @@ public class IndexedTableJoinMatcherTest
int next = start + SIZE; int next = start + SIZE;
for (int key = start; key < next; key++) { for (int key = start; key < next; key++) {
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key)); Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
} }
Assert.assertEquals(IntLists.singleton(next), target.getAndLoadIfAbsent(next)); Assert.assertEquals(IntSortedSets.singleton(next), target.getAndLoadIfAbsent(next));
Assert.assertNull(target.get(start)); Assert.assertNull(target.get(start));
Assert.assertEquals(SIZE + 1, counter.longValue()); Assert.assertEquals(SIZE + 1, counter.longValue());
@ -508,9 +509,9 @@ public class IndexedTableJoinMatcherTest
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
return IntLists.singleton(((String) key).length()); return IntSortedSets.singleton(((String) key).length());
} }
@Override @Override
@ -538,12 +539,12 @@ public class IndexedTableJoinMatcherTest
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
if ("1".equals(DimensionHandlerUtils.convertObjectToString(key))) { if ("1".equals(DimensionHandlerUtils.convertObjectToString(key))) {
return IntLists.singleton(3); return IntSortedSets.singleton(3);
} else { } else {
return IntLists.EMPTY_LIST; return IntSortedSets.EMPTY_SET;
} }
} }
@ -572,14 +573,14 @@ public class IndexedTableJoinMatcherTest
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
final Long l = DimensionHandlerUtils.convertObjectToLong(key); final Long l = DimensionHandlerUtils.convertObjectToLong(key);
if (l == null && NullHandling.sqlCompatible()) { if (l == null && NullHandling.sqlCompatible()) {
return IntLists.EMPTY_LIST; return IntSortedSets.EMPTY_SET;
} else { } else {
return IntLists.singleton(Ints.checkedCast((l == null ? 0L : l) + 1)); return IntSortedSets.singleton(Ints.checkedCast((l == null ? 0L : l) + 1));
} }
} }
@ -608,9 +609,9 @@ public class IndexedTableJoinMatcherTest
} }
@Override @Override
public IntList find(Object key) public IntSortedSet find(Object key)
{ {
return new IntArrayList(new int[]{1, 2, 3}); return new IntAVLTreeSet(new int[]{1, 2, 3});
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSortedSet;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
import org.hamcrest.CoreMatchers; import org.hamcrest.CoreMatchers;
import org.junit.Assert; import org.junit.Assert;
@ -50,13 +50,13 @@ public class RowBasedIndexBuilderTest
Assert.assertEquals(ColumnType.STRING, index.keyType()); Assert.assertEquals(ColumnType.STRING, index.keyType());
Assert.assertTrue(index.areKeysUnique()); Assert.assertTrue(index.areKeysUnique());
Assert.assertEquals(intList(0), index.find("abc")); Assert.assertEquals(intSet(0), index.find("abc"));
Assert.assertEquals(intList(1), index.find("")); Assert.assertEquals(intSet(1), index.find(""));
Assert.assertEquals(intList(3), index.find(1L)); Assert.assertEquals(intSet(3), index.find(1L));
Assert.assertEquals(intList(3), index.find("1")); Assert.assertEquals(intSet(3), index.find("1"));
Assert.assertEquals(intList(4), index.find("def")); Assert.assertEquals(intSet(4), index.find("def"));
Assert.assertEquals(intList(), index.find(null)); Assert.assertEquals(intSet(), index.find(null));
Assert.assertEquals(intList(), index.find("nonexistent")); Assert.assertEquals(intSet(), index.find("nonexistent"));
expectedException.expect(UnsupportedOperationException.class); expectedException.expect(UnsupportedOperationException.class);
index.findUniqueLong(0L); index.findUniqueLong(0L);
@ -80,13 +80,13 @@ public class RowBasedIndexBuilderTest
Assert.assertEquals(ColumnType.STRING, index.keyType()); Assert.assertEquals(ColumnType.STRING, index.keyType());
Assert.assertFalse(index.areKeysUnique()); Assert.assertFalse(index.areKeysUnique());
Assert.assertEquals(intList(0, 3), index.find("abc")); Assert.assertEquals(intSet(0, 3), index.find("abc"));
Assert.assertEquals(intList(1), index.find("")); Assert.assertEquals(intSet(1), index.find(""));
Assert.assertEquals(intList(4), index.find(1L)); Assert.assertEquals(intSet(4), index.find(1L));
Assert.assertEquals(intList(4), index.find("1")); Assert.assertEquals(intSet(4), index.find("1"));
Assert.assertEquals(intList(5), index.find("def")); Assert.assertEquals(intSet(5), index.find("def"));
Assert.assertEquals(intList(), index.find(null)); Assert.assertEquals(intSet(), index.find(null));
Assert.assertEquals(intList(), index.find("nonexistent")); Assert.assertEquals(intSet(), index.find("nonexistent"));
expectedException.expect(UnsupportedOperationException.class); expectedException.expect(UnsupportedOperationException.class);
index.findUniqueLong(0L); index.findUniqueLong(0L);
@ -107,10 +107,10 @@ public class RowBasedIndexBuilderTest
Assert.assertEquals(ColumnType.LONG, index.keyType()); Assert.assertEquals(ColumnType.LONG, index.keyType());
Assert.assertTrue(index.areKeysUnique()); Assert.assertTrue(index.areKeysUnique());
Assert.assertEquals(intList(0), index.find(1L)); Assert.assertEquals(intSet(0), index.find(1L));
Assert.assertEquals(intList(1), index.find(5L)); Assert.assertEquals(intSet(1), index.find(5L));
Assert.assertEquals(intList(2), index.find(2L)); Assert.assertEquals(intSet(2), index.find(2L));
Assert.assertEquals(intList(), index.find(3L)); Assert.assertEquals(intSet(), index.find(3L));
Assert.assertEquals(0, index.findUniqueLong(1L)); Assert.assertEquals(0, index.findUniqueLong(1L));
Assert.assertEquals(1, index.findUniqueLong(5L)); Assert.assertEquals(1, index.findUniqueLong(5L));
@ -133,10 +133,10 @@ public class RowBasedIndexBuilderTest
Assert.assertEquals(ColumnType.LONG, index.keyType()); Assert.assertEquals(ColumnType.LONG, index.keyType());
Assert.assertTrue(index.areKeysUnique()); Assert.assertTrue(index.areKeysUnique());
Assert.assertEquals(intList(0), index.find(1L)); Assert.assertEquals(intSet(0), index.find(1L));
Assert.assertEquals(intList(1), index.find(10_000_000L)); Assert.assertEquals(intSet(1), index.find(10_000_000L));
Assert.assertEquals(intList(2), index.find(2L)); Assert.assertEquals(intSet(2), index.find(2L));
Assert.assertEquals(intList(), index.find(3L)); Assert.assertEquals(intSet(), index.find(3L));
Assert.assertEquals(0, index.findUniqueLong(1L)); Assert.assertEquals(0, index.findUniqueLong(1L));
Assert.assertEquals(1, index.findUniqueLong(10_000_000L)); Assert.assertEquals(1, index.findUniqueLong(10_000_000L));
@ -160,20 +160,20 @@ public class RowBasedIndexBuilderTest
Assert.assertEquals(ColumnType.LONG, index.keyType()); Assert.assertEquals(ColumnType.LONG, index.keyType());
Assert.assertFalse(index.areKeysUnique()); Assert.assertFalse(index.areKeysUnique());
Assert.assertEquals(intList(0, 2), index.find("1")); Assert.assertEquals(intSet(0, 2), index.find("1"));
Assert.assertEquals(intList(0, 2), index.find(1)); Assert.assertEquals(intSet(0, 2), index.find(1));
Assert.assertEquals(intList(0, 2), index.find(1L)); Assert.assertEquals(intSet(0, 2), index.find(1L));
Assert.assertEquals(intList(1), index.find(5L)); Assert.assertEquals(intSet(1), index.find(5L));
Assert.assertEquals(intList(3), index.find(2L)); Assert.assertEquals(intSet(3), index.find(2L));
Assert.assertEquals(intList(), index.find(3L)); Assert.assertEquals(intSet(), index.find(3L));
expectedException.expect(UnsupportedOperationException.class); expectedException.expect(UnsupportedOperationException.class);
index.findUniqueLong(5L); index.findUniqueLong(5L);
} }
public IntList intList(final int... ints) public IntSortedSet intSet(final int... ints)
{ {
final IntArrayList retVal = new IntArrayList(ints.length); final IntAVLTreeSet retVal = new IntAVLTreeSet();
for (int i : ints) { for (int i : ints) {
retVal.add(i); retVal.add(i);
} }

View File

@ -19,7 +19,6 @@
package org.apache.druid.segment.join.table; package org.apache.druid.segment.join.table;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
@ -92,9 +91,9 @@ public class RowBasedIndexedTableTest
{ {
final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_ISO_CODE); final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_ISO_CODE);
Assert.assertEquals(ImmutableList.of(), index.find(null)); Assert.assertEquals(ImmutableSet.of(), index.find(null));
Assert.assertEquals(ImmutableList.of(), index.find(2)); Assert.assertEquals(ImmutableSet.of(), index.find(2));
Assert.assertEquals(ImmutableList.of(13), index.find("US")); Assert.assertEquals(ImmutableSet.of(13), index.find("US"));
} }
@Test @Test
@ -102,15 +101,15 @@ public class RowBasedIndexedTableTest
{ {
final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_NUMBER); final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_NUMBER);
Assert.assertEquals(ImmutableList.of(), index.find(null)); Assert.assertEquals(ImmutableSet.of(), index.find(null));
Assert.assertEquals(ImmutableList.of(0), index.find(0)); Assert.assertEquals(ImmutableSet.of(0), index.find(0));
Assert.assertEquals(ImmutableList.of(0), index.find(0.0)); Assert.assertEquals(ImmutableSet.of(0), index.find(0.0));
Assert.assertEquals(ImmutableList.of(0), index.find("0")); Assert.assertEquals(ImmutableSet.of(0), index.find("0"));
Assert.assertEquals(ImmutableList.of(2), index.find(2)); Assert.assertEquals(ImmutableSet.of(2), index.find(2));
Assert.assertEquals(ImmutableList.of(2), index.find(2.0)); Assert.assertEquals(ImmutableSet.of(2), index.find(2.0));
Assert.assertEquals(ImmutableList.of(2), index.find("2")); Assert.assertEquals(ImmutableSet.of(2), index.find("2"));
Assert.assertEquals(ImmutableList.of(), index.find(20)); Assert.assertEquals(ImmutableSet.of(), index.find(20));
Assert.assertEquals(ImmutableList.of(), index.find("US")); Assert.assertEquals(ImmutableSet.of(), index.find("US"));
} }
@Test @Test
@ -132,11 +131,11 @@ public class RowBasedIndexedTableTest
{ {
final IndexedTable.Index index = regionsTable.columnIndex(INDEX_REGIONS_REGION_ISO_CODE); final IndexedTable.Index index = regionsTable.columnIndex(INDEX_REGIONS_REGION_ISO_CODE);
Assert.assertEquals(ImmutableList.of(), index.find(null)); Assert.assertEquals(ImmutableSet.of(), index.find(null));
Assert.assertEquals(ImmutableList.of(0), index.find("11")); Assert.assertEquals(ImmutableSet.of(0), index.find("11"));
Assert.assertEquals(ImmutableList.of(1), index.find(13)); Assert.assertEquals(ImmutableSet.of(1), index.find(13));
Assert.assertEquals(ImmutableList.of(12), index.find("QC")); Assert.assertEquals(ImmutableSet.of(12), index.find("QC"));
Assert.assertEquals(ImmutableList.of(15, 16), index.find("VA")); Assert.assertEquals(ImmutableSet.of(15, 16), index.find("VA"));
} }
@Test @Test

View File

@ -1,106 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join.table;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import org.apache.druid.java.util.common.StringUtils;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
public class SortedIntIntersectionIteratorTest
{
@Test
public void test_iterator_allPossibleSingleListsWithCardinalityUpToThree()
{
// 8 possibilities
for (int i = 0; i < 8; i++) {
final IntList ints = intsFromBits(i);
Assert.assertEquals(ints.toString(), ints, intersection(ints));
}
}
@Test
public void test_iterator_allPossibleSetsOfTwoListsWithCardinalityUpToSix()
{
// 4096 possibilities: 64 for each list, 2 lists
for (int i = 0; i < 4096; i++) {
final int bits1 = i & 63;
final int bits2 = (i >> 6) & 63;
final IntList ints1 = intsFromBits(bits1);
final IntList ints2 = intsFromBits(bits2);
Assert.assertEquals(
StringUtils.format("ints1 = %s; ints2 = %s", ints1, ints2),
intsFromBits(bits1 & bits2),
intersection(ints1, ints2)
);
}
}
@Test
public void test_iterator_allPossibleSetsOfThreeListsWithCardinalityUpToFour()
{
// 4096 possibilities: 16 for each list, 3 lists
for (int i = 0; i < 4096; i++) {
final int bits1 = i & 15;
final int bits2 = (i >> 4) & 15;
final int bits3 = (i >> 8) & 15;
final IntList ints1 = intsFromBits(bits1);
final IntList ints2 = intsFromBits(bits2);
final IntList ints3 = intsFromBits(bits3);
Assert.assertEquals(
StringUtils.format("ints1 = %s; ints2 = %s; ints3 = %s", ints1, ints2, ints3),
intsFromBits(bits1 & bits2 & bits3),
intersection(ints1, ints2, ints3)
);
}
}
private static IntList intersection(final IntList... lists)
{
final SortedIntIntersectionIterator comboIterator = new SortedIntIntersectionIterator(
Arrays.stream(lists)
.map(IntList::iterator)
.toArray(IntIterator[]::new)
);
return new IntArrayList(comboIterator);
}
private static IntList intsFromBits(final int bits)
{
final IntArrayList retVal = new IntArrayList(4);
for (int i = 0; i < 32; i++) {
if (((bits >> i) & 1) == 1) {
retVal.add(i);
}
}
return retVal;
}
}