mirror of https://github.com/apache/druid.git
Speed up composite key joins on IndexedTable. (#13516)
* Speed up composite key joins on IndexedTable. Prior to this patch, IndexedTable indexes are sorted IntList. This works great when we have a single-column join key: we simply retrieve the list and we know what rows match. However, when we have a composite key, we need to merge the sorted lists. This is inefficient when one is very dense and others are very sparse. This patch switches from sorted IntList to IntSortedSet, and changes to the following intersection algorithm: 1) Initialize the intersection set to the smallest matching set from the various parts of the composite key. 2) For each element in that smallest set, check other sets for that element. If any do *not* include it, then remove the element from the intersection set. This way, complexity scales with the size of the smallest set, not the largest one. * RangeIntSet stuff.
This commit is contained in:
parent
85d36be085
commit
882ae9f002
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.collections;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.AbstractIntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntComparator;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterators;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSets;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
* Set from start (inclusive) to end (exclusive).
|
||||
*/
|
||||
public class RangeIntSet extends AbstractIntSortedSet
|
||||
{
|
||||
private final int start;
|
||||
private final int end;
|
||||
|
||||
public RangeIntSet(int start, int end)
|
||||
{
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntBidirectionalIterator iterator()
|
||||
{
|
||||
return IntIterators.fromTo(start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntBidirectionalIterator iterator(int fromElement)
|
||||
{
|
||||
if (fromElement < end) {
|
||||
return IntIterators.fromTo(Math.max(start, fromElement), end);
|
||||
} else {
|
||||
return IntIterators.EMPTY_ITERATOR;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(int k)
|
||||
{
|
||||
return k >= start && k < end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntSortedSet subSet(int fromElement, int toElement)
|
||||
{
|
||||
if (fromElement < end && toElement > start) {
|
||||
return new RangeIntSet(Math.max(fromElement, start), Math.min(toElement, end));
|
||||
} else {
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntSortedSet headSet(int toElement)
|
||||
{
|
||||
if (toElement > start) {
|
||||
return new RangeIntSet(start, Math.min(toElement, end));
|
||||
} else {
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntSortedSet tailSet(int fromElement)
|
||||
{
|
||||
if (fromElement < end) {
|
||||
return new RangeIntSet(Math.max(start, fromElement), end);
|
||||
} else {
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntComparator comparator()
|
||||
{
|
||||
// Natural ordering.
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int firstInt()
|
||||
{
|
||||
if (start < end) {
|
||||
return start;
|
||||
} else {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int lastInt()
|
||||
{
|
||||
if (start < end) {
|
||||
return end - 1;
|
||||
} else {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size()
|
||||
{
|
||||
return end > start ? end - start : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty()
|
||||
{
|
||||
return end <= start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (o instanceof RangeIntSet) {
|
||||
final RangeIntSet other = (RangeIntSet) o;
|
||||
return (other.start == start && other.end == end) || (other.isEmpty() && isEmpty());
|
||||
} else {
|
||||
return super.equals(o);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return isEmpty() ? 0 : start + 31 * end;
|
||||
}
|
||||
}
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.segment.ColumnSelectorFactory;
|
||||
|
@ -134,7 +134,7 @@ public interface IndexedTable extends ReferenceCountedObject, Closeable
|
|||
* If "key" is some type other than the natural type {@link #keyType()}, it will be converted before checking
|
||||
* the index.
|
||||
*/
|
||||
IntList find(Object key);
|
||||
IntSortedSet find(Object key);
|
||||
|
||||
/**
|
||||
* Returns the row number corresponding to "key" in this index, or {@link #NOT_FOUND} if the key does not exist
|
||||
|
|
|
@ -23,11 +23,14 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
import com.google.common.base.Preconditions;
|
||||
import io.netty.util.SuppressForbidden;
|
||||
import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterators;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSets;
|
||||
import org.apache.druid.collections.RangeIntSet;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
|
@ -74,7 +77,6 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
private final IndexedTable table;
|
||||
private final List<ConditionMatcher> conditionMatchers;
|
||||
private final boolean singleRowMatching;
|
||||
private final IntIterator[] currentMatchedRows;
|
||||
private final ColumnSelectorFactory selectorFactory;
|
||||
|
||||
// matchedRows and matchingRemainder are used to implement matchRemainder().
|
||||
|
@ -105,10 +107,10 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
reset();
|
||||
|
||||
if (condition.isAlwaysTrue()) {
|
||||
this.conditionMatchers = Collections.singletonList(() -> IntIterators.fromTo(0, table.numRows()));
|
||||
this.conditionMatchers = Collections.singletonList(() -> new RangeIntSet(0, table.numRows()));
|
||||
this.singleRowMatching = false;
|
||||
} else if (condition.isAlwaysFalse()) {
|
||||
this.conditionMatchers = Collections.singletonList(() -> IntIterators.EMPTY_ITERATOR);
|
||||
this.conditionMatchers = Collections.singletonList(() -> IntSortedSets.EMPTY_SET);
|
||||
this.singleRowMatching = false;
|
||||
} else if (condition.getNonEquiConditions().isEmpty()) {
|
||||
final List<Pair<IndexedTable.Index, Equality>> indexes =
|
||||
|
@ -130,13 +132,6 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
);
|
||||
}
|
||||
|
||||
if (!singleRowMatching) {
|
||||
// Only used by "matchCondition", and only in the multi-row-matching case.
|
||||
this.currentMatchedRows = new IntIterator[conditionMatchers.size()];
|
||||
} else {
|
||||
this.currentMatchedRows = null;
|
||||
}
|
||||
|
||||
ColumnSelectorFactory selectorFactory = table.makeColumnSelectorFactory(joinableOffset, descending, closer);
|
||||
this.selectorFactory = selectorFactory != null
|
||||
? selectorFactory
|
||||
|
@ -204,18 +199,50 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
} else {
|
||||
if (conditionMatchers.size() == 1) {
|
||||
currentIterator = conditionMatchers.get(0).match();
|
||||
currentIterator = conditionMatchers.get(0).match().iterator();
|
||||
} else {
|
||||
final IntSortedSet[] matchingSets = new IntSortedSet[conditionMatchers.size()];
|
||||
int smallestMatchingSet = -1;
|
||||
|
||||
for (int i = 0; i < conditionMatchers.size(); i++) {
|
||||
final IntIterator rows = conditionMatchers.get(i).match();
|
||||
if (rows.hasNext()) {
|
||||
currentMatchedRows[i] = rows;
|
||||
} else {
|
||||
return;
|
||||
matchingSets[i] = conditionMatchers.get(i).match();
|
||||
if (i == 0 || matchingSets[i].size() < matchingSets[smallestMatchingSet].size()) {
|
||||
smallestMatchingSet = i;
|
||||
}
|
||||
}
|
||||
|
||||
currentIterator = new SortedIntIntersectionIterator(currentMatchedRows);
|
||||
// Start intersection using the smallest matching set.
|
||||
IntSortedSet intersection = matchingSets[smallestMatchingSet];
|
||||
|
||||
// Remember if we copied matchingSets[smallestMatchingSet] or not. Avoids unnecessary copies.
|
||||
boolean copied = false;
|
||||
|
||||
for (int i = 0; i < conditionMatchers.size(); i++) {
|
||||
final int numIntersectionElements = intersection.size();
|
||||
|
||||
if (numIntersectionElements > 0 && i != smallestMatchingSet) {
|
||||
final IntBidirectionalIterator it = intersection.iterator();
|
||||
while (it.hasNext()) {
|
||||
final int rowNumber = it.nextInt();
|
||||
if (!matchingSets[i].contains(rowNumber)) {
|
||||
// Remove from intersection.
|
||||
if (numIntersectionElements == 1) {
|
||||
intersection = IntSortedSets.EMPTY_SET;
|
||||
break;
|
||||
} else {
|
||||
if (!copied) {
|
||||
intersection = new IntAVLTreeSet(intersection);
|
||||
copied = true;
|
||||
}
|
||||
|
||||
intersection.remove(rowNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currentIterator = intersection.iterator();
|
||||
}
|
||||
|
||||
advanceCurrentRow();
|
||||
|
@ -323,14 +350,14 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
*/
|
||||
default int matchSingleRow()
|
||||
{
|
||||
final IntIterator it = match();
|
||||
return it.hasNext() ? it.nextInt() : NO_CONDITION_MATCH;
|
||||
final IntSortedSet rows = match();
|
||||
return rows.isEmpty() ? NO_CONDITION_MATCH : rows.firstInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator for the row numbers that match the current cursor position.
|
||||
* Returns row numbers that match the current cursor position.
|
||||
*/
|
||||
IntIterator match();
|
||||
IntSortedSet match();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -347,9 +374,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
private final ColumnType keyType;
|
||||
private final IndexedTable.Index index;
|
||||
|
||||
// DimensionSelector -> (int) dimension id -> (IntList) row numbers
|
||||
// DimensionSelector -> (int) dimension id -> (IntSortedSet) row numbers
|
||||
@SuppressWarnings("MismatchedQueryAndUpdateOfCollection") // updated via computeIfAbsent
|
||||
private final LruLoadingHashMap<DimensionSelector, Int2IntListMap> dimensionCaches;
|
||||
private final LruLoadingHashMap<DimensionSelector, Int2IntSortedSetMap> dimensionCaches;
|
||||
|
||||
ConditionMatcherFactory(IndexedTable.Index index)
|
||||
{
|
||||
|
@ -360,21 +387,21 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
MAX_NUM_CACHE,
|
||||
selector -> {
|
||||
int cardinality = selector.getValueCardinality();
|
||||
IntFunction<IntList> loader = dimensionId -> getRowNumbers(selector, dimensionId);
|
||||
IntFunction<IntSortedSet> loader = dimensionId -> getRowNumbers(selector, dimensionId);
|
||||
return cardinality <= CACHE_MAX_SIZE
|
||||
? new Int2IntListLookupTable(cardinality, loader)
|
||||
: new Int2IntListLruCache(CACHE_MAX_SIZE, loader);
|
||||
? new Int2IntSortedSetLookupTable(cardinality, loader)
|
||||
: new Int2IntSortedSetLruCache(CACHE_MAX_SIZE, loader);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private IntList getRowNumbers(DimensionSelector selector, int dimensionId)
|
||||
private IntSortedSet getRowNumbers(DimensionSelector selector, int dimensionId)
|
||||
{
|
||||
final String key = selector.lookupName(dimensionId);
|
||||
return index.find(key);
|
||||
}
|
||||
|
||||
private IntList getAndCacheRowNumbers(DimensionSelector selector, int dimensionId)
|
||||
private IntSortedSet getAndCacheRowNumbers(DimensionSelector selector, int dimensionId)
|
||||
{
|
||||
return dimensionCaches.getAndLoadIfAbsent(selector).getAndLoadIfAbsent(dimensionId);
|
||||
}
|
||||
|
@ -403,10 +430,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
|
||||
if (row.size() == 1) {
|
||||
int dimensionId = row.get(0);
|
||||
IntList rowNumbers = getRowNumbers(selector, dimensionId);
|
||||
return rowNumbers.iterator();
|
||||
return getRowNumbers(selector, dimensionId);
|
||||
} else if (row.size() == 0) {
|
||||
return IntIterators.EMPTY_ITERATOR;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
} else {
|
||||
// Multi-valued rows are not handled by the join system right now
|
||||
// TODO: Remove when https://github.com/apache/druid/issues/9924 is done
|
||||
|
@ -421,10 +447,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
|
||||
if (row.size() == 1) {
|
||||
int dimensionId = row.get(0);
|
||||
IntList rowNumbers = getAndCacheRowNumbers(selector, dimensionId);
|
||||
return rowNumbers.iterator();
|
||||
return getAndCacheRowNumbers(selector, dimensionId);
|
||||
} else if (row.size() == 0) {
|
||||
return IntIterators.EMPTY_ITERATOR;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
} else {
|
||||
// Multi-valued rows are not handled by the join system right now
|
||||
// TODO: Remove when https://github.com/apache/druid/issues/9924 is done
|
||||
|
@ -438,9 +463,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
public ConditionMatcher makeFloatProcessor(BaseFloatColumnValueSelector selector)
|
||||
{
|
||||
if (NullHandling.replaceWithDefault()) {
|
||||
return () -> index.find(selector.getFloat()).iterator();
|
||||
return () -> index.find(selector.getFloat());
|
||||
} else {
|
||||
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getFloat()).iterator();
|
||||
return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getFloat());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -448,9 +473,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
public ConditionMatcher makeDoubleProcessor(BaseDoubleColumnValueSelector selector)
|
||||
{
|
||||
if (NullHandling.replaceWithDefault()) {
|
||||
return () -> index.find(selector.getDouble()).iterator();
|
||||
return () -> index.find(selector.getDouble());
|
||||
} else {
|
||||
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getDouble()).iterator();
|
||||
return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getDouble());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -460,9 +485,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
if (index.keyType().is(ValueType.LONG)) {
|
||||
return makePrimitiveLongMatcher(selector);
|
||||
} else if (NullHandling.replaceWithDefault()) {
|
||||
return () -> index.find(selector.getLong()).iterator();
|
||||
return () -> index.find(selector.getLong());
|
||||
} else {
|
||||
return () -> selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getLong()).iterator();
|
||||
return () -> selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getLong());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -478,9 +503,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntIterator match()
|
||||
public IntSortedSet match()
|
||||
{
|
||||
return IntIterators.EMPTY_ITERATOR;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -501,9 +526,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntIterator match()
|
||||
public IntSortedSet match()
|
||||
{
|
||||
return index.find(selector.getLong()).iterator();
|
||||
return index.find(selector.getLong());
|
||||
}
|
||||
};
|
||||
} else {
|
||||
|
@ -516,9 +541,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntIterator match()
|
||||
public IntSortedSet match()
|
||||
{
|
||||
return selector.isNull() ? IntIterators.EMPTY_ITERATOR : index.find(selector.getLong()).iterator();
|
||||
return selector.isNull() ? IntSortedSets.EMPTY_SET : index.find(selector.getLong());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -559,30 +584,30 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
}
|
||||
|
||||
private interface Int2IntListMap
|
||||
private interface Int2IntSortedSetMap
|
||||
{
|
||||
IntList getAndLoadIfAbsent(int key);
|
||||
IntSortedSet getAndLoadIfAbsent(int key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Lookup table for keys in the range from 0 to maxSize - 1
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static class Int2IntListLookupTable implements Int2IntListMap
|
||||
static class Int2IntSortedSetLookupTable implements Int2IntSortedSetMap
|
||||
{
|
||||
private final IntList[] lookup;
|
||||
private final IntFunction<IntList> loader;
|
||||
private final IntSortedSet[] lookup;
|
||||
private final IntFunction<IntSortedSet> loader;
|
||||
|
||||
Int2IntListLookupTable(int maxSize, IntFunction<IntList> loader)
|
||||
Int2IntSortedSetLookupTable(int maxSize, IntFunction<IntSortedSet> loader)
|
||||
{
|
||||
this.loader = loader;
|
||||
this.lookup = new IntList[maxSize];
|
||||
this.lookup = new IntSortedSet[maxSize];
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntList getAndLoadIfAbsent(int key)
|
||||
public IntSortedSet getAndLoadIfAbsent(int key)
|
||||
{
|
||||
IntList value = lookup[key];
|
||||
IntSortedSet value = lookup[key];
|
||||
|
||||
if (value == null) {
|
||||
value = loader.apply(key);
|
||||
|
@ -597,13 +622,13 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
* LRU cache optimized for primitive int keys
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static class Int2IntListLruCache implements Int2IntListMap
|
||||
static class Int2IntSortedSetLruCache implements Int2IntSortedSetMap
|
||||
{
|
||||
private final Int2ObjectLinkedOpenHashMap<IntList> cache;
|
||||
private final Int2ObjectLinkedOpenHashMap<IntSortedSet> cache;
|
||||
private final int maxSize;
|
||||
private final IntFunction<IntList> loader;
|
||||
private final IntFunction<IntSortedSet> loader;
|
||||
|
||||
Int2IntListLruCache(int maxSize, IntFunction<IntList> loader)
|
||||
Int2IntSortedSetLruCache(int maxSize, IntFunction<IntSortedSet> loader)
|
||||
{
|
||||
this.cache = new Int2ObjectLinkedOpenHashMap<>(maxSize);
|
||||
this.maxSize = maxSize;
|
||||
|
@ -611,9 +636,9 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList getAndLoadIfAbsent(int key)
|
||||
public IntSortedSet getAndLoadIfAbsent(int key)
|
||||
{
|
||||
IntList value = cache.getAndMoveToFirst(key);
|
||||
IntSortedSet value = cache.getAndMoveToFirst(key);
|
||||
|
||||
if (value == null) {
|
||||
value = loader.apply(key);
|
||||
|
@ -628,7 +653,7 @@ public class IndexedTableJoinMatcher implements JoinMatcher
|
|||
}
|
||||
|
||||
@VisibleForTesting
|
||||
IntList get(int key)
|
||||
IntSortedSet get(int key)
|
||||
{
|
||||
return cache.get(key);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
|
@ -150,9 +151,10 @@ public class IndexedTableJoinable implements Joinable
|
|||
IndexedTable.Index index = table.columnIndex(filterColumnPosition);
|
||||
IndexedTable.Reader reader = table.columnReader(correlatedColumnPosition);
|
||||
closer.register(reader);
|
||||
IntList rowIndex = index.find(searchColumnValue);
|
||||
for (int i = 0; i < rowIndex.size(); i++) {
|
||||
int rowNum = rowIndex.getInt(i);
|
||||
final IntSortedSet rowIndex = index.find(searchColumnValue);
|
||||
final IntBidirectionalIterator rowIterator = rowIndex.iterator();
|
||||
while (rowIterator.hasNext()) {
|
||||
int rowNum = rowIterator.nextInt();
|
||||
String correlatedDimVal = DimensionHandlerUtils.convertObjectToString(reader.read(rowNum));
|
||||
correlatedValues.add(correlatedDimVal);
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntLists;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSets;
|
||||
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
|
||||
import org.apache.druid.segment.DimensionHandlerUtils;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
|
@ -34,7 +34,7 @@ import java.util.Map;
|
|||
public class MapIndex implements IndexedTable.Index
|
||||
{
|
||||
private final ColumnType keyType;
|
||||
private final Map<Object, IntList> index;
|
||||
private final Map<Object, IntSortedSet> index;
|
||||
private final boolean keysUnique;
|
||||
private final boolean isLong2ObjectMap;
|
||||
|
||||
|
@ -47,7 +47,7 @@ public class MapIndex implements IndexedTable.Index
|
|||
*
|
||||
* @see RowBasedIndexBuilder#build() the main caller
|
||||
*/
|
||||
MapIndex(final ColumnType keyType, final Map<Object, IntList> index, final boolean keysUnique)
|
||||
MapIndex(final ColumnType keyType, final Map<Object, IntSortedSet> index, final boolean keysUnique)
|
||||
{
|
||||
this.keyType = Preconditions.checkNotNull(keyType, "keyType");
|
||||
this.index = Preconditions.checkNotNull(index, "index");
|
||||
|
@ -68,19 +68,19 @@ public class MapIndex implements IndexedTable.Index
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
final Object convertedKey = DimensionHandlerUtils.convertObjectToType(key, keyType, false);
|
||||
|
||||
if (convertedKey != null) {
|
||||
final IntList found = index.get(convertedKey);
|
||||
final IntSortedSet found = index.get(convertedKey);
|
||||
if (found != null) {
|
||||
return found;
|
||||
} else {
|
||||
return IntLists.EMPTY_LIST;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
} else {
|
||||
return IntLists.EMPTY_LIST;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,9 +88,9 @@ public class MapIndex implements IndexedTable.Index
|
|||
public int findUniqueLong(long key)
|
||||
{
|
||||
if (isLong2ObjectMap && keysUnique) {
|
||||
final IntList rows = ((Long2ObjectMap<IntList>) (Map) index).get(key);
|
||||
final IntSortedSet rows = ((Long2ObjectMap<IntSortedSet>) (Map) index).get(key);
|
||||
assert rows == null || rows.size() == 1;
|
||||
return rows != null ? rows.getInt(0) : NOT_FOUND;
|
||||
return rows != null ? rows.firstInt() : NOT_FOUND;
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
|
|
@ -20,8 +20,9 @@
|
|||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import com.google.common.primitives.Ints;
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
|
||||
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectIterator;
|
||||
|
@ -51,7 +52,7 @@ public class RowBasedIndexBuilder
|
|||
private int currentRow = 0;
|
||||
private int nullKeys = 0;
|
||||
private final ColumnType keyType;
|
||||
private final Map<Object, IntList> index;
|
||||
private final Map<Object, IntSortedSet> index;
|
||||
|
||||
private long minLongKey = Long.MAX_VALUE;
|
||||
private long maxLongKey = Long.MIN_VALUE;
|
||||
|
@ -81,7 +82,7 @@ public class RowBasedIndexBuilder
|
|||
final Object castKey = DimensionHandlerUtils.convertObjectToType(key, keyType);
|
||||
|
||||
if (castKey != null) {
|
||||
final IntList rowNums = index.computeIfAbsent(castKey, k -> new IntArrayList());
|
||||
final IntSortedSet rowNums = index.computeIfAbsent(castKey, k -> new IntAVLTreeSet());
|
||||
rowNums.add(currentRow);
|
||||
|
||||
// Track min, max long value so we can decide later on if it's appropriate to use an array-backed implementation.
|
||||
|
@ -132,18 +133,18 @@ public class RowBasedIndexBuilder
|
|||
Arrays.fill(indexAsArray, IndexedTable.Index.NOT_FOUND);
|
||||
|
||||
// Safe to cast to Long2ObjectMap because the constructor always uses one for long-typed keys.
|
||||
final ObjectIterator<Long2ObjectMap.Entry<IntList>> entries =
|
||||
((Long2ObjectMap<IntList>) ((Map) index)).long2ObjectEntrySet().iterator();
|
||||
final ObjectIterator<Long2ObjectMap.Entry<IntSortedSet>> entries =
|
||||
((Long2ObjectMap<IntSortedSet>) ((Map) index)).long2ObjectEntrySet().iterator();
|
||||
|
||||
while (entries.hasNext()) {
|
||||
final Long2ObjectMap.Entry<IntList> entry = entries.next();
|
||||
final IntList rowNums = entry.getValue();
|
||||
final Long2ObjectMap.Entry<IntSortedSet> entry = entries.next();
|
||||
final IntSortedSet rowNums = entry.getValue();
|
||||
|
||||
if (rowNums.size() != 1) {
|
||||
throw new ISE("Expected single element");
|
||||
}
|
||||
|
||||
indexAsArray[Ints.checkedCast(entry.getLongKey() - minLongKey)] = rowNums.getInt(0);
|
||||
indexAsArray[Ints.checkedCast(entry.getLongKey() - minLongKey)] = rowNums.firstInt();
|
||||
entries.remove();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
* Iterates over the intersection of an array of sorted int lists. Intended for situations where the number
|
||||
* of iterators is fairly small. The iterators must be composed of ascending, nonnegative ints.
|
||||
*
|
||||
* @see RowBasedIndexedTable#columnReader uses this
|
||||
*/
|
||||
public class SortedIntIntersectionIterator implements IntIterator
|
||||
{
|
||||
private static final int NIL = -1;
|
||||
|
||||
private final IntIterator[] iterators;
|
||||
private final int[] currents;
|
||||
|
||||
private int next = NIL;
|
||||
|
||||
SortedIntIntersectionIterator(final IntIterator[] iterators)
|
||||
{
|
||||
Preconditions.checkArgument(iterators.length > 0, "iterators.length > 0");
|
||||
this.iterators = iterators;
|
||||
this.currents = new int[iterators.length];
|
||||
Arrays.fill(currents, NIL);
|
||||
advance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInt()
|
||||
{
|
||||
if (next == NIL) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
final int retVal = next;
|
||||
|
||||
advance();
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext()
|
||||
{
|
||||
return next != NIL;
|
||||
}
|
||||
|
||||
private void advance()
|
||||
{
|
||||
next++;
|
||||
|
||||
// This is the part that assumes the number of iterators is fairly small.
|
||||
boolean foundNext = false;
|
||||
while (!foundNext) {
|
||||
foundNext = true;
|
||||
|
||||
for (int i = 0; i < iterators.length; i++) {
|
||||
while (currents[i] < next && iterators[i].hasNext()) {
|
||||
currents[i] = iterators[i].nextInt();
|
||||
}
|
||||
|
||||
if (currents[i] < next && !iterators[i].hasNext()) {
|
||||
next = NIL;
|
||||
return;
|
||||
} else if (currents[i] > next) {
|
||||
next = currents[i];
|
||||
foundNext = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert Arrays.stream(currents).allMatch(x -> x == next);
|
||||
}
|
||||
}
|
|
@ -19,8 +19,8 @@
|
|||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntLists;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSets;
|
||||
import org.apache.druid.segment.DimensionHandlerUtils;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
|
||||
|
@ -61,18 +61,18 @@ public class UniqueLongArrayIndex implements IndexedTable.Index
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
final Long longKey = DimensionHandlerUtils.convertObjectToLong(key);
|
||||
|
||||
if (longKey != null) {
|
||||
final int row = findUniqueLong(longKey);
|
||||
if (row >= 0) {
|
||||
return IntLists.singleton(row);
|
||||
return IntSortedSets.singleton(row);
|
||||
}
|
||||
}
|
||||
|
||||
return IntLists.EMPTY_LIST;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.collections;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
public class RangeIntSetTest
|
||||
{
|
||||
@Test
|
||||
public void test_constructor_zeroZero()
|
||||
{
|
||||
Assert.assertEquals(Collections.emptySet(), new RangeIntSet(0, 0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_constructor_zeroTwo()
|
||||
{
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_contains()
|
||||
{
|
||||
Assert.assertFalse(new RangeIntSet(0, 2).contains(-1));
|
||||
Assert.assertTrue(new RangeIntSet(0, 2).contains(0));
|
||||
Assert.assertTrue(new RangeIntSet(0, 2).contains(1));
|
||||
Assert.assertFalse(new RangeIntSet(0, 2).contains(2));
|
||||
Assert.assertFalse(new RangeIntSet(0, 2).contains(3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_headSet()
|
||||
{
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).headSet(-1));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).headSet(0));
|
||||
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).headSet(1));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).headSet(2));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).headSet(3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_tailSet()
|
||||
{
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).tailSet(-1));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).tailSet(0));
|
||||
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).tailSet(1));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).tailSet(2));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).tailSet(3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_subSet()
|
||||
{
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(-2, -1));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(-1, 0));
|
||||
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).subSet(-1, 1));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(-1, 2));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(-1, 3));
|
||||
Assert.assertEquals(ImmutableSet.of(0), new RangeIntSet(0, 2).subSet(0, 1));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(0, 2));
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2).subSet(0, 3));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(1, 1));
|
||||
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).subSet(1, 2));
|
||||
Assert.assertEquals(ImmutableSet.of(1), new RangeIntSet(0, 2).subSet(1, 3));
|
||||
Assert.assertEquals(ImmutableSet.of(), new RangeIntSet(0, 2).subSet(2, 3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_firstInt()
|
||||
{
|
||||
Assert.assertEquals(0, new RangeIntSet(0, 2).firstInt());
|
||||
Assert.assertThrows(NoSuchElementException.class, () -> new RangeIntSet(0, 0).firstInt());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_lastInt()
|
||||
{
|
||||
Assert.assertEquals(1, new RangeIntSet(0, 2).lastInt());
|
||||
Assert.assertThrows(NoSuchElementException.class, () -> new RangeIntSet(0, 0).firstInt());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_size()
|
||||
{
|
||||
Assert.assertEquals(0, new RangeIntSet(0, 0).size());
|
||||
Assert.assertEquals(2, new RangeIntSet(0, 2).size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_iterator()
|
||||
{
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(0, 1),
|
||||
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_iterator_from()
|
||||
{
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(0, 1),
|
||||
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(0))
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(1),
|
||||
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(1))
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(),
|
||||
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(2))
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(),
|
||||
ImmutableList.copyOf(new RangeIntSet(0, 2).iterator(3))
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_equals()
|
||||
{
|
||||
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(0, 0));
|
||||
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 0));
|
||||
Assert.assertNotEquals(new RangeIntSet(0, 0), new RangeIntSet(0, 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_equals_empty()
|
||||
{
|
||||
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 1));
|
||||
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(1, 0));
|
||||
Assert.assertEquals(new RangeIntSet(0, 0), new RangeIntSet(0, -1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_equals_otherSet()
|
||||
{
|
||||
Assert.assertEquals(ImmutableSet.of(0, 1), new RangeIntSet(0, 2));
|
||||
Assert.assertNotEquals(ImmutableSet.of(0, 1, 2), new RangeIntSet(0, 2));
|
||||
}
|
||||
}
|
|
@ -24,7 +24,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.jackson.DefaultObjectMapper;
|
||||
import org.apache.druid.jackson.SegmentizerModule;
|
||||
|
@ -263,18 +264,19 @@ public class BroadcastSegmentIndexedTableTest extends InitializedNullHandlingTes
|
|||
|
||||
// lets try a few values out
|
||||
for (Object val : vals) {
|
||||
final IntList valIndex = valueIndex.find(val);
|
||||
final IntSortedSet valIndex = valueIndex.find(val);
|
||||
if (val == null) {
|
||||
Assert.assertEquals(0, valIndex.size());
|
||||
} else {
|
||||
Assert.assertTrue(valIndex.size() > 0);
|
||||
for (int i = 0; i < valIndex.size(); i++) {
|
||||
Assert.assertEquals(val, reader.read(valIndex.getInt(i)));
|
||||
final IntBidirectionalIterator rowIterator = valIndex.iterator();
|
||||
while (rowIterator.hasNext()) {
|
||||
Assert.assertEquals(val, reader.read(rowIterator.nextInt()));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (Object val : nonmatchingVals) {
|
||||
final IntList valIndex = valueIndex.find(val);
|
||||
final IntSortedSet valIndex = valueIndex.find(val);
|
||||
Assert.assertEquals(0, valIndex.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,9 +21,10 @@ package org.apache.druid.segment.join.table;
|
|||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.primitives.Ints;
|
||||
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntLists;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSets;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.query.QueryUnsupportedException;
|
||||
import org.apache.druid.segment.BaseLongColumnValueSelector;
|
||||
|
@ -262,7 +263,7 @@ public class IndexedTableJoinMatcherTest
|
|||
);
|
||||
|
||||
Assert.assertNotNull(dimensionProcessor.match());
|
||||
Assert.assertFalse(dimensionProcessor.match().hasNext());
|
||||
Assert.assertTrue(dimensionProcessor.match().isEmpty());
|
||||
|
||||
Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow());
|
||||
}
|
||||
|
@ -284,7 +285,7 @@ public class IndexedTableJoinMatcherTest
|
|||
);
|
||||
|
||||
Assert.assertNotNull(dimensionProcessor.match());
|
||||
Assert.assertFalse(dimensionProcessor.match().hasNext());
|
||||
Assert.assertTrue(dimensionProcessor.match().isEmpty());
|
||||
|
||||
Assert.assertEquals(IndexedTableJoinMatcher.NO_CONDITION_MATCH, dimensionProcessor.matchSingleRow());
|
||||
}
|
||||
|
@ -405,7 +406,7 @@ public class IndexedTableJoinMatcherTest
|
|||
|
||||
public static class Int2IntListLookupTableTest
|
||||
{
|
||||
private IndexedTableJoinMatcher.Int2IntListLookupTable target;
|
||||
private IndexedTableJoinMatcher.Int2IntSortedSetLookupTable target;
|
||||
|
||||
private AtomicLong counter;
|
||||
|
||||
|
@ -413,19 +414,19 @@ public class IndexedTableJoinMatcherTest
|
|||
public void setup()
|
||||
{
|
||||
counter = new AtomicLong(0);
|
||||
IntFunction<IntList> loader = key -> {
|
||||
IntFunction<IntSortedSet> loader = key -> {
|
||||
counter.incrementAndGet();
|
||||
return IntLists.singleton(key);
|
||||
return IntSortedSets.singleton(key);
|
||||
};
|
||||
|
||||
target = new IndexedTableJoinMatcher.Int2IntListLookupTable(SIZE, loader);
|
||||
target = new IndexedTableJoinMatcher.Int2IntSortedSetLookupTable(SIZE, loader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void loadsValueIfAbsent()
|
||||
{
|
||||
int key = 1;
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(1L, counter.longValue());
|
||||
}
|
||||
|
||||
|
@ -433,15 +434,15 @@ public class IndexedTableJoinMatcherTest
|
|||
public void doesNotLoadIfPresent()
|
||||
{
|
||||
int key = 1;
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(1L, counter.longValue());
|
||||
}
|
||||
}
|
||||
|
||||
public static class Int2IntListLruCache
|
||||
public static class Int2IntSortedSetLruCache
|
||||
{
|
||||
private IndexedTableJoinMatcher.Int2IntListLruCache target;
|
||||
private IndexedTableJoinMatcher.Int2IntSortedSetLruCache target;
|
||||
|
||||
private AtomicLong counter;
|
||||
|
||||
|
@ -449,19 +450,19 @@ public class IndexedTableJoinMatcherTest
|
|||
public void setup()
|
||||
{
|
||||
counter = new AtomicLong(0);
|
||||
IntFunction<IntList> loader = key -> {
|
||||
IntFunction<IntSortedSet> loader = key -> {
|
||||
counter.incrementAndGet();
|
||||
return IntLists.singleton(key);
|
||||
return IntSortedSets.singleton(key);
|
||||
};
|
||||
|
||||
target = new IndexedTableJoinMatcher.Int2IntListLruCache(SIZE, loader);
|
||||
target = new IndexedTableJoinMatcher.Int2IntSortedSetLruCache(SIZE, loader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void loadsValueIfAbsent()
|
||||
{
|
||||
int key = 1;
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(1L, counter.longValue());
|
||||
}
|
||||
|
||||
|
@ -469,8 +470,8 @@ public class IndexedTableJoinMatcherTest
|
|||
public void doesNotLoadIfPresent()
|
||||
{
|
||||
int key = 1;
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(1L, counter.longValue());
|
||||
}
|
||||
|
||||
|
@ -481,10 +482,10 @@ public class IndexedTableJoinMatcherTest
|
|||
int next = start + SIZE;
|
||||
|
||||
for (int key = start; key < next; key++) {
|
||||
Assert.assertEquals(IntLists.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
Assert.assertEquals(IntSortedSets.singleton(key), target.getAndLoadIfAbsent(key));
|
||||
}
|
||||
|
||||
Assert.assertEquals(IntLists.singleton(next), target.getAndLoadIfAbsent(next));
|
||||
Assert.assertEquals(IntSortedSets.singleton(next), target.getAndLoadIfAbsent(next));
|
||||
Assert.assertNull(target.get(start));
|
||||
|
||||
Assert.assertEquals(SIZE + 1, counter.longValue());
|
||||
|
@ -508,9 +509,9 @@ public class IndexedTableJoinMatcherTest
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
return IntLists.singleton(((String) key).length());
|
||||
return IntSortedSets.singleton(((String) key).length());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -538,12 +539,12 @@ public class IndexedTableJoinMatcherTest
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
if ("1".equals(DimensionHandlerUtils.convertObjectToString(key))) {
|
||||
return IntLists.singleton(3);
|
||||
return IntSortedSets.singleton(3);
|
||||
} else {
|
||||
return IntLists.EMPTY_LIST;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -572,14 +573,14 @@ public class IndexedTableJoinMatcherTest
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
final Long l = DimensionHandlerUtils.convertObjectToLong(key);
|
||||
|
||||
if (l == null && NullHandling.sqlCompatible()) {
|
||||
return IntLists.EMPTY_LIST;
|
||||
return IntSortedSets.EMPTY_SET;
|
||||
} else {
|
||||
return IntLists.singleton(Ints.checkedCast((l == null ? 0L : l) + 1));
|
||||
return IntSortedSets.singleton(Ints.checkedCast((l == null ? 0L : l) + 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -608,9 +609,9 @@ public class IndexedTableJoinMatcherTest
|
|||
}
|
||||
|
||||
@Override
|
||||
public IntList find(Object key)
|
||||
public IntSortedSet find(Object key)
|
||||
{
|
||||
return new IntArrayList(new int[]{1, 2, 3});
|
||||
return new IntAVLTreeSet(new int[]{1, 2, 3});
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntAVLTreeSet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSortedSet;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.hamcrest.CoreMatchers;
|
||||
import org.junit.Assert;
|
||||
|
@ -50,13 +50,13 @@ public class RowBasedIndexBuilderTest
|
|||
Assert.assertEquals(ColumnType.STRING, index.keyType());
|
||||
Assert.assertTrue(index.areKeysUnique());
|
||||
|
||||
Assert.assertEquals(intList(0), index.find("abc"));
|
||||
Assert.assertEquals(intList(1), index.find(""));
|
||||
Assert.assertEquals(intList(3), index.find(1L));
|
||||
Assert.assertEquals(intList(3), index.find("1"));
|
||||
Assert.assertEquals(intList(4), index.find("def"));
|
||||
Assert.assertEquals(intList(), index.find(null));
|
||||
Assert.assertEquals(intList(), index.find("nonexistent"));
|
||||
Assert.assertEquals(intSet(0), index.find("abc"));
|
||||
Assert.assertEquals(intSet(1), index.find(""));
|
||||
Assert.assertEquals(intSet(3), index.find(1L));
|
||||
Assert.assertEquals(intSet(3), index.find("1"));
|
||||
Assert.assertEquals(intSet(4), index.find("def"));
|
||||
Assert.assertEquals(intSet(), index.find(null));
|
||||
Assert.assertEquals(intSet(), index.find("nonexistent"));
|
||||
|
||||
expectedException.expect(UnsupportedOperationException.class);
|
||||
index.findUniqueLong(0L);
|
||||
|
@ -80,13 +80,13 @@ public class RowBasedIndexBuilderTest
|
|||
Assert.assertEquals(ColumnType.STRING, index.keyType());
|
||||
Assert.assertFalse(index.areKeysUnique());
|
||||
|
||||
Assert.assertEquals(intList(0, 3), index.find("abc"));
|
||||
Assert.assertEquals(intList(1), index.find(""));
|
||||
Assert.assertEquals(intList(4), index.find(1L));
|
||||
Assert.assertEquals(intList(4), index.find("1"));
|
||||
Assert.assertEquals(intList(5), index.find("def"));
|
||||
Assert.assertEquals(intList(), index.find(null));
|
||||
Assert.assertEquals(intList(), index.find("nonexistent"));
|
||||
Assert.assertEquals(intSet(0, 3), index.find("abc"));
|
||||
Assert.assertEquals(intSet(1), index.find(""));
|
||||
Assert.assertEquals(intSet(4), index.find(1L));
|
||||
Assert.assertEquals(intSet(4), index.find("1"));
|
||||
Assert.assertEquals(intSet(5), index.find("def"));
|
||||
Assert.assertEquals(intSet(), index.find(null));
|
||||
Assert.assertEquals(intSet(), index.find("nonexistent"));
|
||||
|
||||
expectedException.expect(UnsupportedOperationException.class);
|
||||
index.findUniqueLong(0L);
|
||||
|
@ -107,10 +107,10 @@ public class RowBasedIndexBuilderTest
|
|||
Assert.assertEquals(ColumnType.LONG, index.keyType());
|
||||
Assert.assertTrue(index.areKeysUnique());
|
||||
|
||||
Assert.assertEquals(intList(0), index.find(1L));
|
||||
Assert.assertEquals(intList(1), index.find(5L));
|
||||
Assert.assertEquals(intList(2), index.find(2L));
|
||||
Assert.assertEquals(intList(), index.find(3L));
|
||||
Assert.assertEquals(intSet(0), index.find(1L));
|
||||
Assert.assertEquals(intSet(1), index.find(5L));
|
||||
Assert.assertEquals(intSet(2), index.find(2L));
|
||||
Assert.assertEquals(intSet(), index.find(3L));
|
||||
|
||||
Assert.assertEquals(0, index.findUniqueLong(1L));
|
||||
Assert.assertEquals(1, index.findUniqueLong(5L));
|
||||
|
@ -133,10 +133,10 @@ public class RowBasedIndexBuilderTest
|
|||
Assert.assertEquals(ColumnType.LONG, index.keyType());
|
||||
Assert.assertTrue(index.areKeysUnique());
|
||||
|
||||
Assert.assertEquals(intList(0), index.find(1L));
|
||||
Assert.assertEquals(intList(1), index.find(10_000_000L));
|
||||
Assert.assertEquals(intList(2), index.find(2L));
|
||||
Assert.assertEquals(intList(), index.find(3L));
|
||||
Assert.assertEquals(intSet(0), index.find(1L));
|
||||
Assert.assertEquals(intSet(1), index.find(10_000_000L));
|
||||
Assert.assertEquals(intSet(2), index.find(2L));
|
||||
Assert.assertEquals(intSet(), index.find(3L));
|
||||
|
||||
Assert.assertEquals(0, index.findUniqueLong(1L));
|
||||
Assert.assertEquals(1, index.findUniqueLong(10_000_000L));
|
||||
|
@ -160,20 +160,20 @@ public class RowBasedIndexBuilderTest
|
|||
Assert.assertEquals(ColumnType.LONG, index.keyType());
|
||||
Assert.assertFalse(index.areKeysUnique());
|
||||
|
||||
Assert.assertEquals(intList(0, 2), index.find("1"));
|
||||
Assert.assertEquals(intList(0, 2), index.find(1));
|
||||
Assert.assertEquals(intList(0, 2), index.find(1L));
|
||||
Assert.assertEquals(intList(1), index.find(5L));
|
||||
Assert.assertEquals(intList(3), index.find(2L));
|
||||
Assert.assertEquals(intList(), index.find(3L));
|
||||
Assert.assertEquals(intSet(0, 2), index.find("1"));
|
||||
Assert.assertEquals(intSet(0, 2), index.find(1));
|
||||
Assert.assertEquals(intSet(0, 2), index.find(1L));
|
||||
Assert.assertEquals(intSet(1), index.find(5L));
|
||||
Assert.assertEquals(intSet(3), index.find(2L));
|
||||
Assert.assertEquals(intSet(), index.find(3L));
|
||||
|
||||
expectedException.expect(UnsupportedOperationException.class);
|
||||
index.findUniqueLong(5L);
|
||||
}
|
||||
|
||||
public IntList intList(final int... ints)
|
||||
public IntSortedSet intSet(final int... ints)
|
||||
{
|
||||
final IntArrayList retVal = new IntArrayList(ints.length);
|
||||
final IntAVLTreeSet retVal = new IntAVLTreeSet();
|
||||
for (int i : ints) {
|
||||
retVal.add(i);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
|
@ -92,9 +91,9 @@ public class RowBasedIndexedTableTest
|
|||
{
|
||||
final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_ISO_CODE);
|
||||
|
||||
Assert.assertEquals(ImmutableList.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableList.of(), index.find(2));
|
||||
Assert.assertEquals(ImmutableList.of(13), index.find("US"));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find(2));
|
||||
Assert.assertEquals(ImmutableSet.of(13), index.find("US"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -102,15 +101,15 @@ public class RowBasedIndexedTableTest
|
|||
{
|
||||
final IndexedTable.Index index = countriesTable.columnIndex(INDEX_COUNTRIES_COUNTRY_NUMBER);
|
||||
|
||||
Assert.assertEquals(ImmutableList.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableList.of(0), index.find(0));
|
||||
Assert.assertEquals(ImmutableList.of(0), index.find(0.0));
|
||||
Assert.assertEquals(ImmutableList.of(0), index.find("0"));
|
||||
Assert.assertEquals(ImmutableList.of(2), index.find(2));
|
||||
Assert.assertEquals(ImmutableList.of(2), index.find(2.0));
|
||||
Assert.assertEquals(ImmutableList.of(2), index.find("2"));
|
||||
Assert.assertEquals(ImmutableList.of(), index.find(20));
|
||||
Assert.assertEquals(ImmutableList.of(), index.find("US"));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableSet.of(0), index.find(0));
|
||||
Assert.assertEquals(ImmutableSet.of(0), index.find(0.0));
|
||||
Assert.assertEquals(ImmutableSet.of(0), index.find("0"));
|
||||
Assert.assertEquals(ImmutableSet.of(2), index.find(2));
|
||||
Assert.assertEquals(ImmutableSet.of(2), index.find(2.0));
|
||||
Assert.assertEquals(ImmutableSet.of(2), index.find("2"));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find(20));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find("US"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -132,11 +131,11 @@ public class RowBasedIndexedTableTest
|
|||
{
|
||||
final IndexedTable.Index index = regionsTable.columnIndex(INDEX_REGIONS_REGION_ISO_CODE);
|
||||
|
||||
Assert.assertEquals(ImmutableList.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableList.of(0), index.find("11"));
|
||||
Assert.assertEquals(ImmutableList.of(1), index.find(13));
|
||||
Assert.assertEquals(ImmutableList.of(12), index.find("QC"));
|
||||
Assert.assertEquals(ImmutableList.of(15, 16), index.find("VA"));
|
||||
Assert.assertEquals(ImmutableSet.of(), index.find(null));
|
||||
Assert.assertEquals(ImmutableSet.of(0), index.find("11"));
|
||||
Assert.assertEquals(ImmutableSet.of(1), index.find(13));
|
||||
Assert.assertEquals(ImmutableSet.of(12), index.find("QC"));
|
||||
Assert.assertEquals(ImmutableSet.of(15, 16), index.find("VA"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.join.table;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class SortedIntIntersectionIteratorTest
|
||||
{
|
||||
@Test
|
||||
public void test_iterator_allPossibleSingleListsWithCardinalityUpToThree()
|
||||
{
|
||||
// 8 possibilities
|
||||
for (int i = 0; i < 8; i++) {
|
||||
final IntList ints = intsFromBits(i);
|
||||
Assert.assertEquals(ints.toString(), ints, intersection(ints));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_iterator_allPossibleSetsOfTwoListsWithCardinalityUpToSix()
|
||||
{
|
||||
// 4096 possibilities: 64 for each list, 2 lists
|
||||
for (int i = 0; i < 4096; i++) {
|
||||
final int bits1 = i & 63;
|
||||
final int bits2 = (i >> 6) & 63;
|
||||
|
||||
final IntList ints1 = intsFromBits(bits1);
|
||||
final IntList ints2 = intsFromBits(bits2);
|
||||
|
||||
Assert.assertEquals(
|
||||
StringUtils.format("ints1 = %s; ints2 = %s", ints1, ints2),
|
||||
intsFromBits(bits1 & bits2),
|
||||
intersection(ints1, ints2)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_iterator_allPossibleSetsOfThreeListsWithCardinalityUpToFour()
|
||||
{
|
||||
// 4096 possibilities: 16 for each list, 3 lists
|
||||
for (int i = 0; i < 4096; i++) {
|
||||
final int bits1 = i & 15;
|
||||
final int bits2 = (i >> 4) & 15;
|
||||
final int bits3 = (i >> 8) & 15;
|
||||
|
||||
final IntList ints1 = intsFromBits(bits1);
|
||||
final IntList ints2 = intsFromBits(bits2);
|
||||
final IntList ints3 = intsFromBits(bits3);
|
||||
|
||||
Assert.assertEquals(
|
||||
StringUtils.format("ints1 = %s; ints2 = %s; ints3 = %s", ints1, ints2, ints3),
|
||||
intsFromBits(bits1 & bits2 & bits3),
|
||||
intersection(ints1, ints2, ints3)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static IntList intersection(final IntList... lists)
|
||||
{
|
||||
final SortedIntIntersectionIterator comboIterator = new SortedIntIntersectionIterator(
|
||||
Arrays.stream(lists)
|
||||
.map(IntList::iterator)
|
||||
.toArray(IntIterator[]::new)
|
||||
);
|
||||
|
||||
return new IntArrayList(comboIterator);
|
||||
}
|
||||
|
||||
private static IntList intsFromBits(final int bits)
|
||||
{
|
||||
final IntArrayList retVal = new IntArrayList(4);
|
||||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (((bits >> i) & 1) == 1) {
|
||||
retVal.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue