mirror of https://github.com/apache/druid.git
Direct UTF-8 access for "in" filters. (#12517)
* Direct UTF-8 access for "in" filters. Directly related: 1) InDimFilter: Store stored Strings (in ValuesSet) plus sorted UTF-8 ByteBuffers (in valuesUtf8). Use valuesUtf8 whenever possible. If necessary, the input set is copied into a ValuesSet. Much logic is simplified, because we always know what type the values set will be. I think that there won't even be an efficiency loss in most cases. InDimFilter is most frequently created by deserialization, and this patch updates the JsonCreator constructor to deserialize directly into a ValuesSet. 2) Add Utf8ValueSetIndex, which InDimFilter uses to avoid UTF-8 decodes during index lookups. 3) Add unsigned comparator to ByteBufferUtils and use it in GenericIndexed.BYTE_BUFFER_STRATEGY. This is important because UTF-8 bytes can be compared as bytes if, and only if, the comparison is unsigned. 4) Add specialization to GenericIndexed.singleThreaded().indexOf that avoids needless ByteBuffer allocations. 5) Clarify that objects returned by ColumnIndexSupplier.as are not thread-safe. DictionaryEncodedStringIndexSupplier now calls singleThreaded() on all relevant GenericIndexed objects, saving a ByteBuffer allocation per access. Also: 1) Fix performance regression in LikeFilter: since #12315, it applied the suffix matcher to all values in range even for type MATCH_ALL. 2) Add ObjectStrategy.canCompare() method. This fixes LikeFilterBenchmark, which was broken due to calls to strategy.compare in GenericIndexed.fromIterable. * Add like-filter implementation tests. * Add in-filter implementation tests. * Add tests, fix issues. * Fix style. * Adjustments from review.
This commit is contained in:
parent
a235aca2b3
commit
69aac6c8dd
|
@ -27,6 +27,7 @@ import org.apache.druid.collections.bitmap.MutableBitmap;
|
|||
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.extendedset.intset.ConciseSetUtils;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.filter.BoundDimFilter;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.query.ordering.StringComparators;
|
||||
|
@ -48,6 +49,7 @@ import org.openjdk.jmh.annotations.Setup;
|
|||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -160,7 +162,7 @@ public class BoundFilterBenchmark
|
|||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
|
||||
final List<Integer> ints = generateInts();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints).transform(i -> i.toString()),
|
||||
FluentIterable.from(ints).transform(Object::toString),
|
||||
GenericIndexed.STRING_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
|
||||
|
@ -174,9 +176,14 @@ public class BoundFilterBenchmark
|
|||
),
|
||||
serdeFactory.getObjectStrategy()
|
||||
);
|
||||
final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
|
||||
GenericIndexed.BYTE_BUFFER_STRATEGY
|
||||
);
|
||||
selector = new MockColumnIndexSelector(
|
||||
bitmapFactory,
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, bitmaps, null)
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.apache.druid.benchmark;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.FluentIterable;
|
||||
|
@ -28,6 +27,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
|||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.query.filter.DruidDoublePredicate;
|
||||
import org.apache.druid.query.filter.DruidFloatPredicate;
|
||||
|
@ -51,6 +51,7 @@ import org.openjdk.jmh.annotations.Setup;
|
|||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -123,18 +124,14 @@ public class DimensionPredicateFilterBenchmark
|
|||
final List<Integer> ints = generateInts();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(
|
||||
new Function<Integer, String>()
|
||||
{
|
||||
@Override
|
||||
public String apply(Integer i)
|
||||
{
|
||||
return i.toString();
|
||||
}
|
||||
}
|
||||
),
|
||||
.transform(Object::toString),
|
||||
GenericIndexed.STRING_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
|
||||
GenericIndexed.BYTE_BUFFER_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(
|
||||
|
@ -148,7 +145,7 @@ public class DimensionPredicateFilterBenchmark
|
|||
);
|
||||
selector = new MockColumnIndexSelector(
|
||||
bitmapFactory,
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, bitmaps, null)
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.benchmark;
|
||||
|
||||
import com.google.common.collect.FluentIterable;
|
||||
import org.apache.druid.collections.bitmap.BitmapFactory;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.query.filter.InDimFilter;
|
||||
import org.apache.druid.segment.data.BitmapSerdeFactory;
|
||||
import org.apache.druid.segment.data.GenericIndexed;
|
||||
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
|
||||
import org.apache.druid.segment.filter.Filters;
|
||||
import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
@Fork(value = 1)
|
||||
@Warmup(iterations = 10)
|
||||
@Measurement(iterations = 10)
|
||||
public class InFilterBenchmark
|
||||
{
|
||||
static {
|
||||
NullHandling.initializeForTests();
|
||||
}
|
||||
|
||||
private static final int START_INT = 10_000_000;
|
||||
|
||||
private InDimFilter inFilter;
|
||||
|
||||
// cardinality of the dictionary. it will contain this many ints (as strings, of course), starting at START_INT,
|
||||
// even numbers only.
|
||||
@Param({"1000000"})
|
||||
int dictionarySize;
|
||||
|
||||
// cardinality of the "in" filter. half of its values will be in the dictionary, half will not.
|
||||
@Param({"10000"})
|
||||
int filterSize;
|
||||
|
||||
// selector will contain a "dictionarySize" number of bitmaps; each one contains a single int.
|
||||
// this benchmark is not about bitmap union speed, so no need for that part to be realistic.
|
||||
ColumnIndexSelector selector;
|
||||
|
||||
@Setup
|
||||
public void setup()
|
||||
{
|
||||
final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
|
||||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
|
||||
final Iterable<Integer> ints = intGenerator();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(Object::toString),
|
||||
GenericIndexed.STRING_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
|
||||
GenericIndexed.BYTE_BUFFER_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
|
||||
() -> IntStream.range(0, dictionarySize)
|
||||
.mapToObj(
|
||||
i -> {
|
||||
final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
|
||||
mutableBitmap.add(i);
|
||||
return bitmapFactory.makeImmutableBitmap(mutableBitmap);
|
||||
}
|
||||
)
|
||||
.iterator(),
|
||||
serdeFactory.getObjectStrategy()
|
||||
);
|
||||
selector = new MockColumnIndexSelector(
|
||||
bitmapFactory,
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null)
|
||||
);
|
||||
inFilter = new InDimFilter(
|
||||
"dummy",
|
||||
IntStream.range(START_INT, START_INT + filterSize).mapToObj(String::valueOf).collect(Collectors.toSet())
|
||||
);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.MICROSECONDS)
|
||||
public void doFilter(Blackhole blackhole)
|
||||
{
|
||||
final ImmutableBitmap bitmapIndex = Filters.computeDefaultBitmapResults(inFilter, selector);
|
||||
blackhole.consume(bitmapIndex);
|
||||
}
|
||||
|
||||
private Iterable<Integer> intGenerator()
|
||||
{
|
||||
// i * 2 => half of these values will be present in the inFilter, half won't.
|
||||
return () -> IntStream.range(0, dictionarySize).map(i -> START_INT + i * 2).boxed().iterator();
|
||||
}
|
||||
}
|
|
@ -25,6 +25,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
|||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.filter.BoundDimFilter;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
|
@ -50,6 +51,7 @@ import org.openjdk.jmh.annotations.State;
|
|||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -122,11 +124,14 @@ public class LikeFilterBenchmark
|
|||
final List<Integer> ints = generateInts();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(
|
||||
i -> i.toString()
|
||||
),
|
||||
.transform(Object::toString),
|
||||
GenericIndexed.STRING_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
|
||||
GenericIndexed.BYTE_BUFFER_STRATEGY
|
||||
);
|
||||
final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
.transform(
|
||||
|
@ -140,7 +145,7 @@ public class LikeFilterBenchmark
|
|||
);
|
||||
selector = new MockColumnIndexSelector(
|
||||
bitmapFactory,
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, bitmaps, null)
|
||||
new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,12 +23,14 @@ import org.apache.druid.collections.ResourceHolder;
|
|||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.utils.JvmUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.lang.invoke.MethodHandle;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.MethodType;
|
||||
import java.lang.reflect.Method;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.util.Comparator;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
|
@ -48,6 +50,8 @@ public class ByteBufferUtils
|
|||
// null if unmap is supported
|
||||
private static final RuntimeException UNMAP_NOT_SUPPORTED_EXCEPTION;
|
||||
|
||||
private static final Comparator<ByteBuffer> COMPARATOR_UNSIGNED = new UnsignedByteBufferComparator();
|
||||
|
||||
static {
|
||||
Object unmap = null;
|
||||
RuntimeException exception = null;
|
||||
|
@ -211,4 +215,79 @@ public class ByteBufferUtils
|
|||
{
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two ByteBuffer ranges using unsigned byte ordering.
|
||||
*
|
||||
* Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
|
||||
*/
|
||||
public static int compareByteBuffers(
|
||||
final ByteBuffer buf1,
|
||||
final int position1,
|
||||
final int length1,
|
||||
final ByteBuffer buf2,
|
||||
final int position2,
|
||||
final int length2
|
||||
)
|
||||
{
|
||||
final int commonLength = Math.min(length1, length2);
|
||||
|
||||
for (int i = 0; i < commonLength; i++) {
|
||||
final byte byte1 = buf1.get(position1 + i);
|
||||
final byte byte2 = buf2.get(position2 + i);
|
||||
final int cmp = (byte1 & 0xFF) - (byte2 & 0xFF); // Unsigned comparison
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
|
||||
return Integer.compare(length1, length2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two ByteBuffers from their positions to their limits using unsigned byte ordering. Accepts null
|
||||
* buffers, which are ordered earlier than any nonnull buffer.
|
||||
*
|
||||
* Different from {@link ByteBuffer#compareTo}, which uses signed ordering.
|
||||
*/
|
||||
public static int compareByteBuffers(
|
||||
@Nullable final ByteBuffer buf1,
|
||||
@Nullable final ByteBuffer buf2
|
||||
)
|
||||
{
|
||||
if (buf1 == null) {
|
||||
return buf2 == null ? 0 : -1;
|
||||
}
|
||||
|
||||
if (buf2 == null) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return ByteBufferUtils.compareByteBuffers(
|
||||
buf1,
|
||||
buf1.position(),
|
||||
buf1.remaining(),
|
||||
buf2,
|
||||
buf2.position(),
|
||||
buf2.remaining()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparator that compares two {@link ByteBuffer} using unsigned ordering. Null buffers are accepted, and
|
||||
* are ordered earlier than any nonnull buffer.
|
||||
*/
|
||||
public static Comparator<ByteBuffer> unsignedComparator()
|
||||
{
|
||||
return COMPARATOR_UNSIGNED;
|
||||
}
|
||||
|
||||
private static class UnsignedByteBufferComparator implements Comparator<ByteBuffer>
|
||||
{
|
||||
@Override
|
||||
public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
|
||||
{
|
||||
return ByteBufferUtils.compareByteBuffers(o1, o2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -108,6 +108,11 @@ public class StringUtils
|
|||
return StringUtils.fromUtf8(buffer, buffer.remaining());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string to a UTF-8 byte array.
|
||||
*
|
||||
* @throws NullPointerException if "string" is null
|
||||
*/
|
||||
public static byte[] toUtf8(final String string)
|
||||
{
|
||||
try {
|
||||
|
@ -119,6 +124,16 @@ public class StringUtils
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string to UTF-8 bytes, returning them as a newly-allocated on-heap {@link ByteBuffer}.
|
||||
* If "string" is null, returns null.
|
||||
*/
|
||||
@Nullable
|
||||
public static ByteBuffer toUtf8ByteBuffer(@Nullable final String string)
|
||||
{
|
||||
return string == null ? null : ByteBuffer.wrap(toUtf8(string));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes "string" into the buffer "byteBuffer", using no more than the number of bytes remaining in the buffer.
|
||||
* Will only encode whole characters. The byteBuffer's position and limit may be changed during operation, but will
|
||||
|
|
|
@ -23,7 +23,10 @@ import org.apache.druid.java.util.common.StringUtils;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class StringUtilsTest
|
||||
{
|
||||
|
@ -76,4 +79,12 @@ public class StringUtilsTest
|
|||
Assert.assertEquals(4, StringUtils.estimatedBinaryLengthAsUTF8(invalid));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToUtf8ByteBuffer()
|
||||
{
|
||||
Assert.assertNull(StringUtils.toUtf8ByteBuffer(null));
|
||||
Assert.assertEquals(ByteBuffer.allocate(0), StringUtils.toUtf8ByteBuffer(""));
|
||||
Assert.assertEquals(ByteBuffer.wrap(StringUtils.toUtf8("foo")), StringUtils.toUtf8ByteBuffer("foo"));
|
||||
Assert.assertEquals(ByteBuffer.wrap(StringUtils.toUtf8("🙂")), StringUtils.toUtf8ByteBuffer("🙂"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ package org.apache.druid.java.util.common;
|
|||
|
||||
import com.google.common.io.Files;
|
||||
import org.apache.druid.collections.ResourceHolder;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
|
@ -33,6 +35,7 @@ import java.io.OutputStream;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
public class ByteBufferUtilsTest
|
||||
{
|
||||
|
@ -76,4 +79,53 @@ public class ByteBufferUtilsTest
|
|||
final ByteBuffer heapBuffer = ByteBuffer.allocate(4096);
|
||||
ByteBufferUtils.free(heapBuffer);
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("EqualsWithItself")
|
||||
public void testUnsignedComparator()
|
||||
{
|
||||
final Comparator<ByteBuffer> comparator = ByteBufferUtils.unsignedComparator();
|
||||
|
||||
// Tests involving null
|
||||
MatcherAssert.assertThat(comparator.compare(null, null), Matchers.equalTo(0));
|
||||
MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.allocate(0)), Matchers.lessThan(0));
|
||||
MatcherAssert.assertThat(comparator.compare(ByteBuffer.allocate(0), null), Matchers.greaterThan(0));
|
||||
MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.allocate(1)), Matchers.lessThan(0));
|
||||
MatcherAssert.assertThat(comparator.compare(ByteBuffer.allocate(1), null), Matchers.greaterThan(0));
|
||||
MatcherAssert.assertThat(comparator.compare(null, ByteBuffer.wrap(new byte[]{-1})), Matchers.lessThan(0));
|
||||
MatcherAssert.assertThat(comparator.compare(ByteBuffer.wrap(new byte[]{-1}), null), Matchers.greaterThan(0));
|
||||
|
||||
// Tests involving buffers of different lengths
|
||||
MatcherAssert.assertThat(
|
||||
comparator.compare(
|
||||
ByteBuffer.wrap(new byte[]{1, 2, 3}),
|
||||
ByteBuffer.wrap(new byte[]{1, 2, 3, 4})
|
||||
),
|
||||
Matchers.lessThan(0)
|
||||
);
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
comparator.compare(
|
||||
ByteBuffer.wrap(new byte[]{1, 2, 3, 4}),
|
||||
ByteBuffer.wrap(new byte[]{1, 2, 3})
|
||||
),
|
||||
Matchers.greaterThan(0)
|
||||
);
|
||||
|
||||
// Tests involving the full range of bytes
|
||||
for (byte i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) {
|
||||
for (byte j = Byte.MIN_VALUE; j < Byte.MAX_VALUE; j++) {
|
||||
final int cmp = Integer.compare(Byte.toUnsignedInt(i), Byte.toUnsignedInt(j));
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
StringUtils.format("comparison of %s to %s", Byte.toUnsignedInt(i), Byte.toUnsignedInt(j)),
|
||||
comparator.compare(
|
||||
ByteBuffer.wrap(new byte[]{i}),
|
||||
ByteBuffer.wrap(new byte[]{j})
|
||||
),
|
||||
cmp < 0 ? Matchers.lessThan(0) : cmp > 0 ? Matchers.greaterThan(0) : Matchers.equalTo(0)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -210,7 +210,7 @@ public class Druids
|
|||
{
|
||||
final Set<String> filterValues = Sets.newHashSet(values);
|
||||
filterValues.add(value);
|
||||
dimFilter = new InDimFilter(dimensionName, filterValues, null, null);
|
||||
dimFilter = new InDimFilter(dimensionName, filterValues);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,11 +29,11 @@ import com.google.common.base.Preconditions;
|
|||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.base.Suppliers;
|
||||
import com.google.common.collect.ForwardingSortedSet;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Range;
|
||||
import com.google.common.collect.RangeSet;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.TreeRangeSet;
|
||||
import com.google.common.hash.Hasher;
|
||||
import com.google.common.hash.Hashing;
|
||||
|
@ -42,6 +42,7 @@ import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
|||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.ByteBufferUtils;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
|
@ -59,25 +60,26 @@ import org.apache.druid.segment.DimensionHandlerUtils;
|
|||
import org.apache.druid.segment.column.BitmapColumnIndex;
|
||||
import org.apache.druid.segment.column.ColumnIndexSupplier;
|
||||
import org.apache.druid.segment.column.StringValueSetIndex;
|
||||
import org.apache.druid.segment.column.Utf8ValueSetIndex;
|
||||
import org.apache.druid.segment.filter.Filters;
|
||||
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
|
||||
public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
||||
{
|
||||
// Values can contain `null` object
|
||||
private final Set<String> values;
|
||||
// Values can contain `null` object. Values are sorted (nulls-first).
|
||||
private final ValuesSet values;
|
||||
// Computed eagerly, not lazily, because lazy computations would block all processing threads for a given query.
|
||||
private final SortedSet<ByteBuffer> valuesUtf8;
|
||||
private final String dimension;
|
||||
@Nullable
|
||||
private final ExtractionFn extractionFn;
|
||||
|
@ -103,7 +105,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
@JsonProperty("dimension") String dimension,
|
||||
// This 'values' collection instance can be reused if possible to avoid copying a big collection.
|
||||
// Callers should _not_ modify the collection after it is passed to this constructor.
|
||||
@JsonProperty("values") Set<String> values,
|
||||
@JsonProperty("values") ValuesSet values,
|
||||
@JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
|
||||
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
|
||||
)
|
||||
|
@ -121,32 +123,35 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
* Creates a new filter without an extraction function or any special filter tuning.
|
||||
*
|
||||
* @param dimension column to search
|
||||
* @param values set of values to match. This collection may be reused to avoid copying a big collection.
|
||||
* Therefore, callers should <b>not</b> modify the collection after it is passed to this
|
||||
* constructor.
|
||||
* @param values set of values to match. If this collection is a {@link SortedSet}, it may be reused to avoid
|
||||
* copying a big collection. Therefore, callers should <b>not</b> modify the collection after it
|
||||
* is passed to this constructor.
|
||||
*/
|
||||
public InDimFilter(
|
||||
String dimension,
|
||||
Set<String> values
|
||||
)
|
||||
public InDimFilter(String dimension, Set<String> values)
|
||||
{
|
||||
this(
|
||||
dimension,
|
||||
values,
|
||||
values instanceof ValuesSet ? (ValuesSet) values : new ValuesSet(values),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor should be called only in unit tests since accepting a Collection makes copying more likely.
|
||||
* Creates a new filter without an extraction function or any special filter tuning.
|
||||
*
|
||||
* @param dimension column to search
|
||||
* @param values set of values to match. If this collection is a {@link SortedSet}, it may be reused to avoid
|
||||
* copying a big collection. Therefore, callers should <b>not</b> modify the collection after it
|
||||
* is passed to this constructor.
|
||||
* @param extractionFn extraction function to apply to the column before checking against "values"
|
||||
*/
|
||||
@VisibleForTesting
|
||||
public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
|
||||
{
|
||||
this(
|
||||
dimension,
|
||||
values instanceof Set ? (Set<String>) values : new HashSet<>(values),
|
||||
values instanceof ValuesSet ? (ValuesSet) values : new ValuesSet(values),
|
||||
extractionFn,
|
||||
null,
|
||||
null
|
||||
|
@ -158,7 +163,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
*/
|
||||
private InDimFilter(
|
||||
final String dimension,
|
||||
final Set<String> values,
|
||||
final ValuesSet values,
|
||||
@Nullable final ExtractionFn extractionFn,
|
||||
@Nullable final FilterTuning filterTuning,
|
||||
@Nullable final DruidPredicateFactory predicateFactory
|
||||
|
@ -166,19 +171,15 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
{
|
||||
Preconditions.checkNotNull(values, "values cannot be null");
|
||||
|
||||
// The values set can be huge. Try to avoid copying the set if possible.
|
||||
// Note that we may still need to copy values to a list for caching. See getCacheKey().
|
||||
this.values = values;
|
||||
|
||||
if (!NullHandling.sqlCompatible() && values.contains("")) {
|
||||
// In Non sql compatible mode, empty strings should be converted to nulls for the filter.
|
||||
// In sql compatible mode, empty strings and nulls should be treated differently
|
||||
this.values = Sets.newHashSetWithExpectedSize(values.size());
|
||||
for (String v : values) {
|
||||
this.values.add(NullHandling.emptyToNullIfNeeded(v));
|
||||
}
|
||||
} else {
|
||||
this.values = values;
|
||||
// In non-SQL-compatible mode, empty strings must be converted to nulls for the filter.
|
||||
this.values.remove("");
|
||||
this.values.add(null);
|
||||
}
|
||||
|
||||
this.valuesUtf8 = this.values.toUtf8();
|
||||
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
|
||||
this.extractionFn = extractionFn;
|
||||
this.filterTuning = filterTuning;
|
||||
|
@ -199,7 +200,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
}
|
||||
|
||||
@JsonProperty
|
||||
public Set<String> getValues()
|
||||
public SortedSet<String> getValues()
|
||||
{
|
||||
return values;
|
||||
}
|
||||
|
@ -296,9 +297,15 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
selector
|
||||
);
|
||||
}
|
||||
final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
|
||||
if (valueSetIndex != null) {
|
||||
return valueSetIndex.forValues(values);
|
||||
|
||||
final Utf8ValueSetIndex utf8ValueSetIndex = indexSupplier.as(Utf8ValueSetIndex.class);
|
||||
if (utf8ValueSetIndex != null) {
|
||||
return utf8ValueSetIndex.forSortedValuesUtf8(valuesUtf8);
|
||||
}
|
||||
|
||||
final StringValueSetIndex stringValueSetIndex = indexSupplier.as(StringValueSetIndex.class);
|
||||
if (stringValueSetIndex != null) {
|
||||
return stringValueSetIndex.forSortedValues(values);
|
||||
}
|
||||
}
|
||||
return Filters.makePredicateIndex(
|
||||
|
@ -399,20 +406,9 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
|
||||
private byte[] computeCacheKey()
|
||||
{
|
||||
final Collection<String> sortedValues;
|
||||
|
||||
if (values instanceof SortedSet && isNaturalOrder(((SortedSet<String>) values).comparator())) {
|
||||
// Avoid copying "values" when it is already in the order we need for cache key computation.
|
||||
sortedValues = values;
|
||||
} else {
|
||||
final List<String> sortedValuesList = new ArrayList<>(values);
|
||||
sortedValuesList.sort(Comparators.naturalNullsFirst());
|
||||
sortedValues = sortedValuesList;
|
||||
}
|
||||
|
||||
// Hash all values, in sorted order, as their length followed by their content.
|
||||
final Hasher hasher = Hashing.sha256().newHasher();
|
||||
for (String v : sortedValues) {
|
||||
for (String v : values) {
|
||||
if (v == null) {
|
||||
// Encode null as length -1, no content.
|
||||
hasher.putInt(-1);
|
||||
|
@ -438,7 +434,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
|
||||
LookupExtractor lookup = exFn.getLookup();
|
||||
|
||||
final Set<String> keys = new HashSet<>();
|
||||
final ValuesSet keys = new ValuesSet();
|
||||
for (String value : values) {
|
||||
|
||||
// We cannot do an unapply()-based optimization if the selector value
|
||||
|
@ -468,40 +464,11 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the comparator is null or the singleton {@link Comparators#naturalNullsFirst()}. Useful for
|
||||
* detecting if a sorted set is in natural order or not.
|
||||
*
|
||||
* May return false negatives (i.e. there are naturally-ordered comparators that will return false here).
|
||||
*/
|
||||
private static <T> boolean isNaturalOrder(@Nullable final Comparator<T> comparator)
|
||||
{
|
||||
return comparator == null || Comparators.naturalNullsFirst().equals(comparator);
|
||||
}
|
||||
|
||||
@SuppressWarnings("ReturnValueIgnored")
|
||||
private static Predicate<String> createStringPredicate(final Set<String> values)
|
||||
{
|
||||
Preconditions.checkNotNull(values, "values");
|
||||
|
||||
try {
|
||||
// Check to see if values.contains(null) will throw a NullPointerException. Jackson JSON deserialization won't
|
||||
// lead to this (it will create a HashSet, which can accept nulls). But when InDimFilters are created
|
||||
// programmatically as a result of optimizations like rewriting inner joins as filters, the passed-in Set may
|
||||
// not be able to accept nulls. We don't want to copy the Sets (since they may be large) so instead we'll wrap
|
||||
// it in a null-checking lambda if needed.
|
||||
values.contains(null);
|
||||
|
||||
// Safe to do values.contains(null).
|
||||
return values::contains;
|
||||
}
|
||||
catch (NullPointerException ignored) {
|
||||
// Fall through
|
||||
}
|
||||
|
||||
// Not safe to do values.contains(null); must return a wrapper.
|
||||
// Return false for null, since an exception means the set cannot accept null (and therefore does not include it).
|
||||
return value -> value != null && values.contains(value);
|
||||
return values::contains;
|
||||
}
|
||||
|
||||
private static DruidLongPredicate createLongPredicate(final Set<String> values)
|
||||
|
@ -558,7 +525,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
|
||||
InFilterDruidPredicateFactory(
|
||||
final ExtractionFn extractionFn,
|
||||
final Set<String> values
|
||||
final ValuesSet values
|
||||
)
|
||||
{
|
||||
this.extractionFn = extractionFn;
|
||||
|
@ -638,4 +605,50 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
return Objects.hash(extractionFn, values);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ValuesSet extends ForwardingSortedSet<String>
|
||||
{
|
||||
private final SortedSet<String> values;
|
||||
|
||||
public ValuesSet()
|
||||
{
|
||||
this.values = new TreeSet<>(Comparators.naturalNullsFirst());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a ValuesSet from another Collection. The Collection will be reused if it is a {@link SortedSet} with
|
||||
* an appropriate comparator.
|
||||
*/
|
||||
public ValuesSet(final Collection<String> values)
|
||||
{
|
||||
if (values instanceof SortedSet && Comparators.naturalNullsFirst()
|
||||
.equals(((SortedSet<String>) values).comparator())) {
|
||||
this.values = (SortedSet<String>) values;
|
||||
} else {
|
||||
this.values = new TreeSet<>(Comparators.naturalNullsFirst());
|
||||
this.values.addAll(values);
|
||||
}
|
||||
}
|
||||
|
||||
public SortedSet<ByteBuffer> toUtf8()
|
||||
{
|
||||
final TreeSet<ByteBuffer> valuesUtf8 = new TreeSet<>(ByteBufferUtils.unsignedComparator());
|
||||
|
||||
for (final String value : values) {
|
||||
if (value == null) {
|
||||
valuesUtf8.add(null);
|
||||
} else {
|
||||
valuesUtf8.add(ByteBuffer.wrap(StringUtils.toUtf8(value)));
|
||||
}
|
||||
}
|
||||
|
||||
return valuesUtf8;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SortedSet<String> delegate()
|
||||
{
|
||||
return values;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -294,9 +294,8 @@ public class LikeDimFilter extends AbstractOptimizableDimFilter implements DimFi
|
|||
}
|
||||
|
||||
/**
|
||||
* Checks if the suffix of strings.get(i) matches the suffix of this matcher. The first prefix.length characters
|
||||
* of s are ignored. This method is useful if you've already independently verified the prefix. This method
|
||||
* evalutes strings.get(i) lazily to save time when it isn't necessary to actually look at the string.
|
||||
* Checks if the suffix of "value" matches the suffix of this matcher. The first prefix.length() characters
|
||||
* of "value" are ignored. This method is useful if you've already independently verified the prefix.
|
||||
*/
|
||||
public boolean matchesSuffixOnly(@Nullable String value)
|
||||
{
|
||||
|
|
|
@ -34,7 +34,6 @@ import org.apache.druid.segment.filter.DimensionPredicateFilter;
|
|||
import org.apache.druid.segment.filter.SelectorFilter;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -96,7 +95,9 @@ public class SelectorDimFilter extends AbstractOptimizableDimFilter implements D
|
|||
@Override
|
||||
public DimFilter optimize()
|
||||
{
|
||||
return new InDimFilter(dimension, Collections.singleton(value), extractionFn, filterTuning).optimize();
|
||||
final InDimFilter.ValuesSet valuesSet = new InDimFilter.ValuesSet();
|
||||
valuesSet.add(value);
|
||||
return new InDimFilter(dimension, valuesSet, extractionFn, filterTuning).optimize();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
package org.apache.druid.query.topn;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.druid.java.util.common.granularity.Granularities;
|
||||
import org.apache.druid.java.util.common.granularity.Granularity;
|
||||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
import org.apache.druid.query.BaseQuery;
|
||||
import org.apache.druid.query.DataSource;
|
||||
import org.apache.druid.query.TableDataSource;
|
||||
|
@ -45,6 +45,7 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
|
@ -234,7 +235,8 @@ public class TopNQueryBuilder
|
|||
|
||||
public TopNQueryBuilder filters(String dimensionName, String value, String... values)
|
||||
{
|
||||
final Set<String> filterValues = Sets.newHashSet(values);
|
||||
final Set<String> filterValues = new TreeSet<>(Comparators.naturalNullsFirst());
|
||||
filterValues.addAll(Arrays.asList(values));
|
||||
filterValues.add(value);
|
||||
dimFilter = new InDimFilter(dimensionName, filterValues);
|
||||
return this;
|
||||
|
|
|
@ -460,6 +460,7 @@ public class IndexIO
|
|||
new DictionaryEncodedStringIndexSupplier(
|
||||
new ConciseBitmapFactory(),
|
||||
index.getDimValueLookup(dimension),
|
||||
index.getDimValueUtf8Lookup(dimension),
|
||||
bitmaps,
|
||||
spatialIndex
|
||||
),
|
||||
|
|
|
@ -39,6 +39,8 @@ public interface ColumnIndexSupplier
|
|||
* {@link org.apache.druid.segment.data.Offset} to form the basis of a {@link org.apache.druid.segment.Cursor}
|
||||
* (or {@link org.apache.druid.segment.vector.VectorOffset} and {@link org.apache.druid.segment.vector.VectorCursor})
|
||||
* which can greatly reduce the total number of rows which need to be scanned and processed.
|
||||
*
|
||||
* Objects returned by this method are not thread-safe.
|
||||
*/
|
||||
@Nullable
|
||||
<T> T as(Class<T> clazz);
|
||||
|
|
|
@ -30,21 +30,21 @@ import javax.annotation.Nullable;
|
|||
public interface LexicographicalRangeIndex
|
||||
{
|
||||
/**
|
||||
* Get an {@link BitmapColumnIndex} corresponding to the values supplied in the specified range
|
||||
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range.
|
||||
*/
|
||||
default BitmapColumnIndex forRange(
|
||||
BitmapColumnIndex forRange(
|
||||
@Nullable String startValue,
|
||||
boolean startStrict,
|
||||
@Nullable String endValue,
|
||||
boolean endStrict
|
||||
)
|
||||
{
|
||||
return forRange(startValue, startStrict, endValue, endStrict, (index) -> true);
|
||||
}
|
||||
);
|
||||
|
||||
/**
|
||||
* Get an {@link BitmapColumnIndex} corresponding to the values supplied in the specified range whose dictionary ids
|
||||
* also match some predicate, such as to match a prefix
|
||||
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range whose dictionary ids
|
||||
* also match some predicate, such as to match a prefix.
|
||||
*
|
||||
* If the provided {@code} matcher is always true, it's better to use the other
|
||||
* {@link #forRange(String, boolean, String, boolean)} method.
|
||||
*/
|
||||
BitmapColumnIndex forRange(
|
||||
@Nullable String startValue,
|
||||
|
|
|
@ -22,7 +22,7 @@ package org.apache.druid.segment.column;
|
|||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
/**
|
||||
* Index on individual values, and provides bitmaps for the rows which contain these values
|
||||
|
@ -36,7 +36,8 @@ public interface StringValueSetIndex
|
|||
|
||||
/**
|
||||
* Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are
|
||||
* contained in the underlying column)
|
||||
* contained in the underlying column). The set must be sorted using
|
||||
* {@link org.apache.druid.java.util.common.guava.Comparators#naturalNullsFirst()}.
|
||||
*/
|
||||
BitmapColumnIndex forValues(Set<String> values);
|
||||
BitmapColumnIndex forSortedValues(SortedSet<String> values);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.column;
|
||||
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.SortedSet;
|
||||
|
||||
public interface Utf8ValueSetIndex
|
||||
{
|
||||
/**
|
||||
* Get an {@link Iterable} of {@link ImmutableBitmap} corresponding to the specified set of values (if they are
|
||||
* contained in the underlying column). The set must be sorted using
|
||||
* {@link org.apache.druid.java.util.common.ByteBufferUtils#unsignedComparator()}.
|
||||
*/
|
||||
BitmapColumnIndex forSortedValuesUtf8(SortedSet<ByteBuffer> valuesUtf8);
|
||||
}
|
|
@ -28,6 +28,7 @@ import org.apache.druid.extendedset.intset.ImmutableConciseSet;
|
|||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class ConciseBitmapSerdeFactory implements BitmapSerdeFactory
|
||||
{
|
||||
|
@ -70,6 +71,12 @@ public class ConciseBitmapSerdeFactory implements BitmapSerdeFactory
|
|||
return val.toBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canCompare()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
|
||||
{
|
||||
|
|
|
@ -70,6 +70,12 @@ public class DecompressingByteBufferObjectStrategy implements ObjectStrategy<Res
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canCompare()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ResourceHolder<ByteBuffer> o1, ResourceHolder<ByteBuffer> o2)
|
||||
{
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.druid.collections.ResourceHolder;
|
|||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.common.utils.SerializerUtils;
|
||||
import org.apache.druid.io.Channels;
|
||||
import org.apache.druid.java.util.common.ByteBufferUtils;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
|
@ -132,9 +133,9 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
}
|
||||
|
||||
@Override
|
||||
public int compare(ByteBuffer o1, ByteBuffer o2)
|
||||
public int compare(@Nullable ByteBuffer o1, @Nullable ByteBuffer o2)
|
||||
{
|
||||
return o1.compareTo(o2);
|
||||
return ByteBufferUtils.unsignedComparator().compare(o1, o2);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -217,7 +218,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
|
||||
public static <T> GenericIndexed<T> fromIterable(Iterable<T> objectsIterable, ObjectStrategy<T> strategy)
|
||||
{
|
||||
return fromIterableVersionOne(objectsIterable, strategy, true, strategy);
|
||||
return fromIterableVersionOne(objectsIterable, strategy, strategy.canCompare(), strategy);
|
||||
}
|
||||
|
||||
static int getNumberOfFilesRequired(int bagSize, long numWritten)
|
||||
|
@ -350,17 +351,13 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
if (!allowReverseLookup) {
|
||||
throw new UnsupportedOperationException("Reverse lookup not allowed.");
|
||||
}
|
||||
return indexOf(this, value);
|
||||
}
|
||||
|
||||
private int indexOf(Indexed<T> indexed, @Nullable T value)
|
||||
{
|
||||
int minIndex = 0;
|
||||
int maxIndex = size - 1;
|
||||
while (minIndex <= maxIndex) {
|
||||
int currIndex = (minIndex + maxIndex) >>> 1;
|
||||
|
||||
T currValue = indexed.get(currIndex);
|
||||
T currValue = get(currIndex);
|
||||
int comparison = strategy.compare(currValue, value);
|
||||
if (comparison == 0) {
|
||||
return currIndex;
|
||||
|
@ -458,6 +455,9 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Single-threaded view.
|
||||
*/
|
||||
abstract class BufferIndexed implements Indexed<T>
|
||||
{
|
||||
int lastReadSize;
|
||||
|
@ -468,8 +468,23 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T get(final int index)
|
||||
{
|
||||
final ByteBuffer buf = getByteBuffer(index);
|
||||
if (buf == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Traditionally, ObjectStrategy.fromByteBuffer() is given a buffer with limit set to capacity, and the
|
||||
// actual limit is passed along as an extra parameter.
|
||||
final int len = buf.remaining();
|
||||
buf.limit(buf.capacity());
|
||||
return strategy.fromByteBuffer(buf, len);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
T bufferedIndexedGet(ByteBuffer copyValueBuffer, int startOffset, int endOffset)
|
||||
ByteBuffer bufferedIndexedGetByteBuffer(ByteBuffer copyValueBuffer, int startOffset, int endOffset)
|
||||
{
|
||||
int size = endOffset - startOffset;
|
||||
// When size is 0 and SQL compatibility is enabled also check for null marker before returning null.
|
||||
|
@ -481,14 +496,22 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
lastReadSize = size;
|
||||
|
||||
// ObjectStrategy.fromByteBuffer() is allowed to reset the limit of the buffer. So if the limit is changed,
|
||||
// position() call in the next line could throw an exception, if the position is set beyond the new limit. clear()
|
||||
// sets the limit to the maximum possible, the capacity. It is safe to reset the limit to capacity, because the
|
||||
// value buffer(s) initial limit equals to capacity.
|
||||
copyValueBuffer.clear();
|
||||
// position() call could throw an exception, if the position is set beyond the new limit. Calling limit()
|
||||
// followed by position() is safe, because limit() resets position if needed.
|
||||
copyValueBuffer.limit(endOffset);
|
||||
copyValueBuffer.position(startOffset);
|
||||
return strategy.fromByteBuffer(copyValueBuffer, size);
|
||||
return copyValueBuffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Like {@link #get(int)}, but returns a {@link ByteBuffer} instead of using the {@link ObjectStrategy}.
|
||||
*
|
||||
* The returned ByteBuffer is reused by future calls. Callers must discard it before calling another method
|
||||
* on this BufferedIndexed object that may want to reuse the buffer.
|
||||
*/
|
||||
@Nullable
|
||||
protected abstract ByteBuffer getByteBuffer(int index);
|
||||
|
||||
/**
|
||||
* This method makes no guarantees with respect to thread safety
|
||||
*
|
||||
|
@ -502,7 +525,41 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
@Override
|
||||
public int indexOf(@Nullable T value)
|
||||
{
|
||||
return GenericIndexed.this.indexOf(this, value);
|
||||
if (!allowReverseLookup) {
|
||||
throw new UnsupportedOperationException("Reverse lookup not allowed.");
|
||||
}
|
||||
|
||||
//noinspection ObjectEquality
|
||||
final boolean isByteBufferStrategy = strategy == BYTE_BUFFER_STRATEGY;
|
||||
|
||||
int minIndex = 0;
|
||||
int maxIndex = size - 1;
|
||||
while (minIndex <= maxIndex) {
|
||||
int currIndex = (minIndex + maxIndex) >>> 1;
|
||||
|
||||
int comparison;
|
||||
|
||||
if (isByteBufferStrategy) {
|
||||
// Specialization avoids ByteBuffer allocation in strategy.fromByteBuffer.
|
||||
ByteBuffer currValue = getByteBuffer(currIndex);
|
||||
comparison = ByteBufferUtils.compareByteBuffers(currValue, (ByteBuffer) value);
|
||||
} else {
|
||||
T currValue = get(currIndex);
|
||||
comparison = strategy.compare(currValue, value);
|
||||
}
|
||||
|
||||
if (comparison == 0) {
|
||||
return currIndex;
|
||||
}
|
||||
|
||||
if (comparison < 0) {
|
||||
minIndex = currIndex + 1;
|
||||
} else {
|
||||
maxIndex = currIndex - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return -(minIndex + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -625,8 +682,9 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
final ByteBuffer copyBuffer = firstValueBuffer.asReadOnlyBuffer();
|
||||
return new BufferIndexed()
|
||||
{
|
||||
@Nullable
|
||||
@Override
|
||||
public T get(final int index)
|
||||
protected ByteBuffer getByteBuffer(final int index)
|
||||
{
|
||||
checkIndex(index);
|
||||
|
||||
|
@ -641,7 +699,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
startOffset = headerBuffer.getInt(headerPosition) + Integer.BYTES;
|
||||
endOffset = headerBuffer.getInt(headerPosition + Integer.BYTES);
|
||||
}
|
||||
return bufferedIndexedGet(copyBuffer, startOffset, endOffset);
|
||||
return bufferedIndexedGetByteBuffer(copyBuffer, startOffset, endOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -734,8 +792,9 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
|
||||
return new BufferIndexed()
|
||||
{
|
||||
@Nullable
|
||||
@Override
|
||||
public T get(final int index)
|
||||
protected ByteBuffer getByteBuffer(int index)
|
||||
{
|
||||
checkIndex(index);
|
||||
|
||||
|
@ -753,7 +812,7 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|
|||
endOffset = headerBuffer.getInt(headerPosition + Integer.BYTES);
|
||||
}
|
||||
int fileNum = index >> logBaseTwoOfElementsPerValueFile;
|
||||
return bufferedIndexedGet(copyValueBuffers[fileNum], startOffset, endOffset);
|
||||
return bufferedIndexedGetByteBuffer(copyValueBuffers[fileNum], startOffset, endOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -124,6 +124,12 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||
val.position(valPos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canCompare()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ByteBuffer o1, ByteBuffer o2)
|
||||
{
|
||||
|
|
|
@ -51,6 +51,14 @@ public interface ObjectStrategy<T> extends Comparator<T>
|
|||
@Nullable
|
||||
byte[] toBytes(@Nullable T val);
|
||||
|
||||
/**
|
||||
* Whether {@link #compare} is valid or not.
|
||||
*/
|
||||
default boolean canCompare()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads 4-bytes numBytes from the given buffer, and then delegates to {@link #fromByteBuffer(ByteBuffer, int)}.
|
||||
*/
|
||||
|
|
|
@ -95,6 +95,12 @@ public class RoaringBitmapSerdeFactory implements BitmapSerdeFactory
|
|||
return val.toBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canCompare()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ImmutableBitmap o1, ImmutableBitmap o2)
|
||||
{
|
||||
|
|
|
@ -94,7 +94,12 @@ public class LikeFilter implements Filter
|
|||
if (rangeIndex != null) {
|
||||
final String lower = NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix());
|
||||
final String upper = NullHandling.nullToEmptyIfNeeded(likeMatcher.getPrefix()) + Character.MAX_VALUE;
|
||||
return rangeIndex.forRange(lower, false, upper, false, likeMatcher::matchesSuffixOnly);
|
||||
|
||||
if (likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_ANY) {
|
||||
return rangeIndex.forRange(lower, false, upper, false);
|
||||
} else {
|
||||
return rangeIndex.forRange(lower, false, upper, false, likeMatcher::matchesSuffixOnly);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -71,10 +71,10 @@ public interface Joinable extends ReferenceCountedObject
|
|||
* @param condition join condition for the matcher
|
||||
* @param remainderNeeded whether or not {@link JoinMatcher#matchRemainder()} will ever be called on the
|
||||
* matcher. If we know it will not, additional optimizations are often possible.
|
||||
*
|
||||
* @param descending true if join cursor is iterated in descending order
|
||||
* @param closer closer that will run after join cursor has completed to clean up any per query
|
||||
* resources the joinable uses
|
||||
*
|
||||
* @return the matcher
|
||||
*/
|
||||
JoinMatcher makeJoinMatcher(
|
||||
|
@ -89,6 +89,10 @@ public interface Joinable extends ReferenceCountedObject
|
|||
* Returns all nonnull values from a particular column if they are all unique, if there are "maxNumValues" or fewer,
|
||||
* and if the column exists and supports this operation. Otherwise, returns an empty Optional.
|
||||
*
|
||||
* The returned set may be passed to {@link org.apache.druid.query.filter.InDimFilter}. For efficiency,
|
||||
* implementations should prefer creating the returned set with
|
||||
* {@code new TreeSet<String>(Comparators.naturalNullsFirst()}}. This avoids a copy in the filter's constructor.
|
||||
*
|
||||
* @param columnName name of the column
|
||||
* @param maxNumValues maximum number of values to return
|
||||
*/
|
||||
|
@ -98,13 +102,18 @@ public interface Joinable extends ReferenceCountedObject
|
|||
* Searches a column from this Joinable for a particular value, finds rows that match,
|
||||
* and returns values of a second column for those rows.
|
||||
*
|
||||
* @param searchColumnName Name of the search column. This is the column that is being used in the filter
|
||||
* @param searchColumnValue Target value of the search column. This is the value that is being filtered on.
|
||||
* @param retrievalColumnName The column to retrieve values from. This is the column that is being joined against.
|
||||
* @param maxCorrelationSetSize Maximum number of values to retrieve. If we detect that more values would be
|
||||
* returned than this limit, return absent.
|
||||
* The returned set may be passed to {@link org.apache.druid.query.filter.InDimFilter}. For efficiency,
|
||||
* implementations should prefer creating the returned set with
|
||||
* {@code new TreeSet<String>(Comparators.naturalNullsFirst()}}. This avoids a copy in the filter's constructor.
|
||||
*
|
||||
* @param searchColumnName Name of the search column. This is the column that is being used in the filter
|
||||
* @param searchColumnValue Target value of the search column. This is the value that is being filtered on.
|
||||
* @param retrievalColumnName The column to retrieve values from. This is the column that is being joined against.
|
||||
* @param maxCorrelationSetSize Maximum number of values to retrieve. If we detect that more values would be
|
||||
* returned than this limit, return absent.
|
||||
* @param allowNonKeyColumnSearch If true, allow searchs on non-key columns. If this is false,
|
||||
* a search on a non-key column returns absent.
|
||||
*
|
||||
* @return The set of correlated column values. If we cannot determine correlated values, return absent.
|
||||
*
|
||||
* In case either the search or retrieval column names are not found, this will return absent.
|
||||
|
|
|
@ -33,7 +33,6 @@ import org.apache.druid.segment.join.Joinable;
|
|||
import javax.annotation.Nullable;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
@ -104,10 +103,7 @@ public class IndexedTableJoinable implements Joinable
|
|||
try (final IndexedTable.Reader reader = table.columnReader(columnPosition)) {
|
||||
// Sorted set to encourage "in" filters that result from this method to do dictionary lookups in order.
|
||||
// The hopes are that this will improve locality and therefore improve performance.
|
||||
//
|
||||
// Note: we are using Comparators.naturalNullsFirst() because it prevents the need for lambda-wrapping in
|
||||
// InDimFilter's "createStringPredicate" method.
|
||||
final Set<String> allValues = new TreeSet<>(Comparators.naturalNullsFirst());
|
||||
final Set<String> allValues = createValuesSet();
|
||||
|
||||
for (int i = 0; i < table.numRows(); i++) {
|
||||
final String s = DimensionHandlerUtils.convertObjectToString(reader.read(i));
|
||||
|
@ -147,7 +143,7 @@ public class IndexedTableJoinable implements Joinable
|
|||
return Optional.empty();
|
||||
}
|
||||
try (final Closer closer = Closer.create()) {
|
||||
Set<String> correlatedValues = new HashSet<>();
|
||||
Set<String> correlatedValues = createValuesSet();
|
||||
if (table.keyColumns().contains(searchColumnName)) {
|
||||
IndexedTable.Index index = table.columnIndex(filterColumnPosition);
|
||||
IndexedTable.Reader reader = table.columnReader(correlatedColumnPosition);
|
||||
|
@ -196,4 +192,12 @@ public class IndexedTableJoinable implements Joinable
|
|||
{
|
||||
return table.acquireReferences();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Set that InDimFilter will accept without incurring a copy.
|
||||
*/
|
||||
private static Set<String> createValuesSet()
|
||||
{
|
||||
return new TreeSet<>(Comparators.naturalNullsFirst());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -365,6 +365,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
|
|||
new DictionaryEncodedStringIndexSupplier(
|
||||
bitmapSerdeFactory.getBitmapFactory(),
|
||||
rDictionary,
|
||||
rDictionaryUtf8,
|
||||
rBitmaps,
|
||||
rSpatialIndex
|
||||
),
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.apache.druid.segment.serde;
|
||||
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.Iterables;
|
||||
import it.unimi.dsi.fastutil.ints.IntIntImmutablePair;
|
||||
import it.unimi.dsi.fastutil.ints.IntIntPair;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
|
@ -27,6 +28,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
|
|||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.spatial.ImmutableRTree;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
import org.apache.druid.query.filter.DruidPredicateFactory;
|
||||
import org.apache.druid.segment.IntListUtils;
|
||||
|
@ -39,13 +41,16 @@ import org.apache.druid.segment.column.LexicographicalRangeIndex;
|
|||
import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
|
||||
import org.apache.druid.segment.column.SpatialIndex;
|
||||
import org.apache.druid.segment.column.StringValueSetIndex;
|
||||
import org.apache.druid.segment.column.Utf8ValueSetIndex;
|
||||
import org.apache.druid.segment.data.GenericIndexed;
|
||||
import org.apache.druid.segment.data.Indexed;
|
||||
import org.apache.druid.segment.filter.Filters;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
||||
{
|
||||
|
@ -53,6 +58,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
|
||||
private final BitmapFactory bitmapFactory;
|
||||
private final GenericIndexed<String> dictionary;
|
||||
private final GenericIndexed<ByteBuffer> dictionaryUtf8;
|
||||
@Nullable
|
||||
private final GenericIndexed<ImmutableBitmap> bitmaps;
|
||||
@Nullable
|
||||
|
@ -61,29 +67,38 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
public DictionaryEncodedStringIndexSupplier(
|
||||
BitmapFactory bitmapFactory,
|
||||
GenericIndexed<String> dictionary,
|
||||
GenericIndexed<ImmutableBitmap> bitmaps,
|
||||
ImmutableRTree indexedTree
|
||||
GenericIndexed<ByteBuffer> dictionaryUtf8,
|
||||
@Nullable GenericIndexed<ImmutableBitmap> bitmaps,
|
||||
@Nullable ImmutableRTree indexedTree
|
||||
)
|
||||
{
|
||||
this.bitmapFactory = bitmapFactory;
|
||||
this.bitmaps = bitmaps;
|
||||
this.dictionary = dictionary;
|
||||
this.dictionaryUtf8 = dictionaryUtf8;
|
||||
this.bitmaps = bitmaps;
|
||||
this.indexedTree = indexedTree;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public <T> T as(Class<T> clazz)
|
||||
{
|
||||
if (clazz.equals(StringValueSetIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionary, bitmaps);
|
||||
} else if (clazz.equals(DruidPredicateIndex.class)) {
|
||||
if (bitmaps != null && clazz.equals(StringValueSetIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
|
||||
} else if (bitmaps != null && clazz.equals(Utf8ValueSetIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
|
||||
} else if (bitmaps != null && clazz.equals(DruidPredicateIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedStringDruidPredicateIndex(bitmapFactory, dictionary, bitmaps);
|
||||
} else if (clazz.equals(LexicographicalRangeIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(bitmapFactory, dictionary, bitmaps);
|
||||
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class)) {
|
||||
} else if (bitmaps != null && clazz.equals(LexicographicalRangeIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
|
||||
bitmapFactory,
|
||||
dictionaryUtf8,
|
||||
bitmaps
|
||||
);
|
||||
} else if (bitmaps != null && clazz.equals(DictionaryEncodedStringValueIndex.class)) {
|
||||
return (T) new GenericIndexedDictionaryEncodedStringValueIndex(bitmapFactory, dictionary, bitmaps);
|
||||
} else if (clazz.equals(SpatialIndex.class)) {
|
||||
} else if (indexedTree != null && clazz.equals(SpatialIndex.class)) {
|
||||
return (T) (SpatialIndex) () -> indexedTree;
|
||||
}
|
||||
return null;
|
||||
|
@ -98,21 +113,21 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
}
|
||||
|
||||
private abstract static class BaseGenericIndexedDictionaryEncodedStringIndex
|
||||
private abstract static class BaseGenericIndexedDictionaryEncodedIndex<T>
|
||||
{
|
||||
protected final BitmapFactory bitmapFactory;
|
||||
protected final GenericIndexed<String> dictionary;
|
||||
protected final GenericIndexed<ImmutableBitmap> bitmaps;
|
||||
protected final Indexed<T> dictionary;
|
||||
protected final Indexed<ImmutableBitmap> bitmaps;
|
||||
|
||||
protected BaseGenericIndexedDictionaryEncodedStringIndex(
|
||||
protected BaseGenericIndexedDictionaryEncodedIndex(
|
||||
BitmapFactory bitmapFactory,
|
||||
GenericIndexed<String> dictionary,
|
||||
GenericIndexed<T> dictionary,
|
||||
GenericIndexed<ImmutableBitmap> bitmaps
|
||||
)
|
||||
{
|
||||
this.bitmapFactory = bitmapFactory;
|
||||
this.dictionary = dictionary;
|
||||
this.bitmaps = bitmaps;
|
||||
this.dictionary = dictionary.singleThreaded();
|
||||
this.bitmaps = bitmaps.singleThreaded();
|
||||
}
|
||||
|
||||
public ImmutableBitmap getBitmap(int idx)
|
||||
|
@ -127,7 +142,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
|
||||
public static final class GenericIndexedDictionaryEncodedStringValueIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedStringIndex implements DictionaryEncodedStringValueIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedIndex<String> implements DictionaryEncodedStringValueIndex
|
||||
{
|
||||
public GenericIndexedDictionaryEncodedStringValueIndex(
|
||||
BitmapFactory bitmapFactory,
|
||||
|
@ -165,12 +180,13 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
|
||||
public static final class GenericIndexedDictionaryEncodedStringValueSetIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedStringIndex implements StringValueSetIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements StringValueSetIndex, Utf8ValueSetIndex
|
||||
{
|
||||
private static final int SIZE_WORTH_CHECKING_MIN = 8;
|
||||
|
||||
public GenericIndexedDictionaryEncodedStringValueSetIndex(
|
||||
BitmapFactory bitmapFactory,
|
||||
GenericIndexed<String> dictionary,
|
||||
GenericIndexed<ByteBuffer> dictionary,
|
||||
GenericIndexed<ImmutableBitmap> bitmaps
|
||||
)
|
||||
{
|
||||
|
@ -197,14 +213,43 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
|
||||
private ImmutableBitmap getBitmapForValue()
|
||||
{
|
||||
final int idx = dictionary.indexOf(value);
|
||||
final ByteBuffer valueUtf8 = value == null ? null : ByteBuffer.wrap(StringUtils.toUtf8(value));
|
||||
final int idx = dictionary.indexOf(valueUtf8);
|
||||
return getBitmap(idx);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitmapColumnIndex forValues(Set<String> values)
|
||||
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
|
||||
{
|
||||
return getBitmapColumnIndexForSortedIterableUtf8(
|
||||
Iterables.transform(
|
||||
values,
|
||||
input -> ByteBuffer.wrap(StringUtils.toUtf8(input))
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitmapColumnIndex forSortedValuesUtf8(SortedSet<ByteBuffer> valuesUtf8)
|
||||
{
|
||||
final SortedSet<ByteBuffer> tailSet;
|
||||
|
||||
if (valuesUtf8.size() >= SIZE_WORTH_CHECKING_MIN) {
|
||||
final ByteBuffer minValueInColumn = dictionary.get(0);
|
||||
tailSet = valuesUtf8.tailSet(minValueInColumn);
|
||||
} else {
|
||||
tailSet = valuesUtf8;
|
||||
}
|
||||
|
||||
return getBitmapColumnIndexForSortedIterableUtf8(tailSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper used by {@link #forSortedValues} and {@link #forSortedValuesUtf8}.
|
||||
*/
|
||||
private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(Iterable<ByteBuffer> valuesUtf8)
|
||||
{
|
||||
return new DictionaryEncodedStringBitmapColumnIndex()
|
||||
{
|
||||
|
@ -222,9 +267,11 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
|
||||
private Iterable<ImmutableBitmap> getBitmapsIterable()
|
||||
{
|
||||
final int dictionarySize = dictionary.size();
|
||||
|
||||
return () -> new Iterator<ImmutableBitmap>()
|
||||
{
|
||||
final Iterator<String> iterator = values.iterator();
|
||||
final Iterator<ByteBuffer> iterator = valuesUtf8.iterator();
|
||||
int next = -1;
|
||||
|
||||
@Override
|
||||
|
@ -253,8 +300,15 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
private void findNext()
|
||||
{
|
||||
while (next < 0 && iterator.hasNext()) {
|
||||
String nextValue = iterator.next();
|
||||
ByteBuffer nextValue = iterator.next();
|
||||
next = dictionary.indexOf(nextValue);
|
||||
|
||||
if (next == -dictionarySize - 1) {
|
||||
// nextValue is past the end of the dictionary.
|
||||
// Note: we can rely on indexOf returning (-(insertion point) - 1), even though Indexed doesn't
|
||||
// guarantee it, because "dictionary" comes from GenericIndexed singleThreaded().
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -264,7 +318,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
|
||||
public static final class GenericIndexedDictionaryEncodedStringDruidPredicateIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedStringIndex implements DruidPredicateIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedIndex<String> implements DruidPredicateIndex
|
||||
{
|
||||
public GenericIndexedDictionaryEncodedStringDruidPredicateIndex(
|
||||
BitmapFactory bitmapFactory,
|
||||
|
@ -350,12 +404,12 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
|
||||
public static final class GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedStringIndex implements LexicographicalRangeIndex
|
||||
extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements LexicographicalRangeIndex
|
||||
{
|
||||
|
||||
public GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
|
||||
BitmapFactory bitmapFactory,
|
||||
GenericIndexed<String> dictionary,
|
||||
GenericIndexed<ByteBuffer> dictionary,
|
||||
GenericIndexed<ImmutableBitmap> bitmaps
|
||||
)
|
||||
{
|
||||
|
@ -447,7 +501,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
|
||||
private int findNext()
|
||||
{
|
||||
while (currIndex < end && !matcher.apply(dictionary.get(currIndex))) {
|
||||
while (currIndex < end && !applyMatcher(dictionary.get(currIndex))) {
|
||||
currIndex++;
|
||||
}
|
||||
|
||||
|
@ -478,16 +532,32 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
private boolean applyMatcher(@Nullable final ByteBuffer valueUtf8)
|
||||
{
|
||||
if (valueUtf8 == null) {
|
||||
return matcher.apply(null);
|
||||
} else {
|
||||
// Duplicate buffer, because StringUtils.fromUtf8 advances the position, and we do not want to do that.
|
||||
return matcher.apply(StringUtils.fromUtf8(valueUtf8.duplicate()));
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private IntIntPair getRange(@Nullable String startValue, boolean startStrict, @Nullable String endValue, boolean endStrict)
|
||||
private IntIntPair getRange(
|
||||
@Nullable String startValue,
|
||||
boolean startStrict,
|
||||
@Nullable String endValue,
|
||||
boolean endStrict
|
||||
)
|
||||
{
|
||||
int startIndex, endIndex;
|
||||
if (startValue == null) {
|
||||
startIndex = 0;
|
||||
} else {
|
||||
final int found = dictionary.indexOf(NullHandling.emptyToNullIfNeeded(startValue));
|
||||
final String startValueToUse = NullHandling.emptyToNullIfNeeded(startValue);
|
||||
final int found = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(startValueToUse));
|
||||
if (found >= 0) {
|
||||
startIndex = startStrict ? found + 1 : found;
|
||||
} else {
|
||||
|
@ -498,7 +568,8 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
|
|||
if (endValue == null) {
|
||||
endIndex = dictionary.size();
|
||||
} else {
|
||||
final int found = dictionary.indexOf(NullHandling.emptyToNullIfNeeded(endValue));
|
||||
final String endValueToUse = NullHandling.emptyToNullIfNeeded(endValue);
|
||||
final int found = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(endValueToUse));
|
||||
if (found >= 0) {
|
||||
endIndex = endStrict ? found : found + 1;
|
||||
} else {
|
||||
|
|
|
@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
|
|||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Predicates;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
|
@ -60,6 +61,7 @@ import java.util.List;
|
|||
import java.util.NoSuchElementException;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
/**
|
||||
* {@link VirtualColumn} form of {@link ListFilteredDimensionSpec}, powered by
|
||||
|
@ -397,7 +399,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
|
|||
}
|
||||
|
||||
@Override
|
||||
public BitmapColumnIndex forValues(Set<String> values)
|
||||
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
|
||||
{
|
||||
return new BaseVirtualIndex()
|
||||
{
|
||||
|
@ -505,6 +507,17 @@ public class ListFilteredVirtualColumn implements VirtualColumn
|
|||
super(delegate, idMapping);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitmapColumnIndex forRange(
|
||||
@Nullable String startValue,
|
||||
boolean startStrict,
|
||||
@Nullable String endValue,
|
||||
boolean endStrict
|
||||
)
|
||||
{
|
||||
return forRange(startValue, startStrict, endValue, endStrict, Predicates.alwaysTrue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitmapColumnIndex forRange(
|
||||
@Nullable String startValue,
|
||||
|
|
|
@ -31,11 +31,20 @@ import org.apache.druid.jackson.DefaultObjectMapper;
|
|||
import org.apache.druid.query.extraction.RegexDimExtractionFn;
|
||||
import org.apache.druid.segment.RowAdapters;
|
||||
import org.apache.druid.segment.RowBasedColumnSelectorFactory;
|
||||
import org.apache.druid.segment.column.BitmapColumnIndex;
|
||||
import org.apache.druid.segment.column.ColumnIndexSupplier;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.column.RowSignature;
|
||||
import org.apache.druid.segment.column.StringValueSetIndex;
|
||||
import org.apache.druid.segment.column.Utf8ValueSetIndex;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.junit.MockitoJUnit;
|
||||
import org.mockito.junit.MockitoRule;
|
||||
import org.mockito.quality.Strictness;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
@ -52,6 +61,9 @@ public class InDimFilterTest extends InitializedNullHandlingTest
|
|||
private final String serializedFilter =
|
||||
"{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"]}";
|
||||
|
||||
@Rule
|
||||
public MockitoRule mockitoRule = MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS);
|
||||
|
||||
@Test
|
||||
public void testDeserialization() throws IOException
|
||||
{
|
||||
|
@ -71,21 +83,23 @@ public class InDimFilterTest extends InitializedNullHandlingTest
|
|||
@Test
|
||||
public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet()
|
||||
{
|
||||
final Set<String> values = ImmutableSet.of("v1", "v2", "v3");
|
||||
final InDimFilter filter = new InDimFilter("dim", values, null, null);
|
||||
final Set<String> values = new InDimFilter.ValuesSet();
|
||||
values.addAll(Arrays.asList("v1", "v2", "v3"));
|
||||
final InDimFilter filter = new InDimFilter("dim", values);
|
||||
Assert.assertSame(values, filter.getValues());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetValuesWithValuesSetIncludingEmptyString()
|
||||
{
|
||||
final Set<String> values = Sets.newHashSet("v1", "", "v3");
|
||||
final InDimFilter filter = new InDimFilter("dim", values, null, null);
|
||||
final InDimFilter.ValuesSet values = new InDimFilter.ValuesSet(ImmutableSet.of("v1", "", "v3"));
|
||||
final InDimFilter filter = new InDimFilter("dim", values);
|
||||
if (NullHandling.replaceWithDefault()) {
|
||||
Assert.assertNotSame(values, filter.getValues());
|
||||
Assert.assertSame(values, filter.getValues());
|
||||
Assert.assertEquals(Sets.newHashSet("v1", null, "v3"), filter.getValues());
|
||||
} else {
|
||||
Assert.assertSame(values, filter.getValues());
|
||||
Assert.assertEquals(Sets.newHashSet("v1", "", "v3"), filter.getValues());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -234,4 +248,53 @@ public class InDimFilterTest extends InitializedNullHandlingTest
|
|||
// Now it *shouldn't* match.
|
||||
Assert.assertFalse(matcher.matches());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsesUtf8SetIndex()
|
||||
{
|
||||
// An implementation test.
|
||||
// This test confirms that "in" filters use utf8 index lookups when available.
|
||||
|
||||
final Filter inFilter = new InDimFilter("dim0", ImmutableSet.of("v1", "v2")).toFilter();
|
||||
|
||||
final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class);
|
||||
final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class);
|
||||
final Utf8ValueSetIndex valueIndex = Mockito.mock(Utf8ValueSetIndex.class);
|
||||
final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class);
|
||||
|
||||
final InDimFilter.ValuesSet expectedValuesSet = new InDimFilter.ValuesSet();
|
||||
expectedValuesSet.addAll(Arrays.asList("v1", "v2"));
|
||||
|
||||
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
|
||||
Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(valueIndex);
|
||||
Mockito.when(valueIndex.forSortedValuesUtf8(expectedValuesSet.toUtf8())).thenReturn(bitmapColumnIndex);
|
||||
|
||||
final BitmapColumnIndex retVal = inFilter.getBitmapColumnIndex(indexSelector);
|
||||
Assert.assertSame("inFilter returns the intended bitmapColumnIndex", bitmapColumnIndex, retVal);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsesStringSetIndex()
|
||||
{
|
||||
// An implementation test.
|
||||
// This test confirms that "in" filters use non-utf8 string index lookups when utf8 indexes are not available.
|
||||
|
||||
final Filter inFilter = new InDimFilter("dim0", ImmutableSet.of("v1", "v2")).toFilter();
|
||||
|
||||
final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class);
|
||||
final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class);
|
||||
final StringValueSetIndex valueIndex = Mockito.mock(StringValueSetIndex.class);
|
||||
final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class);
|
||||
|
||||
final InDimFilter.ValuesSet expectedValuesSet = new InDimFilter.ValuesSet();
|
||||
expectedValuesSet.addAll(Arrays.asList("v1", "v2"));
|
||||
|
||||
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
|
||||
Mockito.when(indexSupplier.as(Utf8ValueSetIndex.class)).thenReturn(null); // Will check for UTF-8 first.
|
||||
Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex);
|
||||
Mockito.when(valueIndex.forSortedValues(expectedValuesSet)).thenReturn(bitmapColumnIndex);
|
||||
|
||||
final BitmapColumnIndex retVal = inFilter.getBitmapColumnIndex(indexSelector);
|
||||
Assert.assertSame("inFilter returns the intended bitmapColumnIndex", bitmapColumnIndex, retVal);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,15 +24,27 @@ import com.google.common.collect.Sets;
|
|||
import nl.jqno.equalsverifier.EqualsVerifier;
|
||||
import org.apache.druid.jackson.DefaultObjectMapper;
|
||||
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
|
||||
import org.apache.druid.segment.column.BitmapColumnIndex;
|
||||
import org.apache.druid.segment.column.ColumnIndexSupplier;
|
||||
import org.apache.druid.segment.column.LexicographicalRangeIndex;
|
||||
import org.apache.druid.segment.column.StringValueSetIndex;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.junit.MockitoJUnit;
|
||||
import org.mockito.junit.MockitoRule;
|
||||
import org.mockito.quality.Strictness;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class LikeDimFilterTest extends InitializedNullHandlingTest
|
||||
{
|
||||
@Rule
|
||||
public MockitoRule mockitoRule = MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS);
|
||||
|
||||
@Test
|
||||
public void testSerde() throws IOException
|
||||
{
|
||||
|
@ -88,4 +100,49 @@ public class LikeDimFilterTest extends InitializedNullHandlingTest
|
|||
.withNonnullFields("suffixMatch", "prefix", "pattern")
|
||||
.verify();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPrefixMatchUsesRangeIndex()
|
||||
{
|
||||
// An implementation test.
|
||||
// This test confirms that "like" filters with prefix matchers use index-range lookups without matcher predicates.
|
||||
|
||||
final Filter likeFilter = new LikeDimFilter("dim0", "f%", null, null, null).toFilter();
|
||||
|
||||
final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class);
|
||||
final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class);
|
||||
final LexicographicalRangeIndex rangeIndex = Mockito.mock(LexicographicalRangeIndex.class);
|
||||
final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class);
|
||||
|
||||
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
|
||||
Mockito.when(indexSupplier.as(LexicographicalRangeIndex.class)).thenReturn(rangeIndex);
|
||||
Mockito.when(
|
||||
// Verify that likeFilter uses forRange without a matcher predicate; it's unnecessary and slows things down
|
||||
rangeIndex.forRange("f", false, "f" + Character.MAX_VALUE, false)
|
||||
).thenReturn(bitmapColumnIndex);
|
||||
|
||||
final BitmapColumnIndex retVal = likeFilter.getBitmapColumnIndex(indexSelector);
|
||||
Assert.assertSame("likeFilter returns the intended bitmapColumnIndex", bitmapColumnIndex, retVal);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExactMatchUsesValueIndex()
|
||||
{
|
||||
// An implementation test.
|
||||
// This test confirms that "like" filters with exact matchers use index lookups.
|
||||
|
||||
final Filter likeFilter = new LikeDimFilter("dim0", "f", null, null, null).toFilter();
|
||||
|
||||
final ColumnIndexSelector indexSelector = Mockito.mock(ColumnIndexSelector.class);
|
||||
final ColumnIndexSupplier indexSupplier = Mockito.mock(ColumnIndexSupplier.class);
|
||||
final StringValueSetIndex valueIndex = Mockito.mock(StringValueSetIndex.class);
|
||||
final BitmapColumnIndex bitmapColumnIndex = Mockito.mock(BitmapColumnIndex.class);
|
||||
|
||||
Mockito.when(indexSelector.getIndexSupplier("dim0")).thenReturn(indexSupplier);
|
||||
Mockito.when(indexSupplier.as(StringValueSetIndex.class)).thenReturn(valueIndex);
|
||||
Mockito.when(valueIndex.forValue("f")).thenReturn(bitmapColumnIndex);
|
||||
|
||||
final BitmapColumnIndex retVal = likeFilter.getBitmapColumnIndex(indexSelector);
|
||||
Assert.assertSame("likeFilter returns the intended bitmapColumnIndex", bitmapColumnIndex, retVal);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.druid.collections.bitmap.ConciseBitmapFactory;
|
|||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.query.extraction.DimExtractionFn;
|
||||
import org.apache.druid.query.extraction.ExtractionFn;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
|
@ -49,6 +50,7 @@ import org.junit.runner.RunWith;
|
|||
import org.junit.runners.Parameterized;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -117,6 +119,10 @@ public class ExtractionDimFilterTest extends InitializedNullHandlingTest
|
|||
return new DictionaryEncodedStringIndexSupplier(
|
||||
factory,
|
||||
GenericIndexed.fromIterable(Collections.singletonList("foo1"), GenericIndexed.STRING_STRATEGY),
|
||||
GenericIndexed.fromIterable(
|
||||
Collections.singletonList(ByteBuffer.wrap(StringUtils.toUtf8("foo1"))),
|
||||
GenericIndexed.BYTE_BUFFER_STRATEGY
|
||||
),
|
||||
GenericIndexed.fromIterable(Collections.singletonList(foo1BitMap), serdeFactory.getObjectStrategy()),
|
||||
null
|
||||
);
|
||||
|
|
|
@ -386,7 +386,7 @@ public class InFilterTest extends BaseFilterTest
|
|||
EqualsVerifier.forClass(InDimFilter.class)
|
||||
.usingGetClass()
|
||||
.withNonnullFields("dimension", "values")
|
||||
.withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter")
|
||||
.withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter", "valuesUtf8")
|
||||
.verify();
|
||||
}
|
||||
|
||||
|
|
|
@ -242,7 +242,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter("rtc.countryName", "United States"),
|
||||
|
@ -649,7 +649,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
))
|
||||
)
|
||||
),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("CA"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("CA")).toFilter()
|
||||
)
|
||||
),
|
||||
new AndFilter(
|
||||
|
@ -751,7 +751,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("SU"), null, null).toFilter()
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("SU")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter("rtc.countryName", "States United"),
|
||||
|
@ -898,7 +898,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#ko.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US")).toFilter()
|
||||
)
|
||||
),
|
||||
new OrFilter(
|
||||
|
@ -906,8 +906,8 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new SelectorFilter("channel", "#ko.wikipedia"),
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null, null).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("VA"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US")).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("VA")).toFilter()
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -1004,7 +1004,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("USCA"), null, null).toFilter()
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("USCA")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter("c1.countryName", "Usca"),
|
||||
|
@ -1094,7 +1094,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("USCA"), null, null).toFilter()
|
||||
new InDimFilter("JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", ImmutableSet.of("USCA")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter("c1.v", "Usca"),
|
||||
|
@ -1164,7 +1164,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#de.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("DE"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("DE")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName", "Germany"),
|
||||
|
@ -1221,7 +1221,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#de.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("DE"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("DE")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "v", "Germany"),
|
||||
|
@ -1453,7 +1453,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("countryNumber", ImmutableSet.of("0"), null, null).toFilter()
|
||||
new InDimFilter("countryNumber", ImmutableSet.of("0")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_NUMBER_PREFIX + "countryName", "Australia"),
|
||||
|
@ -1516,7 +1516,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#en.wikipedia"),
|
||||
new InDimFilter("countryNumber", ImmutableSet.of("0"), null, null).toFilter()
|
||||
new InDimFilter("countryNumber", ImmutableSet.of("0")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_NUMBER_PREFIX + "v", "Australia"),
|
||||
|
@ -1683,7 +1683,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#es.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("SV"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("SV")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName", "El Salvador"),
|
||||
|
@ -1740,7 +1740,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#es.wikipedia"),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("SV"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("SV")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "v", "El Salvador"),
|
||||
|
@ -1916,8 +1916,8 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM"), null, null).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM")).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM")).toFilter()
|
||||
)
|
||||
),
|
||||
new SelectorFilter("r1.regionName", "Fourems Province"),
|
||||
|
@ -1987,7 +1987,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM"), null, null).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM")).toFilter(),
|
||||
new SelectorFilter("regionIsoCode", "AAAA")
|
||||
)
|
||||
),
|
||||
|
@ -2288,7 +2288,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
null
|
||||
)),
|
||||
expressionFilter,
|
||||
new InDimFilter("rtc.countryIsoCode", ImmutableSet.of("CA", "CA2", "CA3"), null, null).toFilter(),
|
||||
new InDimFilter("rtc.countryIsoCode", ImmutableSet.of("CA", "CA2", "CA3")).toFilter(),
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new SelectorFilter("channel", "#fr.wikipedia"),
|
||||
|
@ -2414,7 +2414,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
ImmutableList.of(
|
||||
new SelectorFilter(rewrittenRegionIsoCodeColumnName, "ON"),
|
||||
new SelectorFilter(rewrittenCountryIsoCodeColumnName, "CA"),
|
||||
new InDimFilter(rewrittenCountryIsoCodeColumnName, ImmutableSet.of("CA"), null, null).toFilter(),
|
||||
new InDimFilter(rewrittenCountryIsoCodeColumnName, ImmutableSet.of("CA")).toFilter(),
|
||||
new BoundFilter(new BoundDimFilter(
|
||||
rewrittenCountryIsoCodeColumnName,
|
||||
"CA",
|
||||
|
@ -2427,9 +2427,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
)),
|
||||
new InDimFilter(
|
||||
rewrittenCountryIsoCodeColumnName,
|
||||
ImmutableSet.of("CA", "CA2", "CA3"),
|
||||
null,
|
||||
null
|
||||
ImmutableSet.of("CA", "CA2", "CA3")
|
||||
).toFilter(),
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
|
@ -2453,9 +2451,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
new SelectorFilter(rewrittenCountryIsoCodeColumnName, "ABCDEF"),
|
||||
new InDimFilter(
|
||||
rewrittenCountryIsoCodeColumnName,
|
||||
ImmutableSet.of("CA"),
|
||||
null,
|
||||
null
|
||||
ImmutableSet.of("CA")
|
||||
).toFilter(),
|
||||
new BoundFilter(new BoundDimFilter(
|
||||
rewrittenCountryIsoCodeColumnName,
|
||||
|
@ -2549,10 +2545,10 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes
|
|||
JoinFilterSplit expectedFilterSplit = new JoinFilterSplit(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US"), null, null).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("CA"), null, null).toFilter(),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM", "AAAA"), null, null).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM", "AAAA"), null, null).toFilter()
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("US")).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("CA")).toFilter(),
|
||||
new InDimFilter("countryIsoCode", ImmutableSet.of("MMMM", "AAAA")).toFilter(),
|
||||
new InDimFilter("regionIsoCode", ImmutableSet.of("MMMM", "AAAA")).toFilter()
|
||||
)
|
||||
),
|
||||
originalFilter,
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.apache.druid.sql.calcite.expression.builtin;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.calcite.rex.RexCall;
|
||||
import org.apache.calcite.rex.RexNode;
|
||||
import org.apache.calcite.sql.SqlFunction;
|
||||
|
@ -40,6 +39,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext;
|
|||
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOperatorConversion
|
||||
|
@ -123,7 +123,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
|
|||
} else {
|
||||
return new InDimFilter(
|
||||
simpleExtractionExpr.getSimpleExtraction().getColumn(),
|
||||
Sets.newHashSet(arrayElements),
|
||||
new InDimFilter.ValuesSet(Arrays.asList(arrayElements)),
|
||||
simpleExtractionExpr.getSimpleExtraction().getExtractionFn(),
|
||||
null
|
||||
);
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
package org.apache.druid.sql.calcite.filtration;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.query.filter.DimFilter;
|
||||
import org.apache.druid.query.filter.InDimFilter;
|
||||
|
@ -34,7 +33,6 @@ import java.util.ArrayList;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class ConvertSelectorsToIns extends BottomUpTransform
|
||||
{
|
||||
|
@ -80,7 +78,7 @@ public class ConvertSelectorsToIns extends BottomUpTransform
|
|||
final List<SelectorDimFilter> filterList = entry.getValue();
|
||||
if (filterList.size() > 1) {
|
||||
// We found a simplification. Remove the old filters and add new ones.
|
||||
final Set<String> values = Sets.newHashSetWithExpectedSize(filterList.size());
|
||||
final InDimFilter.ValuesSet values = new InDimFilter.ValuesSet();
|
||||
|
||||
for (final SelectorDimFilter selector : filterList) {
|
||||
values.add(selector.getValue());
|
||||
|
|
Loading…
Reference in New Issue