mirror of https://github.com/apache/druid.git
Additional short circuiting knowledge in filter bundles. (#16292)
* Additional short circuiting knowledge in filter bundles. Three updates: 1) The parameter "selectionRowCount" on "makeFilterBundle" is renamed "applyRowCount", and redefined as an upper bound on rows remaining after short-circuiting (rather than number of rows selected so far). This definition works better for OR filters, which pass through the FALSE set rather than the TRUE set to the next subfilter. 2) AndFilter uses min(applyRowCount, indexIntersectionSize) rather than using selectionRowCount for the first subfilter and indexIntersectionSize for each filter thereafter. This improves accuracy when the incoming applyRowCount is smaller than the row count from the first few indexes. 3) OrFilter uses min(applyRowCount, totalRowCount - indexUnionSize) rather than applyRowCount for subfilters. This allows an OR filter to pass information about short-circuiting to its subfilters. To help write tests for this, the patch also moves the sampled wikiticker data file from sql to processing. * Forbidden APIs. * Forbidden APIs. * Better comments. * Fix inspection. * Adjustments to tests.
This commit is contained in:
parent
4fa377c7fd
commit
ccc1ffb032
|
@ -52,7 +52,11 @@ public interface Filter
|
|||
* them
|
||||
* @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} operations to tie into
|
||||
* {@link org.apache.druid.query.QueryMetrics} and build the output indexes
|
||||
* @param selectionRowCount - number of rows selected so far by any previous bundle computations
|
||||
* @param applyRowCount - upper bound on number of rows this filter would be applied to, after removing rows
|
||||
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
|
||||
* resolved using an index, then "y" will receive the number of rows that matched
|
||||
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
|
||||
* index, then "y" will receive the number of rows that did *not* match the filter "x".
|
||||
* @param totalRowCount - total number of rows to be scanned if no indexes are applied
|
||||
* @param includeUnknown - mapping for Druid native two state logic system into SQL three-state logic system. If
|
||||
* set to true, bitmaps returned by this method should include true bits for any rows
|
||||
|
@ -65,7 +69,7 @@ public interface Filter
|
|||
default <T> FilterBundle makeFilterBundle(
|
||||
ColumnIndexSelector columnIndexSelector,
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
@ -77,7 +81,7 @@ public interface Filter
|
|||
final long bitmapConstructionStartNs = System.nanoTime();
|
||||
final T result = columnIndex.computeBitmapResult(
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalRowCount,
|
||||
includeUnknown
|
||||
);
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.util.List;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* FilterBundle is a container for all the goodies used for producing filtered cursors, a {@link ImmutableBitmap} if
|
||||
|
@ -120,7 +121,9 @@ public class FilterBundle
|
|||
// Accessors for the index portion of a bundle: descriptive info, the bitmap itself, and the index capabilities.
public interface IndexBundle
|
||||
{
|
||||
// Descriptive info for this index (see IndexBundleInfo, which can render itself via describe()).
IndexBundleInfo getIndexInfo();
|
||||
|
||||
// The bitmap held by this bundle.
ImmutableBitmap getBitmap();
|
||||
|
||||
// Capabilities associated with the index held by this bundle.
ColumnIndexCapabilities getIndexCapabilities();
|
||||
}
|
||||
|
||||
|
@ -135,7 +138,9 @@ public class FilterBundle
|
|||
// Accessors for the matcher portion of a bundle: descriptive info plus factories for the two matcher flavors.
public interface MatcherBundle
|
||||
{
|
||||
// Descriptive info for this matcher (see MatcherBundleInfo, which can render itself via describe()).
MatcherBundleInfo getMatcherInfo();
|
||||
|
||||
// Creates a ValueMatcher from a selector factory, a base offset, and a descending flag.
ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending);
|
||||
|
||||
// Creates a VectorValueMatcher from a vector selector factory and a base vector offset.
VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset);
|
||||
}
|
||||
|
||||
|
@ -205,7 +210,10 @@ public class FilterBundle
|
|||
}
|
||||
|
||||
// NOTE(review): both the one-line signature and the wrapped signature appear below — presumably the removed and
// added sides of the same diff hunk; confirm only the wrapped form remains in the real file.
@Override
|
||||
public VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset)
|
||||
public VectorValueMatcher vectorMatcher(
|
||||
VectorColumnSelectorFactory selectorFactory,
|
||||
ReadableVectorOffset baseOffset
|
||||
)
|
||||
{
|
||||
// baseOffset is not consulted here: the matcher is produced solely by applying vectorMatcherFn to the
// selector factory.
return vectorMatcherFn.apply(selectorFactory);
|
||||
}
|
||||
|
@ -240,6 +248,21 @@ public class FilterBundle
|
|||
return matcher;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a multiline description string, suitable for comparisons in tests.
*
* The index description is appended first and the matcher description second. Either part may be null, in
* which case it contributes nothing; if both are null the result is the empty string.
|
||||
*/
|
||||
public String describe()
|
||||
{
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
// Index part first, so index lines always precede matcher lines in the output.
if (index != null) {
|
||||
sb.append(index.describe());
|
||||
}
|
||||
if (matcher != null) {
|
||||
sb.append(matcher.describe());
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
|
@ -249,6 +272,8 @@ public class FilterBundle
|
|||
|
||||
public static class IndexBundleInfo
|
||||
{
|
||||
private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
|
||||
|
||||
private final Supplier<String> filter;
|
||||
private final List<IndexBundleInfo> indexes;
|
||||
private final int selectionSize;
|
||||
|
@ -292,6 +317,27 @@ public class FilterBundle
|
|||
return indexes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a multiline description string, suitable for comparisons in tests.
*
* The first line has the form "index: FILTER (selectionSize = N)" followed by a newline, where FILTER is the
* value supplied by the filter supplier. If nested index infos are present, each one is described recursively
* and appended with an indent inserted at the start of every one of its lines.
|
||||
*/
|
||||
public String describe()
|
||||
{
|
||||
final StringBuilder sb = new StringBuilder()
|
||||
.append("index: ")
|
||||
.append(filter.get())
|
||||
.append(" (selectionSize = ")
|
||||
.append(selectionSize)
|
||||
.append(")\n");
|
||||
|
||||
// indexes may be null (no nested infos); then only the header line is emitted.
if (indexes != null) {
|
||||
for (final IndexBundleInfo info : indexes) {
|
||||
// PATTERN_LINE_START is the multiline pattern "(?m)^", so the replacement string is inserted at the start of
// each line of the child's description, nesting it one level deeper.
sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll(" "));
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
|
@ -306,6 +352,8 @@ public class FilterBundle
|
|||
|
||||
public static class MatcherBundleInfo
|
||||
{
|
||||
private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");
|
||||
|
||||
private final Supplier<String> filter;
|
||||
@Nullable
|
||||
final List<MatcherBundleInfo> matchers;
|
||||
|
@ -345,6 +393,30 @@ public class FilterBundle
|
|||
return matchers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a multiline description string, suitable for comparisons in tests.
*
* The first line has the form "matcher: FILTER" followed by a newline. If a partial index is present, its
* description follows, introduced by the label "with partial". If nested matcher infos are present, each one is
* described recursively and appended with an indent inserted at the start of every one of its lines.
|
||||
*/
|
||||
public String describe()
|
||||
{
|
||||
final StringBuilder sb = new StringBuilder()
|
||||
.append("matcher: ")
|
||||
.append(filter.get())
|
||||
.append("\n");
|
||||
|
||||
if (partialIndex != null) {
|
||||
// Every line of the partial index description is indented, then the first two characters of the result are
// dropped so that its first line continues directly after the "with partial " label.
// NOTE(review): substring(2) implies a two-character indent, but the replacement literal (and the leading
// whitespace of the label) reads as a single space here — confirm the literals were not whitespace-collapsed,
// otherwise the first character of the description is clipped.
sb.append(" with partial ")
|
||||
.append(PATTERN_LINE_START.matcher(partialIndex.describe()).replaceAll(" ").substring(2));
|
||||
}
|
||||
|
||||
// matchers may be null (no nested matchers); nested descriptions are indented the same way as nested indexes.
if (matchers != null) {
|
||||
for (MatcherBundleInfo info : matchers) {
|
||||
sb.append(PATTERN_LINE_START.matcher(info.describe()).replaceAll(" "));
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
|
|
|
@ -76,7 +76,7 @@ public class AndFilter implements BooleanFilter
|
|||
public <T> FilterBundle makeFilterBundle(
|
||||
ColumnIndexSelector columnIndexSelector,
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
@ -85,7 +85,7 @@ public class AndFilter implements BooleanFilter
|
|||
final List<FilterBundle.MatcherBundle> matcherBundles = new ArrayList<>();
|
||||
final List<FilterBundle.MatcherBundleInfo> matcherBundleInfos = new ArrayList<>();
|
||||
|
||||
int selectionCount = selectionRowCount;
|
||||
int indexIntersectionSize = totalRowCount;
|
||||
ImmutableBitmap index = null;
|
||||
ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true);
|
||||
// AND filter can be partitioned into a bundle that has both indexes and value matchers. The filters which support
|
||||
|
@ -101,7 +101,7 @@ public class AndFilter implements BooleanFilter
|
|||
final FilterBundle subBundle = subfilter.makeFilterBundle(
|
||||
columnIndexSelector,
|
||||
bitmapResultFactory,
|
||||
selectionCount,
|
||||
Math.min(applyRowCount, indexIntersectionSize),
|
||||
totalRowCount,
|
||||
includeUnknown
|
||||
);
|
||||
|
@ -120,7 +120,7 @@ public class AndFilter implements BooleanFilter
|
|||
} else {
|
||||
index = index.intersection(subBundle.getIndex().getBitmap());
|
||||
}
|
||||
selectionCount = index.size();
|
||||
indexIntersectionSize = index.size();
|
||||
}
|
||||
if (subBundle.getMatcherBundle() != null) {
|
||||
matcherBundles.add(subBundle.getMatcherBundle());
|
||||
|
@ -140,7 +140,7 @@ public class AndFilter implements BooleanFilter
|
|||
indexBundle = new FilterBundle.SimpleIndexBundle(
|
||||
new FilterBundle.IndexBundleInfo(
|
||||
() -> "AND",
|
||||
selectionCount,
|
||||
indexIntersectionSize,
|
||||
System.nanoTime() - bitmapConstructionStartNs,
|
||||
indexBundleInfos
|
||||
),
|
||||
|
@ -247,7 +247,7 @@ public class AndFilter implements BooleanFilter
|
|||
@Override
|
||||
public <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
@ -256,7 +256,7 @@ public class AndFilter implements BooleanFilter
|
|||
for (final BitmapColumnIndex index : bitmapColumnIndices) {
|
||||
final T bitmapResult = index.computeBitmapResult(
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalRowCount,
|
||||
includeUnknown
|
||||
);
|
||||
|
|
|
@ -94,7 +94,7 @@ public class IsBooleanFilter implements Filter
|
|||
@Override
|
||||
public <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
@ -102,7 +102,7 @@ public class IsBooleanFilter implements Filter
|
|||
if (isTrue) {
|
||||
return baseIndex.computeBitmapResult(
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalRowCount,
|
||||
false
|
||||
);
|
||||
|
@ -110,7 +110,7 @@ public class IsBooleanFilter implements Filter
|
|||
|
||||
final T result = baseIndex.computeBitmapResult(
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalRowCount,
|
||||
useThreeValueLogic
|
||||
);
|
||||
|
|
|
@ -93,14 +93,14 @@ public class NotFilter implements Filter
|
|||
@Override
|
||||
public <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
{
|
||||
final T result = baseIndex.computeBitmapResult(
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalRowCount,
|
||||
!includeUnknown && useThreeValueLogic
|
||||
);
|
||||
|
|
|
@ -81,7 +81,7 @@ public class OrFilter implements BooleanFilter
|
|||
public <T> FilterBundle makeFilterBundle(
|
||||
ColumnIndexSelector columnIndexSelector,
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
@ -99,6 +99,8 @@ public class OrFilter implements BooleanFilter
|
|||
final List<FilterBundle.IndexBundleInfo> indexOnlyBundlesInfo = new ArrayList<>();
|
||||
final List<FilterBundle.MatcherBundle> partialIndexBundles = new ArrayList<>();
|
||||
final List<FilterBundle.MatcherBundle> matcherOnlyBundles = new ArrayList<>();
|
||||
|
||||
int indexUnionSize = 0;
|
||||
ImmutableBitmap index = null;
|
||||
ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true);
|
||||
int emptyCount = 0;
|
||||
|
@ -109,7 +111,7 @@ public class OrFilter implements BooleanFilter
|
|||
final FilterBundle bundle = subfilter.makeFilterBundle(
|
||||
columnIndexSelector,
|
||||
bitmapResultFactory,
|
||||
selectionRowCount,
|
||||
Math.min(applyRowCount, totalRowCount - indexUnionSize),
|
||||
totalRowCount,
|
||||
includeUnknown
|
||||
);
|
||||
|
@ -138,6 +140,7 @@ public class OrFilter implements BooleanFilter
|
|||
} else {
|
||||
index = index.union(bundle.getIndex().getBitmap());
|
||||
}
|
||||
indexUnionSize = index.size();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -165,7 +168,7 @@ public class OrFilter implements BooleanFilter
|
|||
new FilterBundle.SimpleIndexBundle(
|
||||
new FilterBundle.IndexBundleInfo(
|
||||
() -> "OR",
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
totalBitmapConstructTimeNs,
|
||||
indexOnlyBundlesInfo
|
||||
),
|
||||
|
@ -185,7 +188,7 @@ public class OrFilter implements BooleanFilter
|
|||
if (!indexOnlyBundles.isEmpty()) {
|
||||
// translate the indexOnly bundles into a single matcher
|
||||
final FilterBundle.MatcherBundle matcherBundle = convertIndexToMatcherBundle(
|
||||
selectionRowCount,
|
||||
applyRowCount,
|
||||
indexOnlyBundles,
|
||||
indexOnlyBundlesInfo,
|
||||
totalBitmapConstructTimeNs,
|
||||
|
@ -284,14 +287,14 @@ public class OrFilter implements BooleanFilter
|
|||
@Override
|
||||
public <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
{
|
||||
List<T> results = Lists.newArrayListWithCapacity(bitmapColumnIndices.size());
|
||||
for (BitmapColumnIndex index : bitmapColumnIndices) {
|
||||
final T r = index.computeBitmapResult(bitmapResultFactory, selectionRowCount, totalRowCount, includeUnknown);
|
||||
final T r = index.computeBitmapResult(bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown);
|
||||
if (r == null) {
|
||||
// all or nothing
|
||||
return null;
|
||||
|
|
|
@ -59,7 +59,11 @@ public interface BitmapColumnIndex
|
|||
*
|
||||
* @param bitmapResultFactory helper to format the {@link org.apache.druid.collections.bitmap.ImmutableBitmap} in a
|
||||
* form ready for consumption by callers
|
||||
* @param selectionRowCount number of rows selected so far by any previous index computations
|
||||
* @param applyRowCount upper bound on number of rows this filter would be applied to, after removing rows
|
||||
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
|
||||
* resolved using an index, then "y" will receive the number of rows that matched
|
||||
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
|
||||
* index, then "y" will receive the number of rows that did *not* match the filter "x".
|
||||
* @param totalRowCount total number of rows to be scanned if no indexes are used
|
||||
* @param includeUnknown mapping for Druid native two state logic system into SQL three-state logic system. If
|
||||
* set to true, bitmaps returned by this method should include true bits for any rows where
|
||||
|
@ -71,7 +75,7 @@ public interface BitmapColumnIndex
|
|||
@Nullable
|
||||
default <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
|
|
|
@ -50,12 +50,12 @@ public abstract class DictionaryRangeScanningBitmapIndex extends SimpleImmutable
|
|||
@Override
|
||||
public final <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
{
|
||||
final int scale = (int) Math.ceil(sizeScale * selectionRowCount);
|
||||
final int scale = (int) Math.ceil(sizeScale * applyRowCount);
|
||||
if (rangeSize > scale) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -53,12 +53,12 @@ public abstract class DictionaryScanningBitmapIndex extends SimpleImmutableBitma
|
|||
@Override
|
||||
public final <T> T computeBitmapResult(
|
||||
BitmapResultFactory<T> bitmapResultFactory,
|
||||
int selectionRowCount,
|
||||
int applyRowCount,
|
||||
int totalRowCount,
|
||||
boolean includeUnknown
|
||||
)
|
||||
{
|
||||
if (selectionRowCount != totalRowCount && selectionRowCount < (dictionarySize * scaleThreshold)) {
|
||||
if (applyRowCount != totalRowCount && applyRowCount < (dictionarySize * scaleThreshold)) {
|
||||
return null;
|
||||
}
|
||||
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(includeUnknown));
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.google.common.base.Suppliers;
|
|||
import com.google.common.io.CharSource;
|
||||
import com.google.common.io.LineProcessor;
|
||||
import com.google.common.io.Resources;
|
||||
import org.apache.druid.data.input.ResourceInputSource;
|
||||
import org.apache.druid.data.input.impl.DelimitedParseSpec;
|
||||
import org.apache.druid.data.input.impl.DimensionSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
|
@ -37,6 +38,7 @@ import org.apache.druid.java.util.common.DateTimes;
|
|||
import org.apache.druid.java.util.common.FileUtils;
|
||||
import org.apache.druid.java.util.common.Intervals;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.query.NestedDataTestUtils;
|
||||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.DoubleMinAggregatorFactory;
|
||||
|
@ -68,6 +70,7 @@ import java.util.List;
|
|||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class TestIndex
|
||||
{
|
||||
|
@ -222,6 +225,9 @@ public class TestIndex
|
|||
.build()
|
||||
)
|
||||
);
|
||||
// Memoized supplier: the Wikipedia incremental index is built and passed through persistRealtimeAndLoadMMapped
// only when the supplier is first asked for a value; later calls reuse that same QueryableIndex.
private static Supplier<QueryableIndex> wikipediaMMappedIndex = Suppliers.memoize(
|
||||
() -> persistRealtimeAndLoadMMapped(makeWikipediaIncrementalIndex())
|
||||
);
|
||||
|
||||
public static IncrementalIndex getIncrementalTestIndex()
|
||||
{
|
||||
|
@ -243,6 +249,11 @@ public class TestIndex
|
|||
return mmappedIndex.get();
|
||||
}
|
||||
|
||||
/**
 * Returns the {@link QueryableIndex} held by the wikipediaMMappedIndex supplier. No new index is constructed
 * here; whatever the supplier yields is returned as-is.
 */
public static QueryableIndex getMMappedWikipediaIndex()
|
||||
{
|
||||
return wikipediaMMappedIndex.get();
|
||||
}
|
||||
|
||||
public static QueryableIndex getNoRollupMMappedTestIndex()
|
||||
{
|
||||
return noRollupMmappedIndex.get();
|
||||
|
@ -322,6 +333,62 @@ public class TestIndex
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds an {@link IncrementalIndex} from the classpath resource
 * "wikipedia/wikiticker-2015-09-12-sampled.json.gz".
 *
 * The schema disables rollup, reads the timestamp from the "time" column, and declares sixteen string dimensions
 * plus three long dimensions ("delta", "added", "deleted"). A temporary directory is created for the build and
 * deleted again in a finally block, whether or not the build succeeds.
 *
 * @return the incremental index produced by the builder
 * @throws RuntimeException wrapping any {@link IOException} raised while creating or deleting the temporary
 *                          directory or while building the index
 */
public static IncrementalIndex makeWikipediaIncrementalIndex()
|
||||
{
|
||||
final List<DimensionSchema> dimensions = Arrays.asList(
|
||||
new StringDimensionSchema("channel"),
|
||||
new StringDimensionSchema("cityName"),
|
||||
new StringDimensionSchema("comment"),
|
||||
new StringDimensionSchema("countryIsoCode"),
|
||||
new StringDimensionSchema("countryName"),
|
||||
new StringDimensionSchema("isAnonymous"),
|
||||
new StringDimensionSchema("isMinor"),
|
||||
new StringDimensionSchema("isNew"),
|
||||
new StringDimensionSchema("isRobot"),
|
||||
new StringDimensionSchema("isUnpatrolled"),
|
||||
new StringDimensionSchema("metroCode"),
|
||||
new StringDimensionSchema("namespace"),
|
||||
new StringDimensionSchema("page"),
|
||||
new StringDimensionSchema("regionIsoCode"),
|
||||
new StringDimensionSchema("regionName"),
|
||||
new StringDimensionSchema("user"),
|
||||
new LongDimensionSchema("delta"),
|
||||
new LongDimensionSchema("added"),
|
||||
new LongDimensionSchema("deleted")
|
||||
);
|
||||
|
||||
final File tmpDir;
|
||||
try {
|
||||
tmpDir = FileUtils.createTempDir("test-index-input-source");
|
||||
// Inner try/finally guarantees the temporary directory is removed once the index has been built (or failed).
try {
|
||||
return IndexBuilder
|
||||
.create()
|
||||
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
|
||||
.schema(new IncrementalIndexSchema.Builder()
|
||||
.withRollup(false)
|
||||
.withTimestampSpec(new TimestampSpec("time", null, null))
|
||||
.withDimensionsSpec(new DimensionsSpec(dimensions))
|
||||
.build()
|
||||
)
|
||||
.inputSource(
|
||||
ResourceInputSource.of(
|
||||
TestIndex.class.getClassLoader(),
|
||||
"wikipedia/wikiticker-2015-09-12-sampled.json.gz"
|
||||
)
|
||||
)
|
||||
.inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT)
|
||||
.inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
|
||||
.buildIncrementalIndex();
|
||||
}
|
||||
finally {
|
||||
FileUtils.deleteDirectory(tmpDir);
|
||||
}
|
||||
}
|
||||
// Checked I/O failures are rethrown unchecked, with the original exception kept as the cause.
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static IncrementalIndex loadIncrementalIndex(
|
||||
final IncrementalIndex retVal,
|
||||
final CharSource source
|
||||
|
|
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.filter;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.druid.collections.bitmap.BitmapFactory;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.query.DefaultBitmapResultFactory;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.query.filter.EqualityFilter;
|
||||
import org.apache.druid.query.filter.Filter;
|
||||
import org.apache.druid.query.filter.FilterBundle;
|
||||
import org.apache.druid.query.filter.LikeDimFilter;
|
||||
import org.apache.druid.query.filter.NullFilter;
|
||||
import org.apache.druid.query.filter.TypedInFilter;
|
||||
import org.apache.druid.segment.ColumnCache;
|
||||
import org.apache.druid.segment.ColumnSelectorColumnIndexSelector;
|
||||
import org.apache.druid.segment.QueryableIndex;
|
||||
import org.apache.druid.segment.TestIndex;
|
||||
import org.apache.druid.segment.VirtualColumns;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
/**
 * Checks the shape of the {@link FilterBundle} produced for AND / OR combinations of filters by comparing
 * {@code filterBundle.getInfo().describe()} with an expected multiline string. Bundles are built against
 * {@link TestIndex#getMMappedWikipediaIndex()}, passing the selector's row count as both applyRowCount and
 * totalRowCount, with includeUnknown set to false.
 *
 * NOTE(review): the nested lines of the expected strings all show a single leading space here, whatever their
 * nesting depth — confirm the original indentation before relying on these literals.
 */
public class FilterBundleTest extends InitializedNullHandlingTest
|
||||
{
|
||||
// Owns the resources opened through ColumnCache; closed in tearDown().
private Closer closer;
|
||||
protected BitmapFactory bitmapFactory;
|
||||
protected ColumnIndexSelector indexSelector;
|
||||
|
||||
// NOTE(review): not referenced by any test visible in this class — confirm whether it is still needed.
@Rule
|
||||
public TemporaryFolder tmpDir = new TemporaryFolder();
|
||||
|
||||
// Creates a fresh Closer and index selector over the Wikipedia test index before each test.
@Before
|
||||
public void setUp()
|
||||
{
|
||||
final QueryableIndex index = TestIndex.getMMappedWikipediaIndex();
|
||||
closer = Closer.create();
|
||||
bitmapFactory = index.getBitmapFactoryForDimensions();
|
||||
indexSelector = new ColumnSelectorColumnIndexSelector(
|
||||
bitmapFactory,
|
||||
VirtualColumns.EMPTY,
|
||||
new ColumnCache(index, closer)
|
||||
);
|
||||
}
|
||||
|
||||
// Closes everything registered with the Closer and drops the selector reference.
@After
|
||||
public void tearDown() throws Exception
|
||||
{
|
||||
closer.close();
|
||||
indexSelector = null;
|
||||
}
|
||||
|
||||
// OR of two equality filters: expected to be a single OR index with both subfilters listed as indexes.
@Test
|
||||
public void test_or_country_isRobot()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
|
||||
new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: OR (selectionSize = 39244)\n"
|
||||
+ " index: countryName = United States (selectionSize = 528)\n"
|
||||
+ " index: isRobot = true (selectionSize = 15420)\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// AND of the same two equality filters: expected to be a single AND index with both subfilters as indexes.
@Test
|
||||
public void test_and_country_isRobot()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
|
||||
new EqualityFilter("isRobot", ColumnType.STRING, "true", null)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: AND (selectionSize = 0)\n"
|
||||
+ " index: countryName = United States (selectionSize = 528)\n"
|
||||
+ " index: isRobot = true (selectionSize = 15420)\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// OR of a null check and a LIKE: expected to be a matcher OR, with the null check carrying a partial index and
// the LIKE described as a matcher only.
@Test
|
||||
public void test_or_countryIsNull_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new NullFilter("countryName", null),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"matcher: OR\n"
|
||||
+ " matcher: countryName IS NULL\n"
|
||||
+ " with partial index: countryName IS NULL (selectionSize = 35445)\n"
|
||||
+ " matcher: page LIKE '%u%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// AND of the same null check and LIKE: expected to be a single AND index with both subfilters as indexes.
@Test
|
||||
public void test_and_countryIsNull_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new NullFilter("countryName", null),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: AND (selectionSize = 14165)\n"
|
||||
+ " index: countryName IS NULL (selectionSize = 35445)\n"
|
||||
+ " index: page LIKE '%u%' (selectionSize = 15328)\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// AND of an equality filter and a LIKE: expected to be an index for the equality filter plus a matcher for LIKE.
@Test
|
||||
public void test_and_country_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: countryName = United States (selectionSize = 528)\n"
|
||||
+ "matcher: page LIKE '%u%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// OR of a negated null check and a LIKE: expected to be a single OR index with both subfilters as indexes.
@Test
|
||||
public void test_or_countryNotNull_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new NotFilter(new NullFilter("countryName", null)),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: OR (selectionSize = 39244)\n"
|
||||
+ " index: ~(countryName IS NULL) (selectionSize = 3799)\n"
|
||||
+ " index: page LIKE '%u%' (selectionSize = 15328)\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// AND of a negated null check and a LIKE: expected to be an index for the negation plus a matcher for LIKE.
@Test
|
||||
public void test_and_countryNotNull_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new NotFilter(new NullFilter("countryName", null)),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"index: ~(countryName IS NULL) (selectionSize = 3799)\n"
|
||||
+ "matcher: page LIKE '%u%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// OR of three AND filters. In the expected description the AND with the null check comes first even though it
// is the last subfilter passed to the OR.
@Test
|
||||
public void test_or_countryIsAndPageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
|
||||
new LikeDimFilter("page", "%a%", null, null).toFilter()
|
||||
)
|
||||
),
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United Kingdom", null),
|
||||
new LikeDimFilter("page", "%b%", null, null).toFilter()
|
||||
)
|
||||
),
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new NullFilter("countryName", null),
|
||||
new LikeDimFilter("page", "%c%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"matcher: OR\n"
|
||||
+ " matcher: AND\n"
|
||||
+ " with partial index: AND (selectionSize = 11851)\n"
|
||||
+ " index: countryName IS NULL (selectionSize = 35445)\n"
|
||||
+ " index: page LIKE '%c%' (selectionSize = 12864)\n"
|
||||
+ " matcher: AND\n"
|
||||
+ " with partial index: countryName = United States (selectionSize = 528)\n"
|
||||
+ " matcher: page LIKE '%a%'\n"
|
||||
+ " matcher: AND\n"
|
||||
+ " with partial index: countryName = United Kingdom (selectionSize = 234)\n"
|
||||
+ " matcher: page LIKE '%b%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// OR of a null check and an AND(equality, LIKE): expected to be a matcher OR where each branch carries a
// partial index and the LIKE remains a matcher.
@Test
|
||||
public void test_or_countryIsNull_and_country_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new NullFilter("countryName", null),
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
new EqualityFilter("countryName", ColumnType.STRING, "United States", null),
|
||||
new LikeDimFilter("page", "%a%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"matcher: OR\n"
|
||||
+ " matcher: countryName IS NULL\n"
|
||||
+ " with partial index: countryName IS NULL (selectionSize = 35445)\n"
|
||||
+ " matcher: AND\n"
|
||||
+ " with partial index: countryName = United States (selectionSize = 528)\n"
|
||||
+ " matcher: page LIKE '%a%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// Same as above but with an IN filter that matches every row in place of the equality filter; the LIKE is
// still expected to be described as a matcher rather than an index.
@Test
|
||||
public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike()
|
||||
{
|
||||
final FilterBundle filterBundle = makeFilterBundle(
|
||||
new OrFilter(
|
||||
ImmutableList.of(
|
||||
new NullFilter("countryName", null),
|
||||
new AndFilter(
|
||||
ImmutableList.of(
|
||||
// isRobot IN (false, true) matches all rows; so this test is equivalent logically to
|
||||
// test_or_countryIsNull_pageLike. It's effectively testing that the AndFilter carries through
|
||||
// the short-circuiting done by the OrFilter when it applies the NullFilter.
|
||||
new TypedInFilter("isRobot", ColumnType.STRING, ImmutableList.of("false", "true"), null, null),
|
||||
new LikeDimFilter("page", "%u%", null, null).toFilter()
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
"matcher: OR\n"
|
||||
+ " matcher: countryName IS NULL\n"
|
||||
+ " with partial index: countryName IS NULL (selectionSize = 35445)\n"
|
||||
+ " matcher: AND\n"
|
||||
+ " with partial index: isRobot IN (false, true) (STRING) (selectionSize = 39244)\n"
|
||||
+ " matcher: page LIKE '%u%'\n",
|
||||
filterBundle.getInfo().describe()
|
||||
);
|
||||
}
|
||||
|
||||
// Builds the bundle for a filter as if no rows had been short-circuited yet: the selector's row count is passed
// for both row-count arguments, and includeUnknown is false.
protected FilterBundle makeFilterBundle(final Filter filter)
|
||||
{
|
||||
return filter.makeFilterBundle(
|
||||
indexSelector,
|
||||
new DefaultBitmapResultFactory(bitmapFactory),
|
||||
indexSelector.getNumRows(),
|
||||
indexSelector.getNumRows(),
|
||||
false
|
||||
);
|
||||
}
|
||||
}
|
|
@ -38,6 +38,7 @@ import org.apache.druid.data.input.impl.StringDimensionSchema;
|
|||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
import org.apache.druid.java.util.common.DateTimes;
|
||||
import org.apache.druid.java.util.common.Intervals;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
|
||||
import org.apache.druid.query.DataSource;
|
||||
import org.apache.druid.query.GlobalTableDataSource;
|
||||
|
@ -56,10 +57,13 @@ import org.apache.druid.query.aggregation.last.FloatLastAggregatorFactory;
|
|||
import org.apache.druid.query.aggregation.last.LongLastAggregatorFactory;
|
||||
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
|
||||
import org.apache.druid.segment.IndexBuilder;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.QueryableIndex;
|
||||
import org.apache.druid.segment.SegmentWrangler;
|
||||
import org.apache.druid.segment.TestIndex;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.column.RowSignature;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.join.JoinConditionAnalysis;
|
||||
import org.apache.druid.segment.join.Joinable;
|
||||
|
@ -78,11 +82,13 @@ import org.joda.time.DateTime;
|
|||
import org.joda.time.chrono.ISOChronology;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
|
@ -602,47 +608,15 @@ public class TestDataBuilder
|
|||
|
||||
public static QueryableIndex makeWikipediaIndex(File tmpDir)
|
||||
{
|
||||
final List<DimensionSchema> dimensions = Arrays.asList(
|
||||
new StringDimensionSchema("channel"),
|
||||
new StringDimensionSchema("cityName"),
|
||||
new StringDimensionSchema("comment"),
|
||||
new StringDimensionSchema("countryIsoCode"),
|
||||
new StringDimensionSchema("countryName"),
|
||||
new StringDimensionSchema("isAnonymous"),
|
||||
new StringDimensionSchema("isMinor"),
|
||||
new StringDimensionSchema("isNew"),
|
||||
new StringDimensionSchema("isRobot"),
|
||||
new StringDimensionSchema("isUnpatrolled"),
|
||||
new StringDimensionSchema("metroCode"),
|
||||
new StringDimensionSchema("namespace"),
|
||||
new StringDimensionSchema("page"),
|
||||
new StringDimensionSchema("regionIsoCode"),
|
||||
new StringDimensionSchema("regionName"),
|
||||
new StringDimensionSchema("user"),
|
||||
new LongDimensionSchema("delta"),
|
||||
new LongDimensionSchema("added"),
|
||||
new LongDimensionSchema("deleted")
|
||||
);
|
||||
|
||||
return IndexBuilder
|
||||
.create()
|
||||
.tmpDir(new File(tmpDir, "wikipedia1"))
|
||||
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
|
||||
.schema(new IncrementalIndexSchema.Builder()
|
||||
.withRollup(false)
|
||||
.withTimestampSpec(new TimestampSpec("time", null, null))
|
||||
.withDimensionsSpec(new DimensionsSpec(dimensions))
|
||||
.build()
|
||||
)
|
||||
.inputSource(
|
||||
ResourceInputSource.of(
|
||||
TestDataBuilder.class.getClassLoader(),
|
||||
"calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
|
||||
)
|
||||
)
|
||||
.inputFormat(DEFAULT_JSON_INPUT_FORMAT)
|
||||
.inputTmpDir(new File(tmpDir, "tmpWikipedia1"))
|
||||
.buildMMappedIndex();
|
||||
try {
|
||||
final File directory = new File(tmpDir, StringUtils.format("wikipedia-index-%s", UUID.randomUUID()));
|
||||
final IncrementalIndex index = TestIndex.makeWikipediaIncrementalIndex();
|
||||
TestIndex.INDEX_MERGER.persist(index, directory, IndexSpec.DEFAULT, null);
|
||||
return TestIndex.INDEX_IO.loadIndex(directory);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static QueryableIndex makeWikipediaIndexWithAggregation(File tmpDir)
|
||||
|
@ -687,8 +661,8 @@ public class TestDataBuilder
|
|||
)
|
||||
.inputSource(
|
||||
ResourceInputSource.of(
|
||||
TestDataBuilder.class.getClassLoader(),
|
||||
"calcite/tests/wikiticker-2015-09-12-sampled.json.gz"
|
||||
TestIndex.class.getClassLoader(),
|
||||
"wikipedia/wikiticker-2015-09-12-sampled.json.gz"
|
||||
)
|
||||
)
|
||||
.inputFormat(DEFAULT_JSON_INPUT_FORMAT)
|
||||
|
|
Loading…
Reference in New Issue